├── .gitattributes ├── .github └── workflows │ └── iris-contest-workflows.yml ├── .gitignore ├── .vscode ├── launch.json └── settings.json ├── LICENSE ├── README.md ├── docker-compose.yml ├── iris-aa-server ├── Dockerfile ├── README.md ├── data │ ├── Campaign.csv │ ├── HateSpeech.xml │ ├── IrisDataset.xml │ ├── Loader.xml │ ├── LoanPerformance.xml │ ├── NLPUtils.cls │ ├── appointment-noshows.csv │ ├── breast-cancer.csv │ ├── hate-speech.tar │ ├── loans.gof │ ├── readmission.csv │ └── titanic.csv ├── iris.script └── src │ └── Util │ └── Loader.cls ├── jupyter-samples ├── ED_visit_90_day.ipynb ├── biomedical-integratedml-PyODBC.ipynb ├── campaign-integratedml-jdbc.ipynb ├── intersystems-jdbc-3.1.0.jar ├── libirisodbcu35.so ├── odbc.ini ├── odbcinst.ini └── readmission-integratedml-jdbc.ipynb └── tf2-jupyter-jdbc ├── Dockerfile └── requirements.txt /.gitattributes: -------------------------------------------------------------------------------- 1 | *.cls linguist-language=ObjectScript 2 | *.mac linguist-language=ObjectScript 3 | *.int linguist-language=ObjectScript 4 | *.inc linguist-language=ObjectScript 5 | *.csp linguist-language=Html 6 | 7 | *.sh text eol=lf 8 | *.cls text eol=lf 9 | *.mac text eol=lf 10 | *.int text eol=lf 11 | *.inc text eol=lf 12 | Dockerfil* text eol=lf 13 | -------------------------------------------------------------------------------- /.github/workflows/iris-contest-workflows.yml: -------------------------------------------------------------------------------- 1 | name: objectscriptquality 2 | on: push 3 | 4 | jobs: 5 | linux: 6 | name: Linux build 7 | runs-on: ubuntu-latest 8 | 9 | steps: 10 | - name: Execute ObjectScript Quality Analysis 11 | run: wget https://raw.githubusercontent.com/litesolutions/objectscriptquality-jenkins-integration/master/iris-community-hook.sh && sh ./iris-community-hook.sh 12 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints 2 | -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.2.0", 3 | "configurations": [ 4 | { 5 | "type": "objectscript", 6 | "request": "launch", 7 | "name": "ObjectScript Debug Class", 8 | "program": "##class(PackageSample.ObjectScript).Test()", 9 | }, 10 | { 11 | "type": "objectscript", 12 | "request": "attach", 13 | "name": "ObjectScript Attach", 14 | "processId": "${command:PickProcess}", 15 | "system": true 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "files.associations": { 3 | 4 | "iris.script": "objectscript", 5 | }, 6 | "objectscript.conn" :{ 7 | "ns": "USER", 8 | "username": "superuser", 9 | "active": true, 10 | "docker-compose": { 11 | "service": "irisimlsvr", 12 | "internalPort": 52773 13 | }, 14 | "links": { 15 | "IRIS IntegratedML Jupyter": "http://localhost:8896/tree", 16 | "Webterminal": "http://localhost:8092/terminal/" 17 | } 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 InterSystems Developer Community 4 | 5 | Permission is hereby granted, free 
of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
 10 | furnished to do so, subject to the following conditions:
 11 |
 12 | The above copyright notice and this permission notice shall be included in all
 13 | copies or substantial portions of the Software.
 14 |
 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 21 | SOFTWARE.
 22 | -------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
 1 | # integratedml-demo-template
 2 | This is a GitHub template repository for InterSystems IRIS IntegratedML.
 3 |
 4 | This repository comes with a few example Jupyter notebooks (http://jupyter.org) that demonstrate how to use IntegratedML in InterSystems IRIS Community Edition (Advanced Analytics including IntegratedML) in a Docker container.
 5 |
 6 | ## Contents
 7 | * [What is IntegratedML?](#what-is-integratedml)
 8 | * [What's inside this template](#whats-inside-this-template)
 9 | * [Pre-configured environment and sample data](#pre-configured-environment-and-sample-data)
 10 | * [Sample notebooks to get you started](#sample-notebooks-to-get-you-started)
 11 | * [Demo environment topology](#demo-environment-topology)
 12 | * [Prerequisites](#prerequisites)
 13 | * [Tested environments](#tested-environments)
 14 | * [Installation](#installation)
 15 | * [How to develop your IntegratedML solution with the IntegratedML Template Repo](#how-to-develop-your-integratedml-solution-with-the-integratedml-template-repository)
 16 | * [Use this template](#use-this-template)
 17 | * [Check out the repo](#check-out-the-repo)
 18 | * [Start developing](#start-developing)
 19 | * [How to Import data into InterSystems IRIS](#how-to-import-data-into-intersystems-iris)
 20 | * [Importing data from CSV file](#importing-data-from-csv-file)
 21 | * [Importing data from CSV URL](#importing-data-from-csv-url)
 22 |
 23 | ## What is IntegratedML?
 24 | IntegratedML is a feature of the InterSystems IRIS data platform that brings machine learning to SQL developers.
 25 |
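The complete build / train / predict cycle is plain SQL. As a first taste, these are the same three statements that [iris-aa-server/README.md](iris-aa-server/README.md) runs against the bundled iris dataset:

```sql
CREATE MODEL Flowers PREDICTING (Species) FROM DataMining.IrisDataset;
TRAIN MODEL Flowers FROM DataMining.IrisDataset;
SELECT TOP 20 PREDICT(Flowers) AS PredictedSpecies, Species AS ActualSpecies FROM DataMining.IrisDataset;
```

(The trailing semicolons are for multi-statement SQL clients such as DBeaver; see that README for details.)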
28 | 29 | IntegratedML is 30 | - all-SQL -- Build and train machine learning models using intuitive custom SQL commands, fully integrated within the InterSystems IRIS SQL processor 31 | - turnkey -- no packages or programming languages to learn, nothing to install 32 | - modular -- leverages "best of breed" open source and proprietary AutoML frameworks 33 | 34 | Learn more about InterSystems IRIS and IntegratedML at the [InterSystems Learning site](https://learning.intersystems.com/course/view.php?name=Learn%20IntegratedML) 35 | 36 | ## What's inside this template 37 | 38 | ### Pre-configured environment, and sample data 39 | This template creates a docker environment (via "docker-compose up") of 2 pre-configured containers: 40 | 1. tf2jupyter: Jupyter+Tensorflow2.2(without GPU), with a few sample notebook files (in its Dockerfile) 41 | 2. irisimlsvr another one for an IRIS 2020.3 Community Edition, with pre-loaded sample data in USER namespace(see its [Dockerfile](iris-aa-server/Dockerfile) and [iris.script](iris-aa-server/iris.script) that is run at startup) 42 | 43 | ### Sample notebooks to get you started 44 | 4 sample notebook files -- by default this template starts Jupyter at http://localhost:8896/tree : 45 | - [campaign-integratedml-jdbc.ipynb](jupyter-samples/campaign-integratedml-jdbc.ipynb): A simple JDBC connection from tf2jupyter into a sample data table (Marketing Campaign data) within InterSystems IRIS's USER namespace, showing some use of IntegratedML including VALIDATE MODEL command usage. 46 | - [readmission-integratedml-jdbc.ipynb](jupyter-samples/readmission-integratedml-jdbc.ipynb): Demonstrates use of IntegratedML on a hospital readmission prediction dataset. 47 | - [biomedical-integratedml-PyODBC.ipynb](jupyter-samples/biomedical-integratedml-PyODBC.ipynb): Connection to InterSystems IRIS server over PyODBC, building and using an IntegratedML machine learning model, with a complex SQL query using the PREDICT() and PROBABILITY() IntegratedML SQL functions. 48 | - [ED_visit_90_day.ipynb](jupyter-samples/ED_visit_90_day.ipynb): Building and using an IntegratedML machine learning model to predict visits to Emergency Department, utilizing data from a Health Insight server, kindly provided by Joseph Cofone at Baystate Health. *NOTE: this notebook is not executable!* 49 | 50 | ## Demo environment topology 51 |
52 | *(diagram: the tf2jupyter notebook container connects over JDBC/ODBC to the irisimlsvr IRIS container; see [docker-compose.yml](docker-compose.yml) for the port mappings)*
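Below is a minimal sketch of how the JDBC-based sample notebooks open their connection across this topology, from tf2jupyter to irisimlsvr. The hostname, container port, USER namespace, SuperUser/SYS credentials and the intersystems-jdbc-3.1.0.jar path are the ones this template configures (docker-compose.yml, iris.script, jupyter-samples/); the model and table names reuse the iris dataset example from iris-aa-server/README.md, so treat it as an illustration rather than a verbatim copy of any one notebook:

```python
# Sketch only: names/paths below are assumptions taken from this template's
# docker-compose.yml, iris.script, and the jupyter-samples folder.
import jaydebeapi
import pandas as pd

conn = jaydebeapi.connect(
    "com.intersystems.jdbc.IRISDriver",        # InterSystems IRIS JDBC driver class
    "jdbc:IRIS://irisimlsvr:1972/USER",        # jdbc:IRIS://host:port/namespace
    ["SuperUser", "SYS"],                      # demo credentials set up by iris.script
    "./intersystems-jdbc-3.1.0.jar")           # driver jar shipped in jupyter-samples/

curs = conn.cursor()

# IntegratedML statements are ordinary SQL statements:
curs.execute("CREATE MODEL Flowers PREDICTING (Species) FROM DataMining.IrisDataset")
curs.execute("TRAIN MODEL Flowers FROM DataMining.IrisDataset")

# ...and predictions come back as a normal result set:
df = pd.read_sql("SELECT TOP 20 PREDICT(Flowers) AS PredictedSpecies, Species"
                 " FROM DataMining.IrisDataset", conn)
print(df)
```

Note that inside the Docker network the notebooks address IRIS by its service hostname and internal superserver port (irisimlsvr:1972); the 8091 mapping in docker-compose.yml is only for clients running on the host.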
54 | 55 | ## Prerequisites 56 | Make sure you have [git](https://git-scm.com/book/en/v2/Getting-Started-Installing-Git) and [Docker desktop](https://www.docker.com/products/docker-desktop) installed. 57 | 58 | ## Tested environments 59 | This template is tested breifly on AWS Ubuntu, Mac OS, and Windows 10(using Docker Toolbox only). It should work on other Docker environment too - let us know if you encounter any issues. 60 | 61 | ## Installation 62 | 63 | Clone/git pull the repo into any local directory 64 | 65 | ``` 66 | $ git clone https://github.com/intersystems-community/integratedml-demo-template.git 67 | ``` 68 | 69 | Open a Docker terminal in this directory and run: 70 | 71 | ``` 72 | $ docker-compose build 73 | ``` 74 | 75 | 3. Run the IRIS container, and Jupyter notebook server images: 76 | 77 | ``` 78 | $ docker-compose up -d 79 | ``` 80 | 81 | 4. Open browser to access the notebooks 82 | 83 | ``` 84 | http://localhost:8896/tree 85 | ``` 86 | Note: use `docker-compose ps` to confirm tf2juyter's ports; make sure right localhost port is used if over SSL tunneling to remotehost) 87 | 88 | 5. Examine the test data with webterminal 89 | Open terminal with: SuperUser / SYS credentials 90 | ``` 91 | http://localhost:8092/terminal/ 92 | ``` 93 | Enter **/sql** mode and make SQL queries to examine data in IRIS. 94 | Screenshot 2023-10-28 at 12 08 44 AM 95 | 96 | # How to develop your IntegragedML solution with the IntegratedML Template Repository 97 | ## Use this template 98 | Click the button "Use this template" on Github to create a new repository which will be the copy of this one. 99 | 100 | ## Checkout the repo 101 | Clone your new repo to a local folder. 102 | 103 | ## Start developing 104 | Install [VSCode](https://code.visualstudio.com/), [Docker Desctop](https://marketplace.visualstudio.com/items?itemName=ms-azuretools.vscode-docker) and [ObjectScript](https://marketplace.visualstudio.com/items?itemName=daimor.vscode-objectscript) plugin and open the folder in VSCode. 105 | 106 | Import your data as listed below, rebuild containers to let the data be imported, and use IntegratedML via SQL tools, as described in Jupyter notebooks. 107 | 108 | 109 | # How to Import data into InterSystems IRIS 110 | ## Importing data from CSV file 111 | 1. Add csv file into the repository, e.g. like [this titanic.csv](https://github.com/intersystems-community/integratedml-demo-template/blob/master/iris-aa-server/data/titanic.csv) 112 | 2. Introduce an import data call into your IRIS initalisation script. 113 | This is an [example line to import titanic.csv](https://github.com/intersystems-community/integratedml-demo-template/blob/0db187b7fd127ff5432b68617bca7cfdadaaed2b/iris-aa-server/iris.script#L13) into IRIS Titanic.Passenger class along with data. 114 | 3. Query the data from any SQL tool, web terminal or from InterSystems ObjectScript with: 115 | ``` 116 | SELECT * From Titanic.Passenger 117 | ``` 118 | ## Importing data from CSV URL 119 | If your file is accessible remotely, you can import it as follows: 120 | 1. Add the import CSV from URL line into [iris.script](). 121 | Here is an example line to [import countries.csv data from URL](https://github.com/intersystems-community/integratedml-demo-template/blob/7feaffef0a47c7c46cc683d89bdbaedbce48071c/iris-aa-server/iris.script#L17) 122 | 2. Rebuild the docker image (the easiest way is to rebuild via _docker-compose_ -- ```docker-compose build```). 
This will create User.Countries class and import data which you can query with SQL from Countries table: 123 | ``` 124 | SELECT * FROM COUNTRIES 125 | ``` 126 | 127 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | 3 | services: 4 | 5 | irisimlsvr: 6 | build: 7 | context: ./iris-aa-server 8 | image: iris-aa-server:2021.2 9 | hostname: irisimlsvr 10 | restart: on-failure 11 | ports: 12 | - 8091:1972 # 1972 is the superserver default port 13 | - 8092:52773 # 52773 is the webserver/management portal port 14 | volumes: 15 | - ./iris-shared:/shared 16 | tf2jupyter: # tensorflow with jupyter 17 | build: 18 | context: ./tf2-jupyter-jdbc 19 | image: tf2-jupyter-jdbc:1.0.0-iml-template 20 | hostname: tf2jupyter 21 | restart: on-failure 22 | ports: 23 | - 8896:8888 # 8888 is the docker jupyter service port 24 | - 6026:6006 # 6006 is the tensorboard port 25 | volumes: 26 | - ./jupyter-samples:/tf #shared volumes 27 | 28 | 29 | -------------------------------------------------------------------------------- /iris-aa-server/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG IMAGE=store/intersystems/iris-aa-community:2020.3.0AA.331.0 2 | ARG IMAGE=intersystemsdc/iris-aa-community:2020.3.0AA.331.0-zpm 3 | ARG IMAGE=intersystemsdc/iris-ml-community:2021.2.0.651.0-zpm 4 | ARG IMAGE=intersystemsdc/iris-ml-community 5 | FROM $IMAGE 6 | LABEL maintainer="Thomas Dyar " 7 | 8 | USER root 9 | RUN mkdir /data \ 10 | && chown irisowner /data 11 | USER irisowner 12 | 13 | # copy files 14 | COPY data /data 15 | COPY src /data/src 16 | COPY iris.script /tmp/iris.script 17 | 18 | # special extract treatment for hate-speech dataset 19 | # RUN mkdir /data/hate-speech/ \ 20 | # && tar -xf /data/hate-speech.tar -C /data/ 21 | 22 | # load demo stuff 23 | RUN iris start IRIS \ 24 | && iris session IRIS < /tmp/iris.script 25 | 26 | # RUN rm -r /data/* 27 | -------------------------------------------------------------------------------- /iris-aa-server/README.md: -------------------------------------------------------------------------------- 1 | # IRIS Advanced Analytics with IntegratedML Demo 2 | 3 | This folder contains a few simple datasets to demonstrate InterSystems IRIS IntegratedML (previously known as QuickML). The enclosed Dockerfile can be used separately from the rest of the integratedml-demo-template, if you do not want to use the Jupyter Notebook interface. 4 | 5 | ## How to build 6 | 7 | The included Dockerfile will pull the IRIS Advanced Analytics Community Edition (with IntegratedML) container image from the InterSystems public Docker repository, and set up a few simple datasets. 8 | 9 | ``` 10 | docker build --tag integratedml-demo . 
11 | ``` 12 | 13 | To start your container, use the following command (or your favourite equivalent, as this one will drop your container after stopping) 14 | 15 | ``` 16 | docker run --rm -d -p 9091:51773 -p 9092:52773 --name integratedml integratedml-demo 17 | ``` 18 | 19 | The IRIS password is initialized as SYS, but you can get in directly through the following command, the SMP or connecting through a SQL client such as [DBeaver](https://dbeaver.io/) 20 | 21 | ``` 22 | docker exec -it integratedml iris sql IRIS 23 | ``` 24 | 25 | ## How to demo 26 | 27 | Using IntegratedML takes only three simple commands: 28 | 29 | ```sql 30 | CREATE MODEL Flowers PREDICTING (Species) FROM DataMining.IrisDataset; 31 | TRAIN MODEL Flowers FROM DataMining.IrisDataset; 32 | SELECT TOP 20 PREDICT(Flowers) AS PredictedSpecies, Species AS ActualSpecies FROM DataMining.IrisDataset; 33 | ``` 34 | 35 | Note that the semicolons at the end are for use in a multiline-style client such as DBeaver or SQuirreL and not part of regular IRIS SQL. See the [IntegratedML Syntax overview](https://usconfluence.iscinternal.com/display/TBD/IntegratedML+Syntax) if you want to be more creative. For example, you can add ```USING { "provider": "H2O" }``` to your CREATE or TRAIN commands to test the H2O provider instead of the default one. 36 | 37 | ### Included datasets 38 | 39 | These are broadly available datasets, but we may not have permission to _re_-distribute them, so keep this repo to yourself: 40 | - \[SQLUser.\]Campaign: as used in the campaign showcase in the [ML Toolkit](https://github.com/intersystems/MLToolkit). The target column to put your crosshairs on is RESPONSE 41 | - \[SQLUser.\]BreastCancer 42 | -------------------------------------------------------------------------------- /iris-aa-server/data/HateSpeech.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | %Persistent 5 | 65407,55491.800962 6 | 65407,46597.061384 7 | 8 | 9 | %String 10 | 11 | 12 | 13 | 14 | %String 15 | 16 | 17 | 18 | %String 19 | 20 | 21 | 22 | %String 23 | 24 | 25 | 26 | %Integer 27 | 28 | 29 | 30 | 34 | 35 | 36 | 37 | %String 38 | 39 | 40 | 41 | %String 42 | 43 | 44 | 45 | 1 46 | pDir:%String="C:\Users\bdeboe\Documents\GitHub\hate-speech-dataset\" 47 | %Status 48 | WC") 62 | set tRow = ..%New() 63 | set tRow.FileName = $piece(tLine,",",1)_".txt" 64 | set tRow.UserId = $piece(tLine,",",2) 65 | set tRow.SubforumId = $piece(tLine,",",3) 66 | set tRow.ContextNeeded = $piece(tLine,",",4) 67 | set tRow.Label = $piece(tLine,",",5) 68 | 69 | set tCommentFile = ##class(%Stream.FileCharacter).%New() 70 | set tSC = tCommentFile.LinkToFile(pDir_"all_files"_tSeparator_tRow.FileName) 71 | quit:$$$ISERR(tSC) 72 | set tRow.Comment = tCommentFile.Read() 73 | 74 | if ##class(%File).Exists(pDir_"sampled_train"_tSeparator_tRow.FileName) { 75 | set tRow.Sample = "train" 76 | } elseif ##class(%File).Exists(pDir_"sampled_test"_tSeparator_tRow.FileName) { 77 | set tRow.Sample = "test" 78 | } 79 | 80 | set tSC = tRow.%Save() 81 | quit:$$$ISERR(tSC) 82 | } 83 | 84 | } catch (ex) { 85 | set tSC = ex.AsStatus() 86 | } 87 | quit tSC 88 | ]]> 89 | 90 | 91 | 92 | %Storage.Persistent 93 | ^NLP.HateSpeechD 94 | HateSpeechDefaultData 95 | ^NLP.HateSpeechD 96 | ^NLP.HateSpeechI 97 | ^NLP.HateSpeechS 98 | 99 | 100 | %%CLASSNAME 101 | 102 | 103 | Comment 104 | 105 | 106 | FileName 107 | 108 | 109 | UserId 110 | 111 | 112 | SubforumId 113 | 114 | 115 | ContextNeeded 116 | 117 | 118 | Label 119 | 120 | 121 | Sample 122 | 123 | 
124 | 125 | 126 | 127 | -------------------------------------------------------------------------------- /iris-aa-server/data/IrisDataset.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | The data set contains 3 classes of 50 instances each, where each class refers to a type of iris plant. One class is linearly separable from the other 2; the latter are NOT linearly separable from each other. Each record has 5 attributes: 8 |
    9 |
  1. sepal length in cm 10 |
  2. sepal width in cm 11 |
  3. petal length in cm 12 |
  4. petal width in cm 13 |
  5. class (species): 14 |
      15 |
    • Iris Setosa 16 |
    • Iris Versicolour 17 |
    • Iris Virginica 18 |
    19 |
20 |

21 | The dataset is taken from UCI Machine Learning Repository [http://archive.ics.uci.edu/ml]. Irvine, CA: University of California, School of Information and Computer Science by Frank, A. & Asuncion, A. (2010).]]> 22 | %Persistent 23 | 64926,50261.117 24 | 62312,41576.508001 25 | 26 | 27 | %Double 28 | 29 | 30 | 31 | %Double 32 | 33 | 34 | 35 | %Double 36 | 37 | 38 | 39 | %Double 40 | 41 | 42 | 43 | %String 44 | 45 | 46 | 47 | 1 48 | %Status 49 | " 71 | 72 | set count = $i(count) 73 | 74 | set flower = ..%New() 75 | set flower.SepalLength = $p(line,",",1) 76 | set flower.SepalWidth = $p(line,",",2) 77 | set flower.PetalLength = $p(line,",",3) 78 | set flower.PetalWidth = $p(line,",",4) 79 | set flower.Species = $p(line,",",5) 80 | 81 | set sc = flower.%Save() 82 | Quit:$$$ISERR(sc) 83 | } 84 | 85 | } Catch (ex) { 86 | set sc = ex.AsStatus() 87 | } 88 | 89 | if ($$$ISERR(sc)) { 90 | do $system.OBJ.DisplayError(sc) 91 | } 92 | 93 | Quit sc 94 | ]]> 95 | 96 | 97 | 98 | 100 | 252 | 253 | ]]> 254 | 255 | 256 | 257 | %Storage.Persistent 258 | ^DataMining.IrisDatasetD 259 | IrisDatasetDefaultData 260 | ^DataMining.IrisDatasetD 261 | ^DataMining.IrisDatasetI 262 | ^DataMining.IrisDatasetS 263 | 264 | listnode 265 | 266 | 267 | %%CLASSNAME 268 | 269 | 270 | SepalLength 271 | 272 | 273 | SepalWidth 274 | 275 | 276 | PetalLength 277 | 278 | 279 | PetalWidth 280 | 281 | 282 | Species 283 | 284 | 285 | 286 | 287 | 288 | 289 | 290 | DataMining.IrisDataset.]]> 292 | 1 293 | %DeepSee.PMML.Definition 294 | 65204,51678.214 295 | 62888,42919.529995 296 | 297 | 298 | http://www.intersystems.com/deepsee/pmml 299 | 301 |

302 | 303 | 03/11/2013 11:54:41 304 |
305 | 306 | 307 | 308 | 309 | 310 | 311 | 312 | 313 | 314 | 315 | SELECT ID, PetalLength, PetalWidth, SepalLength, SepalWidth, Species FROM DataMining.IrisDataset 316 | 317 | 318 | 319 | 320 | 321 | 322 | 323 | 324 | 325 | 326 | 327 | 328 | 329 | 330 | 331 | 332 | 333 | 334 | 335 | 336 | 337 | 338 | 339 | 340 | 341 | 342 | 343 | 344 | 345 | 346 | 347 | 348 | 349 | 350 | 351 | 352 | 353 | 354 | 355 | 356 | 357 | 358 | 359 | 360 | 361 | 362 | 363 | 364 | 365 | 366 | 367 | 368 | 369 | 370 | 371 | 372 | 373 | 374 | 375 | 376 | 377 | 378 | 379 | 380 | 381 | 382 | 383 | 384 | 385 | 386 | 387 | 388 | 389 | 390 | 391 | 392 | 393 | 394 | 395 | 396 | 397 | 398 | 399 | 400 | 402 | 403 | 404 | 405 | 406 | 407 | 408 | 409 | 410 | 411 | 412 | 413 | 414 | 415 | 416 | 417 | 418 | 419 | 420 | 421 | 422 | 423 | 424 | 425 | 426 | 427 | 428 | 429 | 430 | 431 | 432 | 433 | 434 | 435 | 436 | 437 | 438 | 439 | 440 | 441 | 442 | 443 | ]]> 444 | 445 |
446 |
447 | -------------------------------------------------------------------------------- /iris-aa-server/data/Loader.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 65330,70445.358752 5 | 65330,67823.562348 6 | 7 | 8 | 1 9 | pFile:%String,pClass:%String,pSeparator:%String="," 10 | %Status 11 | WC") 25 | for i = 1:1:$l(tHeader,pSeparator) { 26 | set h = $p(tHeader,pSeparator,i) 27 | quit:h="" 28 | set tColumns($i(tColumns))=tProps($$$UPPER($tr(h,"_"))), 29 | tTypes(tColumns) = tPropTypes($$$UPPER($tr(h,"_"))) 30 | } 31 | 32 | while 'tFile.AtEnd { 33 | set tLine = $zstrip(tFile.ReadLine(),"<>WC") 34 | continue:tLine="" 35 | 36 | set tObj = $classmethod(pClass,"%New") 37 | for i=1:1:tColumns { 38 | set tValue = $piece(tLine,pSeparator,i) 39 | set:tTypes(i)="%Library.Date" tValue = $zdateh($tr(tValue,"/","-"),3) 40 | set $property(tObj, tColumns(i)) = tValue 41 | } 42 | set tSC = tObj.%Save() 43 | quit:$$$ISERR(tSC) 44 | set c = $i(c) 45 | } 46 | quit:$$$ISERR(tSC) 47 | 48 | write !,"Read ",c," records for table ",pClass 49 | 50 | } catch (ex) { 51 | set tSC = ex.AsStatus() 52 | } 53 | quit tSC 54 | ]]> 55 | 56 | 57 | 58 | -------------------------------------------------------------------------------- /iris-aa-server/data/LoanPerformance.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | %Persistent 5 | 65329,33578.231722 6 | 65268,55574.517277 7 | 8 | 9 | %Integer 10 | 11 | 12 | 13 | %Integer 14 | 15 | 16 | 17 | %Double 18 | 19 | 20 | 21 | %Integer 22 | 23 | 24 | 25 | %Integer 26 | 27 | 28 | 29 | %Integer 30 | 31 | 32 | 33 | %Integer 34 | 35 | 36 | 37 | %Date 38 | 39 | 40 | 41 | %String 42 | 43 | 44 | 45 | %Date 46 | 47 | 48 | 49 | %Integer 50 | 51 | 52 | 53 | %Integer 54 | 55 | 56 | 57 | %Boolean 58 | 59 | 60 | 61 | %Boolean 62 | 63 | 64 | 65 | %Boolean 66 | 67 | 68 | 69 | %Boolean 70 | 71 | 72 | 73 | %Boolean 74 | 75 | 76 | 77 | %Boolean 78 | 79 | 80 | 81 | %Integer 82 | 83 | 84 | 85 | %String 86 | 87 | 88 | 89 | 90 | %Integer 91 | 92 | 93 | 94 | %Integer 95 | 96 | 97 | 98 | %Integer 99 | 100 | 101 | 102 | %Integer 103 | 104 | 105 | 106 | %Integer 107 | 108 | 109 | 110 | %Integer 111 | 112 | 113 | 114 | %Integer 115 | 116 | 117 | 118 | %Integer 119 | 120 | 121 | 122 | %Integer 123 | 124 | 125 | 126 | %Integer 127 | 128 | 129 | 130 | %Integer 131 | 132 | 133 | 134 | %Integer 135 | 136 | 137 | 138 | %Integer 139 | 140 | 141 | 142 | %Integer 143 | 144 | 145 | 146 | %Integer 147 | 148 | 149 | 150 | %Integer 151 | 152 | 153 | 154 | %Integer 155 | 156 | 157 | 158 | %Integer 159 | 160 | 161 | 162 | %Integer 163 | 164 | 165 | 166 | %Boolean 167 | 168 | 169 | 170 | bitmap 171 | LoanDefault 172 | 173 | 174 | 175 | AssetCost 176 | 177 | 178 | 179 | 1 180 | csv:%String 181 | 194 | 195 | 196 | 197 | 1 198 | line:%String 199 | 245 | 246 | 247 | 248 | 1 249 | str 250 | %Integer 251 | 257 | 258 | 259 | 260 | 1 261 | str 262 | %Date 263 | 270 | 271 | 272 | 273 | %Storage.Persistent 274 | ^User.LoanPerformanceD 275 | LoanPerformanceDefaultData 276 | ^User.LoanPerformanceD 277 | ^User.LoanPerformanceI 278 | ^User.LoanPerformanceS 279 | sequence 280 | 281 | 282 | %%CLASSNAME 283 | 284 | 285 | DisbursedAmount 286 | 287 | 288 | AssetCost 289 | 290 | 291 | LTV 292 | 293 | 294 | BranchId 295 | 296 | 297 | SupplierId 298 | 299 | 300 | ManufacturerId 301 | 302 | 303 | CurrentPincodeId 304 | 305 | 306 | DateOfBirth 307 | 308 | 309 | EmploymentType 310 | 311 | 312 | DisbursalDate 313 | 314 | 315 | StateId 316 | 317 
| 318 | EmployeeCodeId 319 | 320 | 321 | MobileNumberAvailable 322 | 323 | 324 | Aadhar 325 | 326 | 327 | PAN 328 | 329 | 330 | Voter 331 | 332 | 333 | Driving 334 | 335 | 336 | Passport 337 | 338 | 339 | CNSScore 340 | 341 | 342 | PrimaryAccounts 343 | 344 | 345 | PrimaryAccountsActive 346 | 347 | 348 | PrimaryAccountsOverdue 349 | 350 | 351 | PrimaryCurrentBalance 352 | 353 | 354 | PrimarySanctionedAmount 355 | 356 | 357 | PrimaryDisbursedAmount 358 | 359 | 360 | SecondaryAccounts 361 | 362 | 363 | SecondaryAccoutsActive 364 | 365 | 366 | SecondaryAccountsOverdue 367 | 368 | 369 | SecondaryCurrentBalance 370 | 371 | 372 | SecondarySanctionedAmount 373 | 374 | 375 | SecondaryDisbursedAmount 376 | 377 | 378 | PrimaryInstallmentAmount 379 | 380 | 381 | SecondaryInstallmentAmount 382 | 383 | 384 | NewAccountsLastSixMonths 385 | 386 | 387 | DelinquentAccountsLastSixMonths 388 | 389 | 390 | AverageAccountAgeMonths 391 | 392 | 393 | CreditHistoryLengthMonths 394 | 395 | 396 | Inquiries 397 | 398 | 399 | LoanDefault 400 | 401 | 402 | CNSScoreDescription 403 | 404 | 405 | SecondaryAccountsActive 406 | 407 | 408 | 409 | 410 | 411 | -------------------------------------------------------------------------------- /iris-aa-server/data/NLPUtils.cls: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | %IKInclude 5 | 65407,36216.674515 6 | 7 | 8 | 1 9 | pViewName:%String 10 | %Status 11 | Drop_NLP 12 | 1 13 | 33 | 34 | 35 | 36 | 1 37 | 38 | %Status 39 | Build_NLP 40 | 1 41 | [](){}/\|.;,:=?"), 155 | tPropNames(tEntUniId)=tPropName, 156 | tEntUniIds = tEntUniIds_","_tEntUniId 157 | 158 | if $d(tPropIndex(tPropName)) { 159 | // duplicate! - ignore for now, we'll just sum up frequencies 160 | } else { 161 | do AddColumn(tTableClass,tPropName,"Frequency for '"_tEntValue_"' (ID:"_tEntUniId_")") 162 | } 163 | set tPropIndex(tPropName,tEntUniId)="" 164 | } 165 | set tSC = tTableClass.%Save() 166 | quit:$$$ISERR(tSC) 167 | 168 | set tSC = $system.OBJ.Compile(tTableClass.Name,"ck"_$s(pVerbose:"d",1:"-d")) 169 | quit:$$$ISERR(tSC) 170 | 171 | write:pVerbose !,"Populating feature table" 172 | set tSC = tStatement.%Prepare("SELECT EntUniId, FrequencyAsConcept+FrequencyAsRelation Frequency FROM %iKnow_Objects.EntityInSourceDetails WHERE DomainId = "_tDomainId_" AND SourceId = ? AND EntUniId IN ("_$e(tEntUniIds,2,*)_")") 173 | quit:$$$ISERR(tSC) 174 | set tRS1 = ##class(%SQL.Statement).%ExecDirect(,"SELECT SourceId, LocalReference FROM %iKnow_Objects.Source where domainid = "_tDomainId) 175 | while tRS1.%Next() { 176 | set tRow = $classmethod(tTableClass.Name,"%New") 177 | set tRow.NLPID = tRS1.%Get("LocalReference") 178 | set tRS2 = tStatement.%Execute(tRS1.%Get("SourceId")) 179 | while tRS2.%Next() { 180 | set $property(tRow, tPropNames(tRS2.%Get("EntUniId"))) = $property(tRow, tPropNames(tRS2.%Get("EntUniId"))) + tRS2.%Get("Frequency") 181 | } 182 | set tSC = tRow.%Save() 183 | quit:$$$ISERR(tSC) 184 | } 185 | quit:$$$ISERR(tSC) 186 | 187 | write:pVerbose ! 188 | 189 | } catch (ex) { 190 | set tSC = ex.AsStatus() 191 | } 192 | if $$$ISERR(tSC) && pVerbose { 193 | write ! 194 | do $system.OBJ.DisplayError(tSC) 195 | write ! 
196 | } 197 | quit tSC 198 | 199 | AddColumn(cls, name, desc, type="%Integer") 200 | set prop = ##class(%Dictionary.PropertyDefinition).%New() 201 | set prop.parent = cls 202 | if '$zname(name,6) { 203 | set prop.SqlFieldName = name 204 | set prop.Name = $tr(name,"_- ""'+$=<>[](){}/\|.;,:=?") 205 | } else { 206 | set prop.Name = name 207 | } 208 | set prop.Description = desc 209 | set prop.Type = type 210 | quit 211 | 212 | AddParam(cls, name, value) 213 | set param = ##class(%Dictionary.ParameterDefinition).%New() 214 | set param.parent = cls 215 | set param.Name = name 216 | set param.Default = value 217 | ]]> 218 | 219 | 220 | 221 | -------------------------------------------------------------------------------- /iris-aa-server/data/hate-speech.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intersystems-community/integratedml-demo-template/f7ed655425bed01d5aad595a26554d8cd5757f4b/iris-aa-server/data/hate-speech.tar -------------------------------------------------------------------------------- /iris-aa-server/data/loans.gof: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intersystems-community/integratedml-demo-template/f7ed655425bed01d5aad595a26554d8cd5757f4b/iris-aa-server/data/loans.gof -------------------------------------------------------------------------------- /iris-aa-server/iris.script: -------------------------------------------------------------------------------- 1 | zn "%SYS" 2 | w ##class(Security.Users).UnExpireUserPasswords("*") 3 | 4 | zn "USER" 5 | zpm "install sslclient" 6 | zpm "install csvgen" 7 | zpm "install webterminal" 8 | 9 | // Load all the class definitions from the data directory 10 | do $system.OBJ.LoadDir("/data","cuk",,1) 11 | 12 | // Show how csvgen can load data from csv files locally or remotely 13 | do ##class(community.csvgen).Generate("/data/titanic.csv",",","Titanic.Passenger") 14 | do ##class(community.csvgen).GenerateFromURL("https://raw.githubusercontent.com/datasciencedojo/datasets/master/WorldDBTables/CountryTable.csv",",","SQLUser.Countries") 15 | do ##class(community.csvgen).Generate("/data/readmission.csv",",","Patient.Readmission") 16 | do ##class(community.csvgen).Generate("/data/Campaign.csv",";","Marketing.Campaign") 17 | do ##class(community.csvgen).Generate("/data/breast-cancer.csv",",","Biomedical.BreastCancer") 18 | 19 | // Load globals and build indices for the LoanPerformance table 20 | do $system.OBJ.Load("/data/loans.gof") 21 | do ##class(User.LoanPerformance).%BuildIndices() 22 | 23 | // do $system.OBJ.Load("/data/Loader.xml","cf") 24 | 25 | 26 | halt 27 | -------------------------------------------------------------------------------- /iris-aa-server/src/Util/Loader.cls: -------------------------------------------------------------------------------- 1 | Class Util.Loader 2 | { 3 | 4 | ClassMethod Load(pFile As %String, pClass As %String, pSeparator As %String = ",") As %Status 5 | { 6 | set tSC = $$$OK 7 | try { 8 | do $classmethod(pClass,"%KillExtent") 9 | set tClassDef = ##class(%Dictionary.ClassDefinition).%OpenId(pClass) 10 | for i=1:1:tClassDef.Properties.Count() { 11 | set tProp = tClassDef.Properties.GetAt(i) 12 | set tProps($$$UPPER(tProp.Name)) = tProp.Name 13 | set tPropTypes($$$UPPER(tProp.Name)) = tProp.Type 14 | } 15 | 16 | set tFile = ##class(%Stream.FileCharacter).%New() 17 | do tFile.LinkToFile(pFile) 18 | set tHeader = $zstrip(tFile.ReadLine(),"<>WC") 19 | for i = 
1:1:$l(tHeader,pSeparator) { 20 | set h = $p(tHeader,pSeparator,i) 21 | quit:h="" 22 | set tColumns($i(tColumns))=tProps($$$UPPER($tr(h,"_"))), 23 | tTypes(tColumns) = tPropTypes($$$UPPER($tr(h,"_"))) 24 | } 25 | 26 | while 'tFile.AtEnd { 27 | set tLine = $zstrip(tFile.ReadLine(),"<>WC") 28 | continue:tLine="" 29 | 30 | set tObj = $classmethod(pClass,"%New") 31 | for i=1:1:tColumns { 32 | set tValue = $piece(tLine,pSeparator,i) 33 | set:tTypes(i)="%Library.Date" tValue = $zdateh($tr(tValue,"/","-"),3) 34 | set $property(tObj, tColumns(i)) = tValue 35 | } 36 | set tSC = tObj.%Save() 37 | quit:$$$ISERR(tSC) 38 | set c = $i(c) 39 | } 40 | quit:$$$ISERR(tSC) 41 | 42 | write !,"Read ",c," records for table ",pClass,! 43 | 44 | } catch (ex) { 45 | set tSC = ex.AsStatus() 46 | } 47 | quit tSC 48 | } 49 | 50 | } 51 | -------------------------------------------------------------------------------- /jupyter-samples/ED_visit_90_day.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Train an IntegratedML model on ED Readmit likelihood Dataset\n", 8 | "## Use JDBC to connect to InterSystems IRIS database\n", 9 | "**NOTE: This Notebook will not run as-is!**\n", 10 | "This Notebook demonstrates:\n", 11 | "- Using the JayDeBeApi Python library to connect to InterSystems IRIS\n", 12 | "- Creating views to segment data into training and test sets\n", 13 | "- Defining and training an IntegratedML model to predict ED Readmits in the next 90 days\n", 14 | "- Comparing the resulting model's predictions to data in the test set (that the model was not trained on)\n", 15 | "- Using the IntegratedML \"VALIDATE MODEL\" command to calculate accuracy metrics on the test set data" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "### 1. Get jdbc connection and cursor" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | <<<<<<< HEAD 28 | "execution_count": null, 29 | ======= 30 | "execution_count": 1, 31 | >>>>>>> 730f3d3b116a644bb0466c140bb03f80ba73e6c9 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "import pandas as pd" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | <<<<<<< HEAD 41 | "execution_count": null, 42 | ======= 43 | "execution_count": 2, 44 | >>>>>>> 730f3d3b116a644bb0466c140bb03f80ba73e6c9 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "%run -i '../Initializations/Conns.py'" 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "metadata": {}, 54 | "source": [ 55 | "### 2. Create and specify the source data table(s)" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | <<<<<<< HEAD 61 | "execution_count": null, 62 | ======= 63 | "execution_count": 3, 64 | >>>>>>> 730f3d3b116a644bb0466c140bb03f80ba73e6c9 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "#Use this block to create a starting data set that you can/will build upon.\n", 69 | "#NOTE: It is always useful to have a unique identifier in the data\n", 70 | "TargetTable = 'Data.EDEncsPredB90View'\n", 71 | "TrainTable = 'Data.EDEncsTraining'\n", 72 | "TestTable = 'Data.EDEncsTesting'" 73 | ] 74 | }, 75 | { 76 | "cell_type": "markdown", 77 | "metadata": {}, 78 | "source": [ 79 | "### 3. Review the data to ensure the Target variable and Independent variables are in good standing." 
80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | <<<<<<< HEAD 85 | "execution_count": null, 86 | "metadata": { 87 | "scrolled": true 88 | }, 89 | "outputs": [], 90 | ======= 91 | "execution_count": 4, 92 | "metadata": { 93 | "scrolled": true 94 | }, 95 | "outputs": [ 96 | { 97 | "name": "stdout", 98 | "output_type": "stream", 99 | "text": [ 100 | " ID DRID HPS Age Gend Mar InsRel PlanCode T30 T60 ... S180 S365 \\\n", 101 | "0 1 1 4.0 52.0 F M S L05 0.0 0.0 ... 53.0 87.0 \n", 102 | "1 2 2 2.0 48.0 F M 1 13947 0.0 0.0 ... 0.0 1.0 \n", 103 | "2 3 3 1.0 66.0 F M S W01 NaN NaN ... NaN NaN \n", 104 | "3 4 4 NaN 62.0 M D S S20 NaN NaN ... NaN NaN \n", 105 | "4 5 5 NaN 51.0 M S S W01 0.0 1.0 ... NaN NaN \n", 106 | "\n", 107 | " Gen dGen Med dMed Appt dAppt Rx dRx \n", 108 | "0 9.0 7.0 9.0 3.0 0.0 0.0 2.0 2.0 \n", 109 | "1 NaN NaN NaN NaN NaN NaN NaN NaN \n", 110 | "2 NaN NaN NaN NaN NaN NaN NaN NaN \n", 111 | "3 NaN NaN NaN NaN NaN NaN NaN NaN \n", 112 | "4 NaN NaN NaN NaN NaN NaN NaN NaN \n", 113 | "\n", 114 | "[5 rows x 75 columns]\n", 115 | "Index(['ID', 'DRID', 'HPS', 'Age', 'Gend', 'Mar', 'InsRel', 'PlanCode', 'T30',\n", 116 | " 'T60', 'T90', 'E30', 'E90', 'E180', 'E365', 'I180', 'I365', 'O30',\n", 117 | " 'O90', 'O180', 'O365', 'ObsHt', 'dObsHt', 'ObsWt', 'dObsWt', 'ObsBMI',\n", 118 | " 'dObsBMI', 'ObsBSA', 'dObsBSA', 'ObsTemp', 'dObsTemp', 'ObsPulse',\n", 119 | " 'dObsPulse', 'ObsBPS', 'dObsBPS', 'ObsBPD', 'dObsBPD', 'ObsO2',\n", 120 | " 'dObsO2', 'Labs', 'dLabs', 'Meds', 'dMeds', 'Rads', 'dRads', 'Vax',\n", 121 | " 'dVax', 'HypChol_E78', 'dHypChol_E78', 'TII_E11', 'dTII_E11',\n", 122 | " 'GenEnc_Z00', 'dGenEnc_Z00', 'Scrn_Z13', 'dScrn_Z13', 'Couns_Z71',\n", 123 | " 'dCouns_Z71', 'OWt_E66', 'dOWt_E66', 'HypThy_E03', 'dHypThy_E03',\n", 124 | " 'Scrn_Z12', 'dScrn_Z12', 'S30', 'S90', 'S180', 'S365', 'Gen', 'dGen',\n", 125 | " 'Med', 'dMed', 'Appt', 'dAppt', 'Rx', 'dRx'],\n", 126 | " dtype='object')\n" 127 | ] 128 | } 129 | ], 130 | >>>>>>> 730f3d3b116a644bb0466c140bb03f80ba73e6c9 131 | "source": [ 132 | "tKeep()\n", 133 | "import pandas as pd\n", 134 | "from IPython.display import display\n", 135 | "\n", 136 | <<<<<<< HEAD 137 | "df = pd.read_sql(\"select top 3 * from Data.PatientCostData\", iconn)\n", 138 | ======= 139 | "df = pd.read_sql(\"select top 5 * from Data.PatientCostData\", iconn)\n", 140 | >>>>>>> 730f3d3b116a644bb0466c140bb03f80ba73e6c9 141 | "print(df)\n", 142 | "print(df.columns)" 143 | ] 144 | }, 145 | { 146 | "cell_type": "markdown", 147 | "metadata": {}, 148 | "source": [ 149 | "### Drop and unwanted fields" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | <<<<<<< HEAD 155 | "execution_count": null, 156 | "metadata": {}, 157 | "outputs": [], 158 | ======= 159 | "execution_count": 5, 160 | "metadata": {}, 161 | "outputs": [ 162 | { 163 | "data": { 164 | "text/plain": [ 165 | "'ID,DRID,HPS,Age,Gend,Mar,InsRel,PlanCode,E30,E90,E180,E365,I180,I365,O30,O90,O180,O365,ObsHt,dObsHt,ObsWt,dObsWt,ObsBMI,dObsBMI,ObsBSA,dObsBSA,ObsTemp,dObsTemp,ObsPulse,dObsPulse,ObsBPS,dObsBPS,ObsBPD,dObsBPD,ObsO2,dObsO2,Labs,dLabs,Meds,dMeds,Rads,dRads,Vax,dVax,HypChol_E78,dHypChol_E78,TII_E11,dTII_E11,GenEnc_Z00,dGenEnc_Z00,Scrn_Z13,dScrn_Z13,Couns_Z71,dCouns_Z71,OWt_E66,dOWt_E66,HypThy_E03,dHypThy_E03,Scrn_Z12,dScrn_Z12,S30,S90,S180,S365,Gen,dGen,Med,dMed,Appt,dAppt,Rx,dRx'" 166 | ] 167 | }, 168 | "execution_count": 5, 169 | "metadata": {}, 170 | "output_type": "execute_result" 171 | } 172 | ], 173 | >>>>>>> 730f3d3b116a644bb0466c140bb03f80ba73e6c9 174 | "source": [ 175 | "df = 
df.drop(['T30', 'T60', 'T90'], axis = 1)\n", 176 | "Usable = str(list(df.columns)).replace(\"', '\", \",\")[2:-2]\n", 177 | "Usable" 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | <<<<<<< HEAD 183 | "execution_count": null, 184 | "metadata": {}, 185 | "outputs": [], 186 | ======= 187 | "execution_count": 6, 188 | "metadata": {}, 189 | "outputs": [ 190 | { 191 | "data": { 192 | "text/html": [ 193 | "
" 222 | ], 223 | "text/plain": [ 224 | " Recs\n", 225 | "0 126633" 226 | ] 227 | }, 228 | "metadata": {}, 229 | "output_type": "display_data" 230 | } 231 | ], 232 | >>>>>>> 730f3d3b116a644bb0466c140bb03f80ba73e6c9 233 | "source": [ 234 | "icurs.execute(' \\\n", 235 | " create or replace view %s as \\\n", 236 | " select case when t90 > 0 then 1 else 0 end as B90, %s \\\n", 237 | " from Data.PatientCostData' % (TargetTable, Usable))\n", 238 | "df1 = pd.read_sql('SELECT COUNT(*) as Recs FROM %s' % TargetTable, iconn)\n", 239 | "TargetVar = 'B90'\n", 240 | "display(df1)" 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | <<<<<<< HEAD 246 | "execution_count": null, 247 | "metadata": {}, 248 | "outputs": [], 249 | ======= 250 | "execution_count": 7, 251 | "metadata": {}, 252 | "outputs": [ 253 | { 254 | "data": { 255 | "text/html": [ 256 | "
" 292 | ], 293 | "text/plain": [ 294 | " B90 Recs\n", 295 | "0 0 120181\n", 296 | "1 1 6452" 297 | ] 298 | }, 299 | "execution_count": 7, 300 | "metadata": {}, 301 | "output_type": "execute_result" 302 | } 303 | ], 304 | >>>>>>> 730f3d3b116a644bb0466c140bb03f80ba73e6c9 305 | "source": [ 306 | "Distro = pd.read_sql('select %s, count(*) as Recs from %s group by %s' % (TargetVar, TargetTable, TargetVar), iconn)\n", 307 | "Distro" 308 | ] 309 | }, 310 | { 311 | "cell_type": "markdown", 312 | "metadata": {}, 313 | "source": [ 314 | "### 4. Assess the probability of your target and sample accordingly into split training and testing datasets" 315 | ] 316 | }, 317 | { 318 | "cell_type": "code", 319 | <<<<<<< HEAD 320 | "execution_count": null, 321 | ======= 322 | "execution_count": 8, 323 | >>>>>>> 730f3d3b116a644bb0466c140bb03f80ba73e6c9 324 | "metadata": {}, 325 | "outputs": [], 326 | "source": [ 327 | "#we want to split the data into Training (80%) and Test (20%), ...\n", 328 | "# but also reduce the ratio of Negative (ED Enc = 0) to Positive\n", 329 | "Train = 0.8\n", 330 | "TVRatio = 2\n", 331 | "PT_List = pd.read_sql('select DRID, %s from %s order by %s, DRID' % (TargetVar, TargetTable, TargetVar), iconn)\n", 332 | "PT_List.index = PT_List['DRID']" 333 | ] 334 | }, 335 | { 336 | "cell_type": "code", 337 | <<<<<<< HEAD 338 | "execution_count": null, 339 | "metadata": { 340 | "scrolled": true 341 | }, 342 | "outputs": [], 343 | ======= 344 | "execution_count": 9, 345 | "metadata": { 346 | "scrolled": true 347 | }, 348 | "outputs": [ 349 | { 350 | "name": "stdout", 351 | "output_type": "stream", 352 | "text": [ 353 | " DRID\n", 354 | "B90 \n", 355 | "0 10323\n", 356 | "1 5161\n", 357 | " DRID\n", 358 | "B90 \n", 359 | "0 27787\n", 360 | "1 1291\n" 361 | ] 362 | } 363 | ], 364 | >>>>>>> 730f3d3b116a644bb0466c140bb03f80ba73e6c9 365 | "source": [ 366 | "#Create the lists, check the ratios, and create the \"In lists\":\n", 367 | "TrainList = PT_List[PT_List[TargetVar] == 0].sample(int(Distro['Recs'].loc[1]*TVRatio*Train)) \\\n", 368 | " .append(PT_List[PT_List[TargetVar] == 1].sample(int(Distro['Recs'].loc[1]*Train)))\n", 369 | "TrainList['Flag'] = 1\n", 370 | "TrainList.index = TrainList['DRID']\n", 371 | "print(TrainList.pivot_table(index = TargetVar, values = 'DRID', aggfunc = 'count'))\n", 372 | "#NOTE: It is IMPERATIVE that Test does NOT contain any Train data\n", 373 | "TestList = PT_List.join(TrainList['Flag'], how = 'left')\n", 374 | "TestList = TestList[(TestList['Flag'] != 1)]\n", 375 | "TestList = TestList[(TestList[TargetVar] == 1)].append(TestList[TestList[TargetVar] == 0].sample(int(len(TestList)*0.25)))\n", 376 | "print(TestList.pivot_table(index = TargetVar, values = 'DRID', aggfunc = 'count'))\n", 377 | "TrainIns = str(list(TrainList['DRID']))[1:-1]\n", 378 | "TestIns = str(list(TestList['DRID']))[1:-1]" 379 | ] 380 | }, 381 | { 382 | "cell_type": "code", 383 | <<<<<<< HEAD 384 | "execution_count": null, 385 | ======= 386 | "execution_count": 10, 387 | >>>>>>> 730f3d3b116a644bb0466c140bb03f80ba73e6c9 388 | "metadata": {}, 389 | "outputs": [], 390 | "source": [ 391 | "# Training set view\n", 392 | "icurs.execute(''' \\\n", 393 | " CREATE or replace VIEW %s AS \\\n", 394 | " SELECT * FROM %s \n", 395 | " WHERE DRID in (%s)''' \\\n", 396 | " % (TrainTable, TargetTable, TrainIns))\n", 397 | "# Prediction set\n", 398 | "icurs.execute(''' \\\n", 399 | " CREATE or replace VIEW %s AS \\\n", 400 | " SELECT * FROM %s \n", 401 | " WHERE DRID in (%s)''' \\\n", 402 | " % (TestTable, 
TargetTable, TestIns))" 403 | ] 404 | }, 405 | { 406 | "cell_type": "markdown", 407 | "metadata": {}, 408 | "source": [ 409 | "### 6. Create and Train an IntegratedML Model using default settings\n", 410 | "IntegratedML only needs a model name, the name of the column that is the target column to predict, and a table (or SELECT query to specify input columns." 411 | ] 412 | }, 413 | { 414 | "cell_type": "code", 415 | <<<<<<< HEAD 416 | "execution_count": null, 417 | ======= 418 | "execution_count": 11, 419 | >>>>>>> 730f3d3b116a644bb0466c140bb03f80ba73e6c9 420 | "metadata": {}, 421 | "outputs": [], 422 | "source": [ 423 | "try:\n", 424 | " icurs.execute(\"CREATE MODEL NewEncModel PREDICTING (%s) FROM %s\" % (TargetVar, TrainTable))\n", 425 | "except:\n", 426 | " icurs.execute(\"DROP MODEL NewEncModel\")\n", 427 | " icurs.execute(\"CREATE MODEL NewEncModel PREDICTING (%s) FROM %s\" % (TargetVar, TrainTable))" 428 | ] 429 | }, 430 | { 431 | "cell_type": "markdown", 432 | "metadata": {}, 433 | "source": [ 434 | "Now that the model is defined, you can TRAIN it, which invokes the AutoML machine learning procedure." 435 | ] 436 | }, 437 | { 438 | "cell_type": "code", 439 | <<<<<<< HEAD 440 | "execution_count": null, 441 | "metadata": {}, 442 | "outputs": [], 443 | ======= 444 | "execution_count": 15, 445 | "metadata": {}, 446 | "outputs": [ 447 | { 448 | "ename": "DatabaseError", 449 | "evalue": "java.sql.SQLException: [SQLCODE: <-400>:]\r\n[Location: ]\r\n[%msg: ]", 450 | "output_type": "error", 451 | "traceback": [ 452 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 453 | "\u001b[0;31mException\u001b[0m Traceback (most recent call last)", 454 | "\u001b[0;32m/usr/local/lib64/python3.6/site-packages/_jpype.cpython-36m-x86_64-linux-gnu.so\u001b[0m in \u001b[0;36mcom.intersystems.jdbc.IRISPreparedStatement.execute\u001b[0;34m()\u001b[0m\n", 455 | "\u001b[0;32m/usr/local/lib64/python3.6/site-packages/_jpype.cpython-36m-x86_64-linux-gnu.so\u001b[0m in \u001b[0;36mcom.intersystems.jdbc.IRISPreparedStatement.Update\u001b[0;34m()\u001b[0m\n", 456 | "\u001b[0;32m/usr/local/lib64/python3.6/site-packages/_jpype.cpython-36m-x86_64-linux-gnu.so\u001b[0m in \u001b[0;36mcom.intersystems.jdbc.InStream.readMessage\u001b[0;34m()\u001b[0m\n", 457 | "\u001b[0;32m/usr/local/lib64/python3.6/site-packages/_jpype.cpython-36m-x86_64-linux-gnu.so\u001b[0m in \u001b[0;36mcom.intersystems.jdbc.InStream.readMessage\u001b[0;34m()\u001b[0m\n", 458 | "\u001b[0;32m/usr/local/lib64/python3.6/site-packages/_jpype.cpython-36m-x86_64-linux-gnu.so\u001b[0m in \u001b[0;36mcom.intersystems.jdbc.InStream.readMessage\u001b[0;34m()\u001b[0m\n", 459 | "\u001b[0;32m/usr/local/lib64/python3.6/site-packages/_jpype.cpython-36m-x86_64-linux-gnu.so\u001b[0m in \u001b[0;36mcom.intersystems.jdbc.IRISConnection.processError\u001b[0;34m()\u001b[0m\n", 460 | "\u001b[0;32m/usr/local/lib64/python3.6/site-packages/_jpype.cpython-36m-x86_64-linux-gnu.so\u001b[0m in \u001b[0;36mcom.intersystems.jdbc.IRISConnection.getServerError\u001b[0;34m()\u001b[0m\n", 461 | "\u001b[0;31mException\u001b[0m: Java Exception", 462 | "\nThe above exception was the direct cause of the following exception:\n", 463 | "\u001b[0;31mjava.sql.SQLException\u001b[0m Traceback (most recent call last)", 464 | "\u001b[0;32m/usr/local/lib64/python3.6/site-packages/jaydebeapi/__init__.py\u001b[0m in \u001b[0;36mexecute\u001b[0;34m(self, operation, parameters)\u001b[0m\n\u001b[1;32m 533\u001b[0m 
\u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 534\u001b[0;31m \u001b[0mis_rs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_prep\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 535\u001b[0m \u001b[0;32mexcept\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 465 | "\u001b[0;31mjava.sql.SQLException\u001b[0m: java.sql.SQLException: [SQLCODE: <-400>:]\r\n[Location: ]\r\n[%msg: ]", 466 | "\nDuring handling of the above exception, another exception occurred:\n", 467 | "\u001b[0;31mDatabaseError\u001b[0m Traceback (most recent call last)", 468 | "\u001b[0;32m~/Initializations/Conns.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;31m#icurs.execute(\"TRAIN MODEL NewEncModel as NewEncModel_Auto\")\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0micurs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"set ml configuration DRCfg\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0micurs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"TRAIN MODEL NewEncModel as NewEncModel_DR\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 469 | "\u001b[0;32m/usr/local/lib64/python3.6/site-packages/jaydebeapi/__init__.py\u001b[0m in \u001b[0;36mexecute\u001b[0;34m(self, operation, parameters)\u001b[0m\n\u001b[1;32m 534\u001b[0m \u001b[0mis_rs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_prep\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 535\u001b[0m \u001b[0;32mexcept\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 536\u001b[0;31m \u001b[0m_handle_sql_exception\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 537\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mis_rs\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 538\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_rs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_prep\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgetResultSet\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 470 | "\u001b[0;32m/usr/local/lib64/python3.6/site-packages/jaydebeapi/__init__.py\u001b[0m in \u001b[0;36m_handle_sql_exception_jpype\u001b[0;34m()\u001b[0m\n\u001b[1;32m 163\u001b[0m \u001b[0mexc_type\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mInterfaceError\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 164\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 165\u001b[0;31m \u001b[0mreraise\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mexc_type\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mexc_info\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mexc_info\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 166\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 167\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_jdbc_connect_jpype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mjclassname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdriver_args\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mjars\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlibs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 471 | "\u001b[0;32m/usr/local/lib64/python3.6/site-packages/jaydebeapi/__init__.py\u001b[0m in \u001b[0;36mreraise\u001b[0;34m(tp, value, tb)\u001b[0m\n\u001b[1;32m 55\u001b[0m \u001b[0mvalue\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtp\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 56\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mtb\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 57\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwith_traceback\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtb\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 58\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 59\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 472 | "\u001b[0;32m/usr/local/lib64/python3.6/site-packages/jaydebeapi/__init__.py\u001b[0m in \u001b[0;36mexecute\u001b[0;34m(self, operation, parameters)\u001b[0m\n\u001b[1;32m 532\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_set_stmt_parms\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_prep\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mparameters\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 533\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 534\u001b[0;31m \u001b[0mis_rs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_prep\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 535\u001b[0m \u001b[0;32mexcept\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 536\u001b[0m \u001b[0m_handle_sql_exception\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 473 | "\u001b[0;31mDatabaseError\u001b[0m: java.sql.SQLException: [SQLCODE: <-400>:]\r\n[Location: ]\r\n[%msg: ]" 474 | ] 475 | } 476 | ], 477 | >>>>>>> 730f3d3b116a644bb0466c140bb03f80ba73e6c9 478 | "source": [ 479 | "icurs.execute(\"set ml configuration %AutoML\")\n", 480 | "icurs.execute(\"TRAIN MODEL NewEncModel as NewEncModel_Auto\")\n", 481 | "icurs.execute(\"set ml configuration DRCfg\")\n", 482 | "icurs.execute(\"TRAIN MODEL NewEncModel as NewEncModel_DR\")" 483 | ] 484 | }, 485 | { 486 | "cell_type": "markdown", 487 | "metadata": {}, 488 | "source": [ 489 | "Once that finishes, you can see some information about the model in the \"ML_TRAINED_MODELS\" table." 
490 | ] 491 | }, 492 | { 493 | "cell_type": "code", 494 | <<<<<<< HEAD 495 | "execution_count": null, 496 | "metadata": {}, 497 | "outputs": [], 498 | ======= 499 | "execution_count": 16, 500 | "metadata": {}, 501 | "outputs": [ 502 | { 503 | "data": { 504 | "text/html": [ 505 | "
" 553 | ], 554 | "text/plain": [ 555 | " MODEL_NAME TRAINED_MODEL_NAME PROVIDER TRAINED_TIMESTAMP \\\n", 556 | "0 NoShowModel NoShowModel_t2 AutoML 2020-10-26 15:57:18.731000 \n", 557 | "1 NewEncModelTD NewEncModelAutoML AutoML 2020-10-27 01:46:39.459000 \n", 558 | "\n", 559 | " MODEL_TYPE MODEL_INFO \n", 560 | "0 classification ModelType:Random Forest, Package:sklearn, Prob... \n", 561 | "1 classification ModelType:Logistic Regression, Package:sklearn... " 562 | ] 563 | }, 564 | "execution_count": 16, 565 | "metadata": {}, 566 | "output_type": "execute_result" 567 | } 568 | ], 569 | >>>>>>> 730f3d3b116a644bb0466c140bb03f80ba73e6c9 570 | "source": [ 571 | "pd.read_sql(\"SELECT * FROM INFORMATION_SCHEMA.ML_TRAINED_MODELS\", iconn)" 572 | ] 573 | }, 574 | { 575 | "cell_type": "markdown", 576 | "metadata": {}, 577 | "source": [ 578 | "### 7. Compare model output to data it has not seen yet\n", 579 | "Now you can use SQL to SELECT data from another table, run the IntegratedML model on this new data, and see how well the predictions match the data!" 580 | ] 581 | }, 582 | { 583 | "cell_type": "code", 584 | <<<<<<< HEAD 585 | "execution_count": null, 586 | "metadata": {}, 587 | "outputs": [], 588 | "source": [ 589 | "TestSet = pd.read_sql('''\n", 590 | " SELECT PREDICT(NewEncModel use NewEncModel_Auto) AS PredictedEncs,\n", 591 | " case when B90 = 1 then 1 end AS ActualPos,\n", 592 | " case when B90 != 1 then 0 end AS ActualNeg\n", 593 | " FROM %s''' % (TestTable), iconn)" 594 | ======= 595 | "execution_count": 14, 596 | "metadata": {}, 597 | "outputs": [ 598 | { 599 | "ename": "DatabaseError", 600 | "evalue": "Execution failed on sql 'SELECT PREDICT(NewEncModel use NewEncModel_Auto) AS PredictedEncs, case when B90 = 1 then 1 end AS ActualPos, case when B90 != 1 then 0 end AS ActualNeg FROM Data.EDEncsTesting': java.sql.SQLException: [SQLCODE: <-181>:]\r\n[Location: ]\r\n[%msg: < Model 'NewEncModel_Auto' not found>]", 601 | "output_type": "error", 602 | "traceback": [ 603 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 604 | "\u001b[0;31mException\u001b[0m Traceback (most recent call last)", 605 | "\u001b[0;32m/usr/local/lib64/python3.6/site-packages/_jpype.cpython-36m-x86_64-linux-gnu.so\u001b[0m in \u001b[0;36mcom.intersystems.jdbc.IRISConnection.prepareStatement\u001b[0;34m()\u001b[0m\n", 606 | "\u001b[0;32m/usr/local/lib64/python3.6/site-packages/_jpype.cpython-36m-x86_64-linux-gnu.so\u001b[0m in \u001b[0;36mcom.intersystems.jdbc.IRISConnection.prepareStatement\u001b[0;34m()\u001b[0m\n", 607 | "\u001b[0;32m/usr/local/lib64/python3.6/site-packages/_jpype.cpython-36m-x86_64-linux-gnu.so\u001b[0m in \u001b[0;36mcom.intersystems.jdbc.IRISConnection.getOrCreatePossiblyShardedIRISPreparedStatement\u001b[0;34m()\u001b[0m\n", 608 | "\u001b[0;32m/usr/local/lib64/python3.6/site-packages/_jpype.cpython-36m-x86_64-linux-gnu.so\u001b[0m in \u001b[0;36mcom.intersystems.jdbc.IRISPreparedStatement.\u001b[0;34m()\u001b[0m\n", 609 | "\u001b[0;32m/usr/local/lib64/python3.6/site-packages/_jpype.cpython-36m-x86_64-linux-gnu.so\u001b[0m in \u001b[0;36mcom.intersystems.jdbc.IRISPreparedStatement.prepare\u001b[0;34m()\u001b[0m\n", 610 | "\u001b[0;32m/usr/local/lib64/python3.6/site-packages/_jpype.cpython-36m-x86_64-linux-gnu.so\u001b[0m in \u001b[0;36mcom.intersystems.jdbc.IRISPreparedStatement.prepareInternal\u001b[0;34m()\u001b[0m\n", 611 | "\u001b[0;32m/usr/local/lib64/python3.6/site-packages/_jpype.cpython-36m-x86_64-linux-gnu.so\u001b[0m in 
\u001b[0;36mcom.intersystems.jdbc.InStream.readMessage\u001b[0;34m()\u001b[0m\n", 612 | "\u001b[0;32m/usr/local/lib64/python3.6/site-packages/_jpype.cpython-36m-x86_64-linux-gnu.so\u001b[0m in \u001b[0;36mcom.intersystems.jdbc.InStream.readMessage\u001b[0;34m()\u001b[0m\n", 613 | "\u001b[0;32m/usr/local/lib64/python3.6/site-packages/_jpype.cpython-36m-x86_64-linux-gnu.so\u001b[0m in \u001b[0;36mcom.intersystems.jdbc.InStream.readMessage\u001b[0;34m()\u001b[0m\n", 614 | "\u001b[0;32m/usr/local/lib64/python3.6/site-packages/_jpype.cpython-36m-x86_64-linux-gnu.so\u001b[0m in \u001b[0;36mcom.intersystems.jdbc.IRISConnection.processError\u001b[0;34m()\u001b[0m\n", 615 | "\u001b[0;32m/usr/local/lib64/python3.6/site-packages/_jpype.cpython-36m-x86_64-linux-gnu.so\u001b[0m in \u001b[0;36mcom.intersystems.jdbc.IRISConnection.getServerError\u001b[0;34m()\u001b[0m\n", 616 | "\u001b[0;31mException\u001b[0m: Java Exception", 617 | "\nThe above exception was the direct cause of the following exception:\n", 618 | "\u001b[0;31mjava.sql.SQLException\u001b[0m Traceback (most recent call last)", 619 | "\u001b[0;32m/usr/local/lib64/python3.6/site-packages/pandas/io/sql.py\u001b[0m in \u001b[0;36mexecute\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1680\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1681\u001b[0;31m \u001b[0mcur\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1682\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mcur\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 620 | "\u001b[0;32m/usr/local/lib64/python3.6/site-packages/jaydebeapi/__init__.py\u001b[0m in \u001b[0;36mexecute\u001b[0;34m(self, operation, parameters)\u001b[0m\n\u001b[1;32m 530\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_close_last\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 531\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_prep\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_connection\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjconn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mprepareStatement\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moperation\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 532\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_set_stmt_parms\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_prep\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mparameters\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 621 | "\u001b[0;31mjava.sql.SQLException\u001b[0m: java.sql.SQLException: [SQLCODE: <-181>:]\r\n[Location: ]\r\n[%msg: < Model 'NewEncModel_Auto' not found>]", 622 | "\nThe above exception was the direct cause of the following exception:\n", 623 | "\u001b[0;31mDatabaseError\u001b[0m Traceback (most recent call last)", 624 | "\u001b[0;32m~/Initializations/Conns.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mcase\u001b[0m \u001b[0mwhen\u001b[0m \u001b[0mB90\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m1\u001b[0m \u001b[0mthen\u001b[0m \u001b[0;36m1\u001b[0m 
\u001b[0mend\u001b[0m \u001b[0mAS\u001b[0m \u001b[0mActualPos\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m\\\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mcase\u001b[0m \u001b[0mwhen\u001b[0m \u001b[0mB90\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0;36m1\u001b[0m \u001b[0mthen\u001b[0m \u001b[0;36m0\u001b[0m \u001b[0mend\u001b[0m \u001b[0mAS\u001b[0m \u001b[0mActualNeg\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m\\\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m FROM %s\" % (TestTable), iconn)\n\u001b[0m", 625 | "\u001b[0;32m/usr/local/lib64/python3.6/site-packages/pandas/io/sql.py\u001b[0m in \u001b[0;36mread_sql\u001b[0;34m(sql, con, index_col, coerce_float, params, parse_dates, columns, chunksize)\u001b[0m\n\u001b[1;32m 487\u001b[0m \u001b[0mcoerce_float\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcoerce_float\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 488\u001b[0m \u001b[0mparse_dates\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mparse_dates\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 489\u001b[0;31m \u001b[0mchunksize\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mchunksize\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 490\u001b[0m )\n\u001b[1;32m 491\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 626 | "\u001b[0;32m/usr/local/lib64/python3.6/site-packages/pandas/io/sql.py\u001b[0m in \u001b[0;36mread_query\u001b[0;34m(self, sql, index_col, coerce_float, params, parse_dates, chunksize)\u001b[0m\n\u001b[1;32m 1725\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1726\u001b[0m \u001b[0margs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_convert_params\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msql\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mparams\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1727\u001b[0;31m \u001b[0mcursor\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1728\u001b[0m \u001b[0mcolumns\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mcol_desc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mcol_desc\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mcursor\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdescription\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1729\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 627 | "\u001b[0;32m/usr/local/lib64/python3.6/site-packages/pandas/io/sql.py\u001b[0m in \u001b[0;36mexecute\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1691\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1692\u001b[0m \u001b[0mex\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mDatabaseError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"Execution failed on sql '{args[0]}': {exc}\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1693\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mex\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mexc\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1694\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1695\u001b[0m 
\u001b[0;34m@\u001b[0m\u001b[0mstaticmethod\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 628 | "\u001b[0;31mDatabaseError\u001b[0m: Execution failed on sql 'SELECT PREDICT(NewEncModel use NewEncModel_Auto) AS PredictedEncs, case when B90 = 1 then 1 end AS ActualPos, case when B90 != 1 then 0 end AS ActualNeg FROM Data.EDEncsTesting': java.sql.SQLException: [SQLCODE: <-181>:]\r\n[Location: ]\r\n[%msg: < Model 'NewEncModel_Auto' not found>]" 629 | ] 630 | } 631 | ], 632 | "source": [ 633 | "TestSet = pd.read_sql(\"SELECT PREDICT(NewEncModel use NewEncModel_Auto) AS PredictedEncs, \\\n", 634 | " case when B90 = 1 then 1 end AS ActualPos, \\\n", 635 | " case when B90 != 1 then 0 end AS ActualNeg \\\n", 636 | " FROM %s\" % (TestTable), iconn)" 637 | >>>>>>> 730f3d3b116a644bb0466c140bb03f80ba73e6c9 638 | ] 639 | }, 640 | { 641 | "cell_type": "code", 642 | "execution_count": null, 643 | "metadata": {}, 644 | "outputs": [], 645 | "source": [ 646 | "print(pd.pivot_table(TestSet, index = 'PredictedEncs', values = ['ActualPos', 'ActualNeg'], aggfunc = 'count'))\n", 647 | "print('Accuracy: '+str(round((len(TestSet[(TestSet['PredictedEncs'] == TestSet['ActualPos']) \\\n", 648 | " | (TestSet['PredictedEncs'] == TestSet['ActualNeg'])])/len(TestSet))*100))+'%')\n", 649 | "print('Misclassification Rate: '+str(round((len(TestSet[(TestSet['PredictedEncs'] != TestSet['ActualPos']) \\\n", 650 | " & (TestSet['PredictedEncs'] != TestSet['ActualNeg'])])/len(TestSet))*100))+'%')\n", 651 | "print('%FP: '+str(round((len(TestSet[(TestSet['PredictedEncs'] == 1) & (TestSet['ActualNeg'] == 0)])/ \\\n", 652 | " len(TestSet[TestSet['ActualNeg'] == 0]))*100))+'%')\n", 653 | "print('%FN: '+str(round((len(TestSet[(TestSet['PredictedEncs'] == 0) & (TestSet['ActualPos'] == 1)])/ \\\n", 654 | " len(TestSet[TestSet['ActualPos'] == 1]))*100))+'%')" 655 | ] 656 | }, 657 | { 658 | "cell_type": "markdown", 659 | "metadata": {}, 660 | "source": [ 661 | "### 8. VALIDATE MODEL command calculates accuracy metrics\n", 662 | "You can certainly take that output above and calculate the accuracy using a standard formula, but IntegratedML has a built-in function to do that!\n", 663 | "\n", 664 | "Each time you run the command \"VALIDATE MODEL...\" it generates a set of metrics calculated on the data passed into the query. Since this table can be a bit difficult to read in its raw form we use a simple \"pivot\" call to arrange the data." 
665 | ] 666 | }, 667 | { 668 | "cell_type": "code", 669 | "execution_count": null, 670 | "metadata": {}, 671 | "outputs": [], 672 | "source": [ 674 | "icurs.execute(\"VALIDATE model NewEncModel use NewEncModel_Auto FROM Data.EDEncsTesting\")\n", 678 | "df5 = pd.read_sql(\"SELECT * FROM INFORMATION_SCHEMA.ML_VALIDATION_METRICS\", iconn)\n", 680 | "df6 = df5.pivot(index='VALIDATION_RUN_NAME', columns='METRIC_NAME', values='METRIC_VALUE')\n", 681 | "display(df6)" 682 | ] 683 | } 684 | ], 685 | "metadata": { 686 | "kernelspec": { 687 | "display_name": "Python 3", 688 | "language": "python", 689 | "name": "python3" 690 | }, 691 | "language_info": { 692 | "codemirror_mode": { 693 | "name": "ipython", 694 | "version": 3 695 | }, 696 | "file_extension": ".py", 697 | "mimetype": "text/x-python", 698 | "name": "python", 699 | "nbconvert_exporter": "python", 700 | "pygments_lexer": "ipython3", 701 | "version": "3.6.8" 702 | } 703 | }, 704 | "nbformat": 4, 705 | "nbformat_minor": 4 706 | } 707 | -------------------------------------------------------------------------------- /jupyter-samples/biomedical-integratedml-PyODBC.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# IntegratedML applied to biomedical data, using PyODBC\n", 8 | "This notebook demonstrates the following:\n", 9 | "- Connecting to InterSystems IRIS via a PyODBC connection\n", 10 | "- Creating, Training and Executing (PREDICT() function) an IntegratedML machine learning model, applied to breast cancer tumor diagnoses\n", 11 | "- INSERTING machine learning predictions into a new SQL table\n", 12 | "- Executing a relatively complex SQL query containing IntegratedML PREDICT() and PROBABILITY() functions, and flexibly using the results to filter and sort the output" 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "metadata": {}, 18 | "source": [ 19 | "### ODBC and pyODBC Resources\n", 20 | "Often, connecting to a database is more than half the battle when developing SQL-heavy applications, especially if you are not familiar with the tools, or more importantly the particular database system.
If this is the case, and you are just getting started using PyODBC and InterSystems IRIS, this notebook and these resources below may help you get up to speed!\n", 21 | "\n", 22 | "https://gettingstarted.intersystems.com/development-setup/odbc-connections/\n", 23 | "\n", 24 | "https://irisdocs.intersystems.com/irislatest/csp/docbook/DocBook.UI.Page.cls?KEY=BNETODBC_support#BNETODBC_support_pyodbc\n", 25 | "\n", 26 | "https://stackoverflow.com/questions/46405777/connect-docker-python-to-sql-server-with-pyodbc\n", 27 | "\n", 28 | "https://stackoverflow.com/questions/44527452/cant-open-lib-odbc-driver-13-for-sql-server-sym-linking-issue" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 1, 34 | "metadata": {}, 35 | "outputs": [ 36 | { 37 | "data": { 38 | "text/html": [ 39 | "" 40 | ], 41 | "text/plain": [ 42 | "" 43 | ] 44 | }, 45 | "metadata": {}, 46 | "output_type": "display_data" 47 | } 48 | ], 49 | "source": [ 50 | "# make the notebook full screen\n", 51 | "from IPython.core.display import display, HTML\n", 52 | "display(HTML(\"\"))" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "### 1. Install system packages for ODBC" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 2, 65 | "metadata": {}, 66 | "outputs": [ 67 | { 68 | "name": "stdout", 69 | "output_type": "stream", 70 | "text": [ 71 | "Hit:1 http://archive.ubuntu.com/ubuntu bionic InRelease\n", 72 | "Get:2 http://archive.ubuntu.com/ubuntu bionic-updates InRelease [88.7 kB]\n", 73 | "Get:3 http://security.ubuntu.com/ubuntu bionic-security InRelease [88.7 kB] \n", 74 | "Get:4 http://archive.ubuntu.com/ubuntu bionic-backports InRelease [74.6 kB] \n", 75 | "Fetched 252 kB in 1s (322 kB/s) \n", 76 | "Reading package lists... Done\n", 77 | "Reading package lists... Done\n", 78 | "Building dependency tree \n", 79 | "Reading state information... Done\n", 80 | "gcc is already the newest version (4:7.4.0-1ubuntu2.3).\n", 81 | "0 upgraded, 0 newly installed, 0 to remove and 103 not upgraded.\n", 82 | "Reading package lists... Done\n", 83 | "Building dependency tree \n", 84 | "Reading state information... Done\n", 85 | "unixodbc-dev is already the newest version (2.3.4-1.1ubuntu3).\n", 86 | "tdsodbc is already the newest version (1.00.82-2ubuntu0.1).\n", 87 | "0 upgraded, 0 newly installed, 0 to remove and 103 not upgraded.\n", 88 | "Reading package lists... Done\n", 89 | "Building dependency tree \n", 90 | "Reading state information... 
Done\n", 91 | "unixodbc-bin is already the newest version (2.3.0-4build1).\n", 92 | "0 upgraded, 0 newly installed, 0 to remove and 103 not upgraded.\n" 93 | ] 94 | } 95 | ], 96 | "source": [ 97 | "!apt-get update\n", 98 | "!apt-get install gcc\n", 99 | "!apt-get install -y tdsodbc unixodbc-dev\n", 100 | "!apt install unixodbc-bin -y\n", 101 | "!apt-get clean " 102 | ] 103 | }, 104 | { 105 | "cell_type": "markdown", 106 | "metadata": {}, 107 | "source": [ 108 | "#### Use this command to troubleshoot a failed pyodbc installation:\n", 109 | "!pip install --upgrade --global-option=build_ext --global-option=\"-I/usr/local/include\" --global-option=\"-L/usr/local/lib\" pyodbc" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 3, 115 | "metadata": {}, 116 | "outputs": [ 117 | { 118 | "name": "stdout", 119 | "output_type": "stream", 120 | "text": [ 121 | "Requirement already satisfied: pyodbc in /usr/local/lib/python3.6/dist-packages (4.0.32)\n", 122 | "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\n" 123 | ] 124 | } 125 | ], 126 | "source": [ 127 | "!pip install pyodbc" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": 4, 133 | "metadata": {}, 134 | "outputs": [], 135 | "source": [ 136 | "!rm /etc/odbcinst.ini\n", 137 | "!rm /etc/odbc.ini" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": 5, 143 | "metadata": {}, 144 | "outputs": [], 145 | "source": [ 146 | "!ln -s /tf/odbcinst.ini /etc/odbcinst.ini\n", 147 | "!ln -s /tf/odbc.ini /etc/odbc.ini" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 6, 153 | "metadata": {}, 154 | "outputs": [ 155 | { 156 | "name": "stdout", 157 | "output_type": "stream", 158 | "text": [ 159 | "[InterSystems ODBC35]\r\n", 160 | "UsageCount=1\r\n", 161 | "Driver=/tf/libirisodbcu35.so\r\n", 162 | "Setup=/tf/libirisodbcu35.so\r\n", 163 | "SQLLevel=1\r\n", 164 | "FileUsage=0\r\n", 165 | "DriverODBCVer=02.10\r\n", 166 | "ConnectFunctions=YYN\r\n", 167 | "APILevel=1\r\n", 168 | "DEBUG=1\r\n", 169 | "CPTimeout=\r\n", 170 | "\r\n" 171 | ] 172 | } 173 | ], 174 | "source": [ 175 | "!cat /tf/odbcinst.ini" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": 7, 181 | "metadata": {}, 182 | "outputs": [ 183 | { 184 | "name": "stdout", 185 | "output_type": "stream", 186 | "text": [ 187 | "[user]\r\n", 188 | "Driver=InterSystems ODBC35\r\n", 189 | "Protocol=TCP\r\n", 190 | "Host=irisimlsvr\r\n", 191 | "Port=51773\r\n", 192 | "Namespace=USER\r\n", 193 | "UID=SUPERUSER\r\n", 194 | "Password=SYS\r\n", 195 | "Description=Sample namespace\r\n", 196 | "Query Timeout=0\r\n", 197 | "Static Cursors=0\r\n", 198 | "\r\n" 199 | ] 200 | } 201 | ], 202 | "source": [ 203 | "!cat /tf/odbc.ini" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": 8, 209 | "metadata": {}, 210 | "outputs": [ 211 | { 212 | "name": "stdout", 213 | "output_type": "stream", 214 | "text": [ 215 | "unixODBC 2.3.4\r\n", 216 | "DRIVERS............: /etc/odbcinst.ini\r\n", 217 | "SYSTEM DATA SOURCES: /etc/odbc.ini\r\n", 218 | "FILE DATA SOURCES..: /etc/ODBCDataSources\r\n", 219 | "USER DATA SOURCES..: /root/.odbc.ini\r\n", 220 | "SQLULEN Size.......: 8\r\n", 221 | "SQLLEN Size........: 8\r\n", 222 | "SQLSETPOSIROW Size.: 8\r\n" 223 | ] 224 | } 225 | ], 226 | "source": [ 227 | 
"!odbcinst -j" 228 | ] 229 | }, 230 | { 231 | "cell_type": "markdown", 232 | "metadata": {}, 233 | "source": [ 234 | "### 2. Verify you see \"InterSystems ODBC35\" in the drivers list" 235 | ] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "execution_count": 9, 240 | "metadata": {}, 241 | "outputs": [ 242 | { 243 | "name": "stdout", 244 | "output_type": "stream", 245 | "text": [ 246 | "['InterSystems ODBC35']\n" 247 | ] 248 | } 249 | ], 250 | "source": [ 251 | "import pyodbc\n", 252 | "print(pyodbc.drivers())" 253 | ] 254 | }, 255 | { 256 | "cell_type": "markdown", 257 | "metadata": {}, 258 | "source": [ 259 | "### 3. Get an ODBC connection " 260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": 10, 265 | "metadata": {}, 266 | "outputs": [], 267 | "source": [ 268 | "import pyodbc \n", 269 | "import time\n", 270 | "\n", 271 | "\n", 272 | "#input(\"Hit any key to start\")\n", 273 | "dsn = 'IRIS IntegratedML demo via PyODBC'\n", 274 | "server = 'irisimlsvr' #'192.168.99.101' \n", 275 | "port = '1972' #'9091'\n", 276 | "database = 'USER' \n", 277 | "username = 'SUPERUSER' \n", 278 | "password = 'SYS' \n", 279 | "cnxn = pyodbc.connect('DRIVER={InterSystems ODBC35};SERVER='+server+';PORT='+port+';DATABASE='+database+';UID='+username+';PWD='+ password)\n", 280 | "\n", 281 | "### Ensure it read strings correctly.\n", 282 | "cnxn.setdecoding(pyodbc.SQL_CHAR, encoding='utf8')\n", 283 | "cnxn.setdecoding(pyodbc.SQL_WCHAR, encoding='utf8')\n", 284 | "cnxn.setencoding(encoding='utf8')" 285 | ] 286 | }, 287 | { 288 | "cell_type": "markdown", 289 | "metadata": {}, 290 | "source": [ 291 | "### 4. Get a cursor; start the timer" 292 | ] 293 | }, 294 | { 295 | "cell_type": "code", 296 | "execution_count": 11, 297 | "metadata": {}, 298 | "outputs": [], 299 | "source": [ 300 | "cursor = cnxn.cursor()\n", 301 | "start= time.clock()" 302 | ] 303 | }, 304 | { 305 | "cell_type": "markdown", 306 | "metadata": {}, 307 | "source": [ 308 | "### 5. Specify the training data, and give a model name" 309 | ] 310 | }, 311 | { 312 | "cell_type": "code", 313 | "execution_count": 12, 314 | "metadata": {}, 315 | "outputs": [], 316 | "source": [ 317 | "dataTable = 'Biomedical.BreastCancer'\n", 318 | "dataTablePredict = 'Result02'\n", 319 | "dataColumn = 'Diagnosis'\n", 320 | "dataColumnPredict = \"PredictedDiagnosis\"\n", 321 | "modelName = \"bc\" #chose a name - must be unique in server end" 322 | ] 323 | }, 324 | { 325 | "cell_type": "markdown", 326 | "metadata": {}, 327 | "source": [ 328 | " ### Cleaning before retrying" 329 | ] 330 | }, 331 | { 332 | "cell_type": "code", 333 | "execution_count": 13, 334 | "metadata": {}, 335 | "outputs": [], 336 | "source": [ 337 | "#If we re-run the notebook just drop model and table\n", 338 | "#cursor.execute(\"DROP MODEL %s\" % modelName)\n", 339 | "#cursor.execute(\"DROP TABLE %s\" % dataTablePredict)" 340 | ] 341 | }, 342 | { 343 | "cell_type": "markdown", 344 | "metadata": {}, 345 | "source": [ 346 | "### 6. 
Train and predict" 347 | ] 348 | }, 349 | { 350 | "cell_type": "code", 351 | "execution_count": 14, 352 | "metadata": {}, 353 | "outputs": [], 354 | "source": [ 355 | "cursor.execute(\"CREATE MODEL %s PREDICTING (%s) FROM %s\" % (modelName, dataColumn, dataTable))\n", 356 | "cursor.execute(\"TRAIN MODEL %s FROM %s\" % (modelName, dataTable))\n", 357 | "cursor.execute(\"Create Table %s (%s VARCHAR(100), %s VARCHAR(100))\" % (dataTablePredict, dataColumnPredict, dataColumn))\n", 358 | "cursor.execute(\"INSERT INTO %s SELECT TOP 20 PREDICT(%s) AS %s, %s FROM %s\" % (dataTablePredict, modelName, dataColumnPredict, dataColumn, dataTable)) \n", 359 | "cnxn.commit()" 360 | ] 361 | }, 362 | { 363 | "cell_type": "markdown", 364 | "metadata": {}, 365 | "source": [ 366 | "### 7. Show the predict result" 367 | ] 368 | }, 369 | { 370 | "cell_type": "code", 371 | "execution_count": 15, 372 | "metadata": {}, 373 | "outputs": [ 374 | { 375 | "data": { 376 | "text/html": [ 377 | "
\n", 378 | "\n", 391 | "\n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | "
PredictedDiagnosisDiagnosis
0MM
1MM
2MM
3MM
4MM
5MM
6MM
7MM
8MM
9MM
10MM
11MM
12MM
13MM
14MM
15MM
16MM
17MM
18MM
19BB
\n", 502 | "
" 503 | ], 504 | "text/plain": [ 505 | " PredictedDiagnosis Diagnosis\n", 506 | "0 M M\n", 507 | "1 M M\n", 508 | "2 M M\n", 509 | "3 M M\n", 510 | "4 M M\n", 511 | "5 M M\n", 512 | "6 M M\n", 513 | "7 M M\n", 514 | "8 M M\n", 515 | "9 M M\n", 516 | "10 M M\n", 517 | "11 M M\n", 518 | "12 M M\n", 519 | "13 M M\n", 520 | "14 M M\n", 521 | "15 M M\n", 522 | "16 M M\n", 523 | "17 M M\n", 524 | "18 M M\n", 525 | "19 B B" 526 | ] 527 | }, 528 | "metadata": {}, 529 | "output_type": "display_data" 530 | } 531 | ], 532 | "source": [ 533 | "import pandas as pd\n", 534 | "from IPython.display import display\n", 535 | "\n", 536 | "df1 = pd.read_sql(\"SELECT * from %s ORDER BY ID\" % dataTablePredict, cnxn)\n", 537 | "display(df1)" 538 | ] 539 | }, 540 | { 541 | "cell_type": "markdown", 542 | "metadata": {}, 543 | "source": [ 544 | "### 8. Show a complicated query\n", 545 | "IntegratedML function PREDICT() and PROBABILITY() can appear virtually anywhere in a SQL query, for maximal flexibility!\n", 546 | "Below we are SELECTing columns as well as the result of the PROBABILITY function, and then filtering on the result of the PREDICT function. To top it off, ORDER BY is using the output of PROBSBILITY for sorting." 547 | ] 548 | }, 549 | { 550 | "cell_type": "code", 551 | "execution_count": 16, 552 | "metadata": {}, 553 | "outputs": [ 554 | { 555 | "data": { 556 | "text/html": [ 557 | "
\n", 558 | "\n", 571 | "\n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | " \n", 708 | " \n", 709 | " \n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | " \n", 714 | " \n", 715 | " \n", 716 | " \n", 717 | " \n", 718 | " \n", 719 | " \n", 720 | " \n", 721 | " \n", 722 | " \n", 723 | " \n", 724 | " \n", 725 | " \n", 726 | " \n", 727 | " \n", 728 | " \n", 729 | " \n", 730 | " \n", 731 | " \n", 732 | " \n", 733 | " \n", 734 | " \n", 735 | " \n", 736 | " \n", 737 | " \n", 738 | " \n", 739 | " \n", 740 | " \n", 741 | " \n", 742 | " \n", 743 | " \n", 744 | " \n", 745 | " \n", 746 | " \n", 747 | " \n", 748 | " \n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | "
IDProbabilitydiagnosis
0740.508227M
12980.675269M
22160.863261M
3420.955022M
41470.961170M
51010.994392M
6450.995220M
760.995779M
8400.996360M
91940.998938M
1080.999320M
111270.999456M
125020.999470M
131720.999546M
14150.999659M
15440.999668M
164360.999765M
171060.999839M
18480.999894M
192300.999894M
2090.999991M
215130.999994M
221970.999995M
23650.999995M
24320.999999M
2541.000000M
263801.000000M
27101.000000M
282041.000000M
\n", 757 | "
" 758 | ], 759 | "text/plain": [ 760 | " ID Probability diagnosis\n", 761 | "0 74 0.508227 M\n", 762 | "1 298 0.675269 M\n", 763 | "2 216 0.863261 M\n", 764 | "3 42 0.955022 M\n", 765 | "4 147 0.961170 M\n", 766 | "5 101 0.994392 M\n", 767 | "6 45 0.995220 M\n", 768 | "7 6 0.995779 M\n", 769 | "8 40 0.996360 M\n", 770 | "9 194 0.998938 M\n", 771 | "10 8 0.999320 M\n", 772 | "11 127 0.999456 M\n", 773 | "12 502 0.999470 M\n", 774 | "13 172 0.999546 M\n", 775 | "14 15 0.999659 M\n", 776 | "15 44 0.999668 M\n", 777 | "16 436 0.999765 M\n", 778 | "17 106 0.999839 M\n", 779 | "18 48 0.999894 M\n", 780 | "19 230 0.999894 M\n", 781 | "20 9 0.999991 M\n", 782 | "21 513 0.999994 M\n", 783 | "22 197 0.999995 M\n", 784 | "23 65 0.999995 M\n", 785 | "24 32 0.999999 M\n", 786 | "25 4 1.000000 M\n", 787 | "26 380 1.000000 M\n", 788 | "27 10 1.000000 M\n", 789 | "28 204 1.000000 M" 790 | ] 791 | }, 792 | "metadata": {}, 793 | "output_type": "display_data" 794 | } 795 | ], 796 | "source": [ 797 | "df2 = pd.read_sql(\"SELECT ID, PROBABILITY(bc FOR 'M') AS Probability, Diagnosis FROM %s \\\n", 798 | " WHERE Mean_Area BETWEEN 300 AND 600 AND Mean_Radius > 5 AND PREDICT(%s) = 'M' \\\n", 799 | " ORDER BY Probability\" % (dataTable, modelName),cnxn) \n", 800 | "display(df2)" 801 | ] 802 | }, 803 | { 804 | "cell_type": "markdown", 805 | "metadata": {}, 806 | "source": [ 807 | "### 9. Close and clean " 808 | ] 809 | }, 810 | { 811 | "cell_type": "code", 812 | "execution_count": 17, 813 | "metadata": {}, 814 | "outputs": [ 815 | { 816 | "name": "stdout", 817 | "output_type": "stream", 818 | "text": [ 819 | "Total elapsed time: \n", 820 | "0.731681\n" 821 | ] 822 | } 823 | ], 824 | "source": [ 825 | "cnxn.close()\n", 826 | "end= time.clock()\n", 827 | "print (\"Total elapsed time: \")\n", 828 | "print (end-start)\n", 829 | "#input(\"Hit any key to end\")" 830 | ] 831 | } 832 | ], 833 | "metadata": { 834 | "kernelspec": { 835 | "display_name": "Python 3", 836 | "language": "python", 837 | "name": "python3" 838 | }, 839 | "language_info": { 840 | "codemirror_mode": { 841 | "name": "ipython", 842 | "version": 3 843 | }, 844 | "file_extension": ".py", 845 | "mimetype": "text/x-python", 846 | "name": "python", 847 | "nbconvert_exporter": "python", 848 | "pygments_lexer": "ipython3", 849 | "version": "3.6.9" 850 | } 851 | }, 852 | "nbformat": 4, 853 | "nbformat_minor": 2 854 | } 855 | -------------------------------------------------------------------------------- /jupyter-samples/campaign-integratedml-jdbc.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Train an IntegratedML model on Marketing Campaign Dataset\n", 8 | "## Use JDBC to connect to InterSystems IRIS database\n", 9 | "This Notebook demonstrates:\n", 10 | "- Using the JayDeBeApi Python library to connect to InterSystems IRIS\n", 11 | "- Creating views to segment data into training and test sets\n", 12 | "- Defining and training an IntegratedML model to predict marketing campaign responses\n", 13 | "- Comparing the resulting model's predictions to data in the test set (that the model was not trained on)\n", 14 | "- Using the IntegratedML \"VALIDATE MODEL\" command to calculate accuracy metrics on the test set data" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 1, 20 | "metadata": {}, 21 | "outputs": [ 22 | { 23 | "data": { 24 | "text/html": [ 25 | "" 26 | ], 27 | "text/plain": [ 28 | "" 29 | ] 30 | 
}, 31 | "metadata": {}, 32 | "output_type": "display_data" 33 | } 34 | ], 35 | "source": [ 36 | "from IPython.core.display import display, HTML\n", 37 | "display(HTML(\"\"))" 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": {}, 43 | "source": [ 44 | "### 1. Set environment variables, if necessary" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": {}, 50 | "source": [ 51 | "### 2. Get a JDBC connection and cursor" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 2, 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "\n", 61 | "import jaydebeapi\n", 62 | "url = \"jdbc:IRIS://irisimlsvr:1972/USER\"\n", 63 | "driver = \"com.intersystems.jdbc.IRISDriver\"\n", 64 | "user = \"SUPERUSER\"\n", 65 | "password = \"SYS\"\n", 66 | "#libx = \"C:/InterSystems/IRIS20194/dev/java/lib/JDK18\"\n", 67 | "#jarfile = \"C:/InterSystems/IRIS20194/dev/java/lib/JDK18/intersystems-jdbc-3.0.0.jar\"\n", 68 | "jarfile = \"./intersystems-jdbc-3.1.0.jar\"" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 3, 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "conn = jaydebeapi.connect(driver, url, [user, password], jarfile)\n", 78 | "curs = conn.cursor()" 79 | ] 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "metadata": {}, 84 | "source": [ 85 | "### 3. Specify the source data table" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": 4, 91 | "metadata": {}, 92 | "outputs": [], 93 | "source": [ 94 | "dataTable = 'Marketing.Campaign'\n", 95 | "trainingTable = \"Marketing.CampaignTrainingSmall\"\n", 96 | "predictTable = \"Marketing.CampaignPredictSmall\"" 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "metadata": {}, 102 | "source": [ 103 | "### 4. Execute a query and display results in a Pandas DataFrame" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": 5, 109 | "metadata": {}, 110 | "outputs": [ 111 | { 112 | "data": { 113 | "text/html": [ 114 |
\n", 115 | "\n", 128 | "\n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 
400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | "
AGEEDUCATIONMARITAL_STATUSOCCUPATIONRELATIONSHIPRACEGENDERPROSPECT_IDEMPLOYMENTYEARS_EDUCCOUNTRY_OF_BIRTHINV_EARNINGSINV_LOSSESWORKWEEK_LENGTHRESPONSEAMOUNT
050HIGHSCHOOLMARRIEDCIVSPOREPAIRCRAFTHUSBANDWHITEMALE77001107BUSINESS9USA00400123.90
149HIGHSCHOOLMARRIEDCIVSPOLOGISTICSTRANSPHUSBANDWHITEMALE77001140BUSINESS9USA0040097.91
237HIGHSCHOOLMARRIEDCIVSPOREPAIRCRAFTHUSBANDWHITEMALE77001143BUSINESS9USA00400135.65
343HIGHSCHOOLMARRIEDCIVSPOCLEANINGHANDLINGHUSBANDWHITEMALE77001187BUSINESS9USA0040093.23
454HIGHSCHOOLMARRIEDCIVSPOLOGISTICSTRANSPHUSBANDWHITEMALE77001412BUSINESS9USA00400143.72
527HIGHSCHOOLMARRIEDCIVSPOSERVICEVARIOUSHUSBANDWHITEMALE77001429BUSINESS9USA00400232.20
629HIGHSCHOOLMARRIEDCIVSPOREPAIRCRAFTHUSBANDWHITEMALE77001443BUSINESS9USA00400103.46
729HIGHSCHOOLMARRIEDCIVSPOADMINOFFICEHUSBANDWHITEMALE77001484BUSINESS9USA00400123.53
821HIGHSCHOOLMARRIEDCIVSPOMACHINEOPERINSPHUSBANDWHITEMALE77001526BUSINESS9USA00400230.07
936HIGHSCHOOLMARRIEDCIVSPOMACHINEOPERINSPHUSBANDWHITEMALE77001634BUSINESS9USA0040048.95
1052HIGHSCHOOLMARRIEDCIVSPOSPECIALISTPROFFHUSBANDWHITEMALE77001649BUSINESS9USA0040058.78
1160HIGHSCHOOLMARRIEDCIVSPOREPAIRCRAFTHUSBANDWHITEMALE77001660BUSINESS9USA00400106.29
1228HIGHSCHOOLMARRIEDCIVSPOMACHINEOPERINSPHUSBANDWHITEMALE77001661BUSINESS9USA00400211.26
1361HIGHSCHOOLMARRIEDCIVSPOREPAIRCRAFTHUSBANDWHITEMALE77001719BUSINESS9USA00400104.34
1434HIGHSCHOOLMARRIEDCIVSPOMACHINEOPERINSPHUSBANDWHITEMALE77001794BUSINESS9USA00400184.73
1562HIGHSCHOOLMARRIEDCIVSPOMACHINEOPERINSPHUSBANDWHITEMALE77001874BUSINESS9USA0040066.32
1656HIGHSCHOOLMARRIEDCIVSPOMACHINEOPERINSPHUSBANDWHITEMALE77001913BUSINESS9USA0040060.13
1734HIGHSCHOOLMARRIEDCIVSPOMACHINEOPERINSPHUSBANDWHITEMALE77001942BUSINESS9USA0040077.02
1829HIGHSCHOOLMARRIEDCIVSPOSUPPORTTECHNICHUSBANDWHITEMALE77001977BUSINESS9USA00400122.43
1933HIGHSCHOOLMARRIEDCIVSPOMACHINEOPERINSPHUSBANDWHITEMALE77002003BUSINESS9USA00400257.23
\n", 533 | "
" 534 | ], 535 | "text/plain": [ 536 | " AGE EDUCATION MARITAL_STATUS OCCUPATION RELATIONSHIP RACE \\\n", 537 | "0 50 HIGHSCHOOL MARRIEDCIVSPO REPAIRCRAFT HUSBAND WHITE \n", 538 | "1 49 HIGHSCHOOL MARRIEDCIVSPO LOGISTICSTRANSP HUSBAND WHITE \n", 539 | "2 37 HIGHSCHOOL MARRIEDCIVSPO REPAIRCRAFT HUSBAND WHITE \n", 540 | "3 43 HIGHSCHOOL MARRIEDCIVSPO CLEANINGHANDLING HUSBAND WHITE \n", 541 | "4 54 HIGHSCHOOL MARRIEDCIVSPO LOGISTICSTRANSP HUSBAND WHITE \n", 542 | "5 27 HIGHSCHOOL MARRIEDCIVSPO SERVICEVARIOUS HUSBAND WHITE \n", 543 | "6 29 HIGHSCHOOL MARRIEDCIVSPO REPAIRCRAFT HUSBAND WHITE \n", 544 | "7 29 HIGHSCHOOL MARRIEDCIVSPO ADMINOFFICE HUSBAND WHITE \n", 545 | "8 21 HIGHSCHOOL MARRIEDCIVSPO MACHINEOPERINSP HUSBAND WHITE \n", 546 | "9 36 HIGHSCHOOL MARRIEDCIVSPO MACHINEOPERINSP HUSBAND WHITE \n", 547 | "10 52 HIGHSCHOOL MARRIEDCIVSPO SPECIALISTPROFF HUSBAND WHITE \n", 548 | "11 60 HIGHSCHOOL MARRIEDCIVSPO REPAIRCRAFT HUSBAND WHITE \n", 549 | "12 28 HIGHSCHOOL MARRIEDCIVSPO MACHINEOPERINSP HUSBAND WHITE \n", 550 | "13 61 HIGHSCHOOL MARRIEDCIVSPO REPAIRCRAFT HUSBAND WHITE \n", 551 | "14 34 HIGHSCHOOL MARRIEDCIVSPO MACHINEOPERINSP HUSBAND WHITE \n", 552 | "15 62 HIGHSCHOOL MARRIEDCIVSPO MACHINEOPERINSP HUSBAND WHITE \n", 553 | "16 56 HIGHSCHOOL MARRIEDCIVSPO MACHINEOPERINSP HUSBAND WHITE \n", 554 | "17 34 HIGHSCHOOL MARRIEDCIVSPO MACHINEOPERINSP HUSBAND WHITE \n", 555 | "18 29 HIGHSCHOOL MARRIEDCIVSPO SUPPORTTECHNIC HUSBAND WHITE \n", 556 | "19 33 HIGHSCHOOL MARRIEDCIVSPO MACHINEOPERINSP HUSBAND WHITE \n", 557 | "\n", 558 | " GENDER PROSPECT_ID EMPLOYMENT YEARS_EDUC COUNTRY_OF_BIRTH INV_EARNINGS \\\n", 559 | "0 MALE 77001107 BUSINESS 9 USA 0 \n", 560 | "1 MALE 77001140 BUSINESS 9 USA 0 \n", 561 | "2 MALE 77001143 BUSINESS 9 USA 0 \n", 562 | "3 MALE 77001187 BUSINESS 9 USA 0 \n", 563 | "4 MALE 77001412 BUSINESS 9 USA 0 \n", 564 | "5 MALE 77001429 BUSINESS 9 USA 0 \n", 565 | "6 MALE 77001443 BUSINESS 9 USA 0 \n", 566 | "7 MALE 77001484 BUSINESS 9 USA 0 \n", 567 | "8 MALE 77001526 BUSINESS 9 USA 0 \n", 568 | "9 MALE 77001634 BUSINESS 9 USA 0 \n", 569 | "10 MALE 77001649 BUSINESS 9 USA 0 \n", 570 | "11 MALE 77001660 BUSINESS 9 USA 0 \n", 571 | "12 MALE 77001661 BUSINESS 9 USA 0 \n", 572 | "13 MALE 77001719 BUSINESS 9 USA 0 \n", 573 | "14 MALE 77001794 BUSINESS 9 USA 0 \n", 574 | "15 MALE 77001874 BUSINESS 9 USA 0 \n", 575 | "16 MALE 77001913 BUSINESS 9 USA 0 \n", 576 | "17 MALE 77001942 BUSINESS 9 USA 0 \n", 577 | "18 MALE 77001977 BUSINESS 9 USA 0 \n", 578 | "19 MALE 77002003 BUSINESS 9 USA 0 \n", 579 | "\n", 580 | " INV_LOSSES WORKWEEK_LENGTH RESPONSE AMOUNT \n", 581 | "0 0 40 0 123.90 \n", 582 | "1 0 40 0 97.91 \n", 583 | "2 0 40 0 135.65 \n", 584 | "3 0 40 0 93.23 \n", 585 | "4 0 40 0 143.72 \n", 586 | "5 0 40 0 232.20 \n", 587 | "6 0 40 0 103.46 \n", 588 | "7 0 40 0 123.53 \n", 589 | "8 0 40 0 230.07 \n", 590 | "9 0 40 0 48.95 \n", 591 | "10 0 40 0 58.78 \n", 592 | "11 0 40 0 106.29 \n", 593 | "12 0 40 0 211.26 \n", 594 | "13 0 40 0 104.34 \n", 595 | "14 0 40 0 184.73 \n", 596 | "15 0 40 0 66.32 \n", 597 | "16 0 40 0 60.13 \n", 598 | "17 0 40 0 77.02 \n", 599 | "18 0 40 0 122.43 \n", 600 | "19 0 40 0 257.23 " 601 | ] 602 | }, 603 | "metadata": {}, 604 | "output_type": "display_data" 605 | } 606 | ], 607 | "source": [ 608 | "import pandas as pd\n", 609 | "from IPython.display import display\n", 610 | "\n", 611 | "df = pd.read_sql(\"select TOP 20 * from %s\" % dataTable, conn)\n", 612 | "display(df)" 613 | ] 614 | }, 615 | { 616 | "cell_type": "code", 617 | "execution_count": 6, 618 | 
"metadata": {}, 619 | "outputs": [ 620 | { 621 | "data": { 622 | "text/html": [ 623 | "
\n", 624 | "\n", 637 | "\n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | "
Aggregate_1
048842
\n", 651 | "
" 652 | ], 653 | "text/plain": [ 654 | " Aggregate_1\n", 655 | "0 48842" 656 | ] 657 | }, 658 | "metadata": {}, 659 | "output_type": "display_data" 660 | } 661 | ], 662 | "source": [ 663 | "# Show number rows\n", 664 | "df1 = pd.read_sql(\"SELECT COUNT(*) FROM %s\" % dataTable, conn)\n", 665 | "display(df1)" 666 | ] 667 | }, 668 | { 669 | "cell_type": "markdown", 670 | "metadata": {}, 671 | "source": [ 672 | "### Cleaning before retrying" 673 | ] 674 | }, 675 | { 676 | "cell_type": "code", 677 | "execution_count": 7, 678 | "metadata": {}, 679 | "outputs": [], 680 | "source": [ 681 | "# Before executing the notebook again, drop model, tables, views,... previously created\n", 682 | "#curs.execute(\"DROP VIEW %s\" % trainingTable)\n", 683 | "#curs.execute(\"DROP VIEW %s\" % predictTable)\n", 684 | "#curs.execute(\"DROP MODEL CampaignModel\")" 685 | ] 686 | }, 687 | { 688 | "cell_type": "markdown", 689 | "metadata": {}, 690 | "source": [ 691 | "### 5. Make some views to split training and testing datasets" 692 | ] 693 | }, 694 | { 695 | "cell_type": "code", 696 | "execution_count": 8, 697 | "metadata": {}, 698 | "outputs": [], 699 | "source": [ 700 | "# Small training set view\n", 701 | "curs.execute(\"CREATE VIEW %s AS SELECT * FROM %s WHERE ID<3001\" % (trainingTable,dataTable))\n", 702 | "# Small prediction set\n", 703 | "curs.execute(\"CREATE VIEW %s AS SELECT * FROM %s WHERE ID>47000\" % (predictTable,dataTable))" 704 | ] 705 | }, 706 | { 707 | "cell_type": "markdown", 708 | "metadata": {}, 709 | "source": [ 710 | "### 6. Create and Train an IntegratedML Model using default settings\n", 711 | "IntegratedML only needs a model name, the name of the column that is the target column to predict, and a table (or SELECT query to specify input columns." 712 | ] 713 | }, 714 | { 715 | "cell_type": "code", 716 | "execution_count": 9, 717 | "metadata": {}, 718 | "outputs": [], 719 | "source": [ 720 | "curs.execute(\"CREATE MODEL CampaignModel PREDICTING (RESPONSE) FROM %s\" % trainingTable)" 721 | ] 722 | }, 723 | { 724 | "cell_type": "markdown", 725 | "metadata": {}, 726 | "source": [ 727 | "Now that the model is defined, you can TRAIN it, which invokes the AutoML machine learning procedure." 728 | ] 729 | }, 730 | { 731 | "cell_type": "code", 732 | "execution_count": 10, 733 | "metadata": {}, 734 | "outputs": [], 735 | "source": [ 736 | "curs.execute(\"TRAIN MODEL CampaignModel\")" 737 | ] 738 | }, 739 | { 740 | "cell_type": "markdown", 741 | "metadata": {}, 742 | "source": [ 743 | "Once that finishes, you can see some information about the model in the \"ML_TRAINED_MODELS\" table." 744 | ] 745 | }, 746 | { 747 | "cell_type": "code", 748 | "execution_count": 11, 749 | "metadata": {}, 750 | "outputs": [ 751 | { 752 | "data": { 753 | "text/html": [ 754 | "
\n", 755 | "\n", 768 | "\n", 769 | " \n", 770 | " \n", 771 | " \n", 772 | " \n", 773 | " \n", 774 | " \n", 775 | " \n", 776 | " \n", 777 | " \n", 778 | " \n", 779 | " \n", 780 | " \n", 781 | " \n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | " \n", 786 | " \n", 787 | " \n", 788 | " \n", 789 | " \n", 790 | " \n", 791 | " \n", 792 | " \n", 793 | " \n", 794 | " \n", 795 | " \n", 796 | " \n", 797 | " \n", 798 | " \n", 799 | " \n", 800 | "
MODEL_NAMETRAINED_MODEL_NAMEPROVIDERTRAINED_TIMESTAMPMODEL_TYPEMODEL_INFO
0CampaignModelCampaignModel_t1AutoML2022-04-22 08:01:09.859000classificationModelType:TensorFlow Neural Network, Package:T...
1bcbc_t1AutoML2022-04-22 07:54:38.592000classificationModelType:TensorFlow Neural Network, Package:T...
\n", 801 | "
" 802 | ], 803 | "text/plain": [ 804 | " MODEL_NAME TRAINED_MODEL_NAME PROVIDER TRAINED_TIMESTAMP \\\n", 805 | "0 CampaignModel CampaignModel_t1 AutoML 2022-04-22 08:01:09.859000 \n", 806 | "1 bc bc_t1 AutoML 2022-04-22 07:54:38.592000 \n", 807 | "\n", 808 | " MODEL_TYPE MODEL_INFO \n", 809 | "0 classification ModelType:TensorFlow Neural Network, Package:T... \n", 810 | "1 classification ModelType:TensorFlow Neural Network, Package:T... " 811 | ] 812 | }, 813 | "metadata": {}, 814 | "output_type": "display_data" 815 | } 816 | ], 817 | "source": [ 818 | "df3 = pd.read_sql(\"SELECT * FROM INFORMATION_SCHEMA.ML_TRAINED_MODELS\", conn)\n", 819 | "display(df3)" 820 | ] 821 | }, 822 | { 823 | "cell_type": "markdown", 824 | "metadata": {}, 825 | "source": [ 826 | "### 7. Compare model output to data it has not seen yet\n", 827 | "Now you can use SQL to SELECT data from another table, run the IntegratedML model on this new data, and see how well the predictions match the data!" 828 | ] 829 | }, 830 | { 831 | "cell_type": "code", 832 | "execution_count": 12, 833 | "metadata": {}, 834 | "outputs": [ 835 | { 836 | "data": { 837 | "text/html": [ 838 | "
\n", 839 | "\n", 852 | "\n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | " \n", 868 | " \n", 869 | " \n", 870 | " \n", 871 | " \n", 872 | " \n", 873 | " \n", 874 | " \n", 875 | " \n", 876 | " \n", 877 | " \n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | " \n", 883 | " \n", 884 | " \n", 885 | " \n", 886 | " \n", 887 | " \n", 888 | " \n", 889 | " \n", 890 | " \n", 891 | " \n", 892 | " \n", 893 | " \n", 894 | " \n", 895 | " \n", 896 | " \n", 897 | " \n", 898 | " \n", 899 | " \n", 900 | " \n", 901 | " \n", 902 | " \n", 903 | " \n", 904 | " \n", 905 | " \n", 906 | " \n", 907 | " \n", 908 | " \n", 909 | " \n", 910 | " \n", 911 | " \n", 912 | " \n", 913 | " \n", 914 | " \n", 915 | " \n", 916 | " \n", 917 | "
PredictedResponseActualResponse
000
100
201
300
401
.........
183700
183800
183900
184000
184100
\n", 918 | "

1842 rows × 2 columns

\n", 919 | "
" 920 | ], 921 | "text/plain": [ 922 | " PredictedResponse ActualResponse\n", 923 | "0 0 0\n", 924 | "1 0 0\n", 925 | "2 0 1\n", 926 | "3 0 0\n", 927 | "4 0 1\n", 928 | "... ... ...\n", 929 | "1837 0 0\n", 930 | "1838 0 0\n", 931 | "1839 0 0\n", 932 | "1840 0 0\n", 933 | "1841 0 0\n", 934 | "\n", 935 | "[1842 rows x 2 columns]" 936 | ] 937 | }, 938 | "metadata": {}, 939 | "output_type": "display_data" 940 | } 941 | ], 942 | "source": [ 943 | "df4 = pd.read_sql(\"SELECT PREDICT(CampaignModel) AS PredictedResponse, \\\n", 944 | " response AS ActualResponse FROM %s\" % predictTable, conn)\n", 945 | "display(df4)" 946 | ] 947 | }, 948 | { 949 | "cell_type": "markdown", 950 | "metadata": {}, 951 | "source": [ 952 | "### 8. VALIDATE MODEL command calculates accuracy metrics\n", 953 | "You can certainly take that output above and calculate the accuracy using a standard formula, but IntegratedML has a built-in function to do that!\n", 954 | "\n", 955 | "Each time you run the command \"VALIDATE MODEL...\" it generates a set of metrics calculated on the data passed into the query. Since this table can be a bit difficult to read in its raw form we use a simple \"pivot\" call to arrange the data." 956 | ] 957 | }, 958 | { 959 | "cell_type": "code", 960 | "execution_count": 13, 961 | "metadata": {}, 962 | "outputs": [ 963 | { 964 | "data": { 965 | "text/html": [ 966 | "
\n", 967 | "\n", 980 | "\n", 981 | " \n", 982 | " \n", 983 | " \n", 984 | " \n", 985 | " \n", 986 | " \n", 987 | " \n", 988 | " \n", 989 | " \n", 990 | " \n", 991 | " \n", 992 | " \n", 993 | " \n", 994 | " \n", 995 | " \n", 996 | " \n", 997 | " \n", 998 | " \n", 999 | " \n", 1000 | " \n", 1001 | " \n", 1002 | " \n", 1003 | " \n", 1004 | " \n", 1005 | " \n", 1006 | "
METRIC_NAMEAccuracyF-MeasurePrecisionRecall
VALIDATION_RUN_NAME
CampaignModel_t1_v10.60.20.810.11
\n", 1007 | "
" 1008 | ], 1009 | "text/plain": [ 1010 | "METRIC_NAME Accuracy F-Measure Precision Recall\n", 1011 | "VALIDATION_RUN_NAME \n", 1012 | "CampaignModel_t1_v1 0.6 0.2 0.81 0.11" 1013 | ] 1014 | }, 1015 | "metadata": {}, 1016 | "output_type": "display_data" 1017 | } 1018 | ], 1019 | "source": [ 1020 | "curs.execute(\"VALIDATE MODEL CampaignModel FROM %s\" % predictTable)\n", 1021 | "df5 = pd.read_sql(\"SELECT * FROM INFORMATION_SCHEMA.ML_VALIDATION_METRICS\", conn)\n", 1022 | "df6 = df5.pivot(index='VALIDATION_RUN_NAME', columns='METRIC_NAME', values='METRIC_VALUE')\n", 1023 | "display(df6)" 1024 | ] 1025 | } 1026 | ], 1027 | "metadata": { 1028 | "kernelspec": { 1029 | "display_name": "Python 3", 1030 | "language": "python", 1031 | "name": "python3" 1032 | }, 1033 | "language_info": { 1034 | "codemirror_mode": { 1035 | "name": "ipython", 1036 | "version": 3 1037 | }, 1038 | "file_extension": ".py", 1039 | "mimetype": "text/x-python", 1040 | "name": "python", 1041 | "nbconvert_exporter": "python", 1042 | "pygments_lexer": "ipython3", 1043 | "version": "3.6.9" 1044 | } 1045 | }, 1046 | "nbformat": 4, 1047 | "nbformat_minor": 2 1048 | } 1049 | -------------------------------------------------------------------------------- /jupyter-samples/intersystems-jdbc-3.1.0.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intersystems-community/integratedml-demo-template/f7ed655425bed01d5aad595a26554d8cd5757f4b/jupyter-samples/intersystems-jdbc-3.1.0.jar -------------------------------------------------------------------------------- /jupyter-samples/libirisodbcu35.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intersystems-community/integratedml-demo-template/f7ed655425bed01d5aad595a26554d8cd5757f4b/jupyter-samples/libirisodbcu35.so -------------------------------------------------------------------------------- /jupyter-samples/odbc.ini: -------------------------------------------------------------------------------- 1 | [user] 2 | Driver=InterSystems ODBC35 3 | Protocol=TCP 4 | Host=irisimlsvr 5 | Port=51773 6 | Namespace=USER 7 | UID=SUPERUSER 8 | Password=SYS 9 | Description=Sample namespace 10 | Query Timeout=0 11 | Static Cursors=0 12 | 13 | -------------------------------------------------------------------------------- /jupyter-samples/odbcinst.ini: -------------------------------------------------------------------------------- 1 | [InterSystems ODBC35] 2 | UsageCount=1 3 | Driver=/tf/libirisodbcu35.so 4 | Setup=/tf/libirisodbcu35.so 5 | SQLLevel=1 6 | FileUsage=0 7 | DriverODBCVer=02.10 8 | ConnectFunctions=YYN 9 | APILevel=1 10 | DEBUG=1 11 | CPTimeout= 12 | 13 | -------------------------------------------------------------------------------- /tf2-jupyter-jdbc/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM tensorflow/tensorflow:2.2.0-jupyter 2 | LABEL maintainer="Zhong Li " 3 | 4 | RUN apt-get update && apt-get install -y default-jre-headless && apt-get clean && rm -rf /var/lib/apt/lists/* 5 | 6 | COPY requirements.txt ./ 7 | 8 | RUN pip install --upgrade pip && \ 9 | pip install --no-cache-dir -r requirements.txt 10 | 11 | CMD /bin/bash -c "source /etc/bash.bashrc && jupyter notebook --notebook-dir=/tf --ip 0.0.0.0 --no-browser --allow-root --NotebookApp.token=''" -------------------------------------------------------------------------------- /tf2-jupyter-jdbc/requirements.txt: 
-------------------------------------------------------------------------------- 1 | jupyterthemes 2 | JayDeBeApi 3 | pandas 4 | scikit-learn 5 | 6 | --------------------------------------------------------------------------------
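Taken together, the notebooks in jupyter-samples all follow the same four-statement IntegratedML lifecycle: CREATE MODEL, TRAIN MODEL, VALIDATE MODEL, and PREDICT()/PROBABILITY() in ordinary SQL. A condensed sketch of that flow over JDBC, assuming the irisimlsvr container, credentials, and driver jar used throughout this repository plus the Marketing.Campaign* views created in the campaign notebook; the model name DemoModel is hypothetical:

```python
import jaydebeapi
import pandas as pd

# Connection details match the docker-compose defaults used throughout this repo.
conn = jaydebeapi.connect(
    "com.intersystems.jdbc.IRISDriver",
    "jdbc:IRIS://irisimlsvr:1972/USER",
    ["SUPERUSER", "SYS"],
    "./intersystems-jdbc-3.1.0.jar")
curs = conn.cursor()

# 1. Define: name the model and the target column; inputs come from the FROM clause.
curs.execute("CREATE MODEL DemoModel PREDICTING (RESPONSE) FROM Marketing.CampaignTrainingSmall")
# 2. Train: kicks off the configured provider (AutoML by default).
curs.execute("TRAIN MODEL DemoModel")
# 3. Validate: writes metrics to INFORMATION_SCHEMA.ML_VALIDATION_METRICS.
curs.execute("VALIDATE MODEL DemoModel FROM Marketing.CampaignPredictSmall")
metrics = pd.read_sql("SELECT * FROM INFORMATION_SCHEMA.ML_VALIDATION_METRICS", conn)
# 4. Predict: PREDICT() behaves like any other SQL function.
preds = pd.read_sql(
    "SELECT PREDICT(DemoModel) AS PredictedResponse, RESPONSE AS ActualResponse "
    "FROM Marketing.CampaignPredictSmall", conn)
conn.close()
```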