├── .gitattributes
├── .github
│   └── workflows
│       └── iris-contest-workflows.yml
├── .gitignore
├── .vscode
│   ├── launch.json
│   └── settings.json
├── LICENSE
├── README.md
├── docker-compose.yml
├── iris-aa-server
│   ├── Dockerfile
│   ├── README.md
│   ├── data
│   │   ├── Campaign.csv
│   │   ├── HateSpeech.xml
│   │   ├── IrisDataset.xml
│   │   ├── Loader.xml
│   │   ├── LoanPerformance.xml
│   │   ├── NLPUtils.cls
│   │   ├── appointment-noshows.csv
│   │   ├── breast-cancer.csv
│   │   ├── hate-speech.tar
│   │   ├── loans.gof
│   │   ├── readmission.csv
│   │   └── titanic.csv
│   ├── iris.script
│   └── src
│       └── Util
│           └── Loader.cls
├── jupyter-samples
│   ├── ED_visit_90_day.ipynb
│   ├── biomedical-integratedml-PyODBC.ipynb
│   ├── campaign-integratedml-jdbc.ipynb
│   ├── intersystems-jdbc-3.1.0.jar
│   ├── libirisodbcu35.so
│   ├── odbc.ini
│   ├── odbcinst.ini
│   └── readmission-integratedml-jdbc.ipynb
└── tf2-jupyter-jdbc
    ├── Dockerfile
    └── requirements.txt
/.gitattributes:
--------------------------------------------------------------------------------
1 | *.cls linguist-language=ObjectScript
2 | *.mac linguist-language=ObjectScript
3 | *.int linguist-language=ObjectScript
4 | *.inc linguist-language=ObjectScript
5 | *.csp linguist-language=Html
6 |
7 | *.sh text eol=lf
8 | *.cls text eol=lf
9 | *.mac text eol=lf
10 | *.int text eol=lf
11 | *.inc text eol=lf
12 | Dockerfil* text eol=lf
13 |
--------------------------------------------------------------------------------
/.github/workflows/iris-contest-workflows.yml:
--------------------------------------------------------------------------------
1 | name: objectscriptquality
2 | on: push
3 |
4 | jobs:
5 | linux:
6 | name: Linux build
7 | runs-on: ubuntu-latest
8 |
9 | steps:
10 | - name: Execute ObjectScript Quality Analysis
11 | run: wget https://raw.githubusercontent.com/litesolutions/objectscriptquality-jenkins-integration/master/iris-community-hook.sh && sh ./iris-community-hook.sh
12 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .ipynb_checkpoints
2 |
--------------------------------------------------------------------------------
/.vscode/launch.json:
--------------------------------------------------------------------------------
1 | {
2 | "version": "0.2.0",
3 | "configurations": [
4 | {
5 | "type": "objectscript",
6 | "request": "launch",
7 | "name": "ObjectScript Debug Class",
8 | "program": "##class(PackageSample.ObjectScript).Test()",
9 | },
10 | {
11 | "type": "objectscript",
12 | "request": "attach",
13 | "name": "ObjectScript Attach",
14 | "processId": "${command:PickProcess}",
15 | "system": true
16 | }
17 | ]
18 | }
--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 | "files.associations": {
3 |
4 | "iris.script": "objectscript",
5 | },
6 | "objectscript.conn" :{
7 | "ns": "USER",
8 | "username": "superuser",
9 | "active": true,
10 | "docker-compose": {
11 | "service": "irisimlsvr",
12 | "internalPort": 52773
13 | },
14 | "links": {
15 | "IRIS IntegratedML Jupyter": "http://localhost:8896/tree",
16 | "Webterminal": "http://localhost:8092/terminal/"
17 | }
18 | }
19 | }
20 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 InterSystems Developer Community
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # integratedml-demo-template
2 | This is a template GitHub repository for IntegratedML demos on InterSystems IRIS.
3 |
4 | This repository comes with a few example Jupyter notebooks (http://jupyter.org) which demonstrate how to use IntegratedML in InterSystems IRIS Community Edition (Advanced Analytics including IntegratedML) in a docker container.
5 |
6 | ## Contents
7 | * [What is IntegratedML?](#what-is-integratedml)
8 | * [What's inside this template](#whats-inside-this-template)
9 | * [Pre-configured environment, and sample data](#pre-configured-environment-and-sample-data)
10 | * [Sample notebooks to get you started](#sample-notebooks-to-get-you-started)
11 | * [Demo environment topology](#demo-environment-topology)
12 | * [Prerequisites](#prerequisites)
13 | * [Tested environments](#tested-environments)
14 | * [Installation](#installation)
15 | * [How to develop your IntegratedML solution with the IntegratedML Template Repository](#how-to-develop-your-integratedml-solution-with-the-integratedml-template-repository)
16 | * [Use this template](#use-this-template)
17 | * [Checkout the repo](#checkout-the-repo)
18 | * [Start developing](#start-developing)
19 | * [How to Import data into InterSystems IRIS](#how-to-import-data-into-intersystems-iris)
20 | * [Importing data from CSV file](#importing-data-from-csv-file)
21 | * [Importing data from CSV URL](#importing-data-from-csv-url)
22 |
23 | ## What is IntegratedML?
24 | IntegratedML is a feature of the InterSystems IRIS data platform that brings machine learning to SQL developers.
25 |
26 |
27 |
28 |
29 | IntegratedML is
30 | - all-SQL -- Build and train machine learning models using intuitive custom SQL commands, fully integrated within the InterSystems IRIS SQL processor
31 | - turnkey -- no packages or programming languages to learn, nothing to install
32 | - modular -- leverages "best of breed" open source and proprietary AutoML frameworks
33 |
34 | Learn more about InterSystems IRIS and IntegratedML at the [InterSystems Learning site](https://learning.intersystems.com/course/view.php?name=Learn%20IntegratedML)
35 |
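The entire workflow is plain SQL. As a minimal sketch, here is the same three-statement flow shown in the [iris-aa-server README](iris-aa-server/README.md), run against the DataMining.IrisDataset table that this template pre-loads:

```sql
-- define a model: a name, the target column, and the training data
CREATE MODEL Flowers PREDICTING (Species) FROM DataMining.IrisDataset;

-- train it with the configured AutoML provider
TRAIN MODEL Flowers FROM DataMining.IrisDataset;

-- apply it inline, like any other SQL function
SELECT TOP 10 PREDICT(Flowers) AS PredictedSpecies, Species FROM DataMining.IrisDataset;
```
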
36 | ## What's inside this template
37 |
38 | ### Pre-configured environment, and sample data
39 | This template creates a docker environment (via "docker-compose up") of two pre-configured containers:
40 | 1. tf2jupyter: Jupyter + TensorFlow 2.2 (without GPU), with a few sample notebook files (in its Dockerfile)
41 | 2. irisimlsvr: an InterSystems IRIS Community Edition (Advanced Analytics) instance with pre-loaded sample data in the USER namespace (see its [Dockerfile](iris-aa-server/Dockerfile) and the [iris.script](iris-aa-server/iris.script) that is run at startup)
42 |
43 | ### Sample notebooks to get you started
44 | Four sample notebooks are included -- by default this template starts Jupyter at http://localhost:8896/tree:
45 | - [campaign-integratedml-jdbc.ipynb](jupyter-samples/campaign-integratedml-jdbc.ipynb): A simple JDBC connection from tf2jupyter to a sample data table (Marketing Campaign data) in InterSystems IRIS's USER namespace, showing some uses of IntegratedML, including the VALIDATE MODEL command.
46 | - [readmission-integratedml-jdbc.ipynb](jupyter-samples/readmission-integratedml-jdbc.ipynb): Demonstrates use of IntegratedML on a hospital readmission prediction dataset.
47 | - [biomedical-integratedml-PyODBC.ipynb](jupyter-samples/biomedical-integratedml-PyODBC.ipynb): Connects to the InterSystems IRIS server over PyODBC, builds and uses an IntegratedML machine learning model, and runs a complex SQL query using the PREDICT() and PROBABILITY() IntegratedML SQL functions.
48 | - [ED_visit_90_day.ipynb](jupyter-samples/ED_visit_90_day.ipynb): Builds and uses an IntegratedML machine learning model to predict visits to the Emergency Department, using data from a Health Insight server kindly provided by Joseph Cofone at Baystate Health. *NOTE: this notebook is not executable!*
49 |
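The model-quality checks these notebooks rely on are also plain SQL. As an illustrative sketch (model name from the earlier example; the 'Iris-setosa' label value is an assumption about how the Species strings are stored):

```sql
-- compute accuracy metrics on a dataset, then inspect them
VALIDATE MODEL Flowers FROM DataMining.IrisDataset;
SELECT * FROM INFORMATION_SCHEMA.ML_VALIDATION_METRICS;

-- PREDICT() and PROBABILITY() can appear anywhere a scalar expression can
SELECT ID,
       PREDICT(Flowers) AS PredictedSpecies,
       PROBABILITY(Flowers FOR 'Iris-setosa') AS SetosaProbability
FROM DataMining.IrisDataset
WHERE PROBABILITY(Flowers FOR 'Iris-setosa') > 0.9;
```
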
50 | ## Demo environment topology
51 |
52 | Two containers connected over JDBC/ODBC: tf2jupyter (Jupyter on host port 8896, TensorBoard on 6026) and irisimlsvr (InterSystems IRIS with IntegratedML; superserver on host port 8091, Management Portal and webterminal on 8092).
53 |
54 |
55 | ## Prerequisites
56 | Make sure you have [git](https://git-scm.com/book/en/v2/Getting-Started-Installing-Git) and [Docker desktop](https://www.docker.com/products/docker-desktop) installed.
57 |
58 | ## Tested environments
59 | This template has been tested briefly on AWS Ubuntu, macOS, and Windows 10 (using Docker Toolbox only). It should work in other Docker environments too - let us know if you encounter any issues.
60 |
61 | ## Installation
62 |
63 | 1. Clone/git pull the repo into any local directory:
64 |
65 | ```
66 | $ git clone https://github.com/intersystems-community/integratedml-demo-template.git
67 | ```
68 |
69 | 2. Open a Docker terminal in this directory and run:
70 |
71 | ```
72 | $ docker-compose build
73 | ```
74 |
75 | 3. Run the IRIS and Jupyter notebook server containers:
76 |
77 | ```
78 | $ docker-compose up -d
79 | ```
80 |
81 | 4. Open a browser to access the notebooks:
82 |
83 | ```
84 | http://localhost:8896/tree
85 | ```
86 | Note: use `docker-compose ps` to confirm tf2jupyter's ports, and make sure the right localhost port is used if you are tunneling to a remote host.
87 |
88 | 5. Examine the test data with webterminal.
89 |    Open the terminal at the URL below and log in with the SuperUser / SYS credentials:
90 | ```
91 | http://localhost:8092/terminal/
92 | ```
93 | Enter **/sql** mode and run SQL queries to examine the data in IRIS.
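For example, against the tables that iris.script loads at build time:

```sql
SELECT TOP 10 * FROM Titanic.Passenger;
SELECT COUNT(*) FROM Marketing.Campaign;
```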
94 |
95 |
96 | # How to develop your IntegratedML solution with the IntegratedML Template Repository
97 | ## Use this template
98 | Click the "Use this template" button on GitHub to create a new repository that is a copy of this one.
99 |
100 | ## Checkout the repo
101 | Clone your new repo to a local folder.
102 |
103 | ## Start developing
104 | Install [VSCode](https://code.visualstudio.com/) with the [Docker](https://marketplace.visualstudio.com/items?itemName=ms-azuretools.vscode-docker) and [ObjectScript](https://marketplace.visualstudio.com/items?itemName=daimor.vscode-objectscript) extensions, then open the folder in VSCode.
105 |
106 | Import your data as listed below, rebuild the containers to load it, and use IntegratedML via SQL tools, as described in the Jupyter notebooks.
107 |
108 |
109 | # How to Import data into InterSystems IRIS
110 | ## Importing data from CSV file
111 | 1. Add a CSV file to the repository, e.g. [this titanic.csv](https://github.com/intersystems-community/integratedml-demo-template/blob/master/iris-aa-server/data/titanic.csv)
112 | 2. Add an import-data call to your IRIS initialisation script.
113 | Here is an [example line to import titanic.csv](https://github.com/intersystems-community/integratedml-demo-template/blob/0db187b7fd127ff5432b68617bca7cfdadaaed2b/iris-aa-server/iris.script#L13) into the IRIS Titanic.Passenger class along with its data.
114 | 3. Query the data from any SQL tool, webterminal, or InterSystems ObjectScript with:
115 | ```
116 | SELECT * FROM Titanic.Passenger
117 | ```
118 | ## Importing data from CSV URL
119 | If your file is accessible remotely, you can import it as follows:
120 | 1. Add the import-CSV-from-URL line into [iris.script](iris-aa-server/iris.script).
121 | Here is an example line to [import countries.csv data from a URL](https://github.com/intersystems-community/integratedml-demo-template/blob/7feaffef0a47c7c46cc683d89bdbaedbce48071c/iris-aa-server/iris.script#L17)
122 | 2. Rebuild the docker image (the easiest way is via _docker-compose_ -- ```docker-compose build```). This creates the User.Countries class and imports the data, which you can then query with SQL from the Countries table:
123 | ```
124 | SELECT * FROM COUNTRIES
125 | ```
126 |
127 |
--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
1 | version: '3'
2 |
3 | services:
4 |
5 | irisimlsvr:
6 | build:
7 | context: ./iris-aa-server
8 | image: iris-aa-server:2021.2
9 | hostname: irisimlsvr
10 | restart: on-failure
11 | ports:
12 | - 8091:1972 # 1972 is the superserver default port
13 | - 8092:52773 # 52773 is the webserver/management portal port
14 | volumes:
15 | - ./iris-shared:/shared
16 | tf2jupyter: # tensorflow with jupyter
17 | build:
18 | context: ./tf2-jupyter-jdbc
19 | image: tf2-jupyter-jdbc:1.0.0-iml-template
20 | hostname: tf2jupyter
21 | restart: on-failure
22 | ports:
23 | - 8896:8888 # 8888 is the docker jupyter service port
24 | - 6026:6006 # 6006 is the tensorboard port
25 | volumes:
26 | - ./jupyter-samples:/tf #shared volumes
27 |
28 |
29 |
--------------------------------------------------------------------------------
/iris-aa-server/Dockerfile:
--------------------------------------------------------------------------------
1 | ARG IMAGE=store/intersystems/iris-aa-community:2020.3.0AA.331.0
2 | ARG IMAGE=intersystemsdc/iris-aa-community:2020.3.0AA.331.0-zpm
3 | ARG IMAGE=intersystemsdc/iris-ml-community:2021.2.0.651.0-zpm
4 | ARG IMAGE=intersystemsdc/iris-ml-community
5 | FROM $IMAGE
6 | LABEL maintainer="Thomas Dyar "
7 |
8 | USER root
9 | RUN mkdir /data \
10 | && chown irisowner /data
11 | USER irisowner
12 |
13 | # copy files
14 | COPY data /data
15 | COPY src /data/src
16 | COPY iris.script /tmp/iris.script
17 |
18 | # special extract treatment for hate-speech dataset
19 | # RUN mkdir /data/hate-speech/ \
20 | # && tar -xf /data/hate-speech.tar -C /data/
21 |
22 | # load demo stuff
23 | RUN iris start IRIS \
24 | && iris session IRIS < /tmp/iris.script
25 |
26 | # RUN rm -r /data/*
27 |
--------------------------------------------------------------------------------
/iris-aa-server/README.md:
--------------------------------------------------------------------------------
1 | # IRIS Advanced Analytics with IntegratedML Demo
2 |
3 | This folder contains a few simple datasets to demonstrate InterSystems IRIS IntegratedML (previously known as QuickML). The enclosed Dockerfile can be used separately from the rest of the integratedml-demo-template, if you do not want to use the Jupyter Notebook interface.
4 |
5 | ## How to build
6 |
7 | The included Dockerfile will pull the IRIS Advanced Analytics Community Edition (with IntegratedML) container image from the InterSystems public Docker repository, and set up a few simple datasets.
8 |
9 | ```
10 | docker build --tag integratedml-demo .
11 | ```
12 |
13 | To start your container, use the following command (or your favourite equivalent; note that this one will remove your container after it stops):
14 |
15 | ```
16 | docker run --rm -d -p 9091:51773 -p 9092:52773 --name integratedml integratedml-demo
17 | ```
18 |
19 | The IRIS password is initialized as SYS. You can get in directly through the following command, through the SMP, or by connecting with a SQL client such as [DBeaver](https://dbeaver.io/):
20 |
21 | ```
22 | docker exec -it integratedml iris sql IRIS
23 | ```
24 |
25 | ## How to demo
26 |
27 | Using IntegratedML takes only three simple commands:
28 |
29 | ```sql
30 | CREATE MODEL Flowers PREDICTING (Species) FROM DataMining.IrisDataset;
31 | TRAIN MODEL Flowers FROM DataMining.IrisDataset;
32 | SELECT TOP 20 PREDICT(Flowers) AS PredictedSpecies, Species AS ActualSpecies FROM DataMining.IrisDataset;
33 | ```
34 |
35 | Note that the semicolons at the end are for use in a multiline-style client such as DBeaver or SQuirreL and are not part of regular IRIS SQL. See the [IntegratedML Syntax overview](https://usconfluence.iscinternal.com/display/TBD/IntegratedML+Syntax) if you want to be more creative. For example, you can add ```USING { "provider": "H2O" }``` to your CREATE or TRAIN commands to test the H2O provider instead of the default one.
36 |
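For example, a sketch of the USING clause (assuming the H2O provider is available on your instance):

```sql
TRAIN MODEL Flowers FROM DataMining.IrisDataset USING { "provider": "H2O" };
```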
37 | ### Included datasets
38 |
39 | These are broadly available datasets, but we may not have permission to _re_-distribute them, so keep this repo to yourself:
40 | - \[SQLUser.\]Campaign: as used in the campaign showcase in the [ML Toolkit](https://github.com/intersystems/MLToolkit). The target column to put your crosshairs on is RESPONSE
41 | - \[SQLUser.\]BreastCancer
42 |
--------------------------------------------------------------------------------
/iris-aa-server/data/HateSpeech.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | %Persistent
5 | 65407,55491.800962
6 | 65407,46597.061384
7 |
8 |
9 | %String
10 |
11 |
12 |
13 |
14 | %String
15 |
16 |
17 |
18 | %String
19 |
20 |
21 |
22 | %String
23 |
24 |
25 |
26 | %Integer
27 |
28 |
29 |
30 |
34 |
35 |
36 |
37 | %String
38 |
39 |
40 |
41 | %String
42 |
43 |
44 |
45 | 1
46 | pDir:%String="C:\Users\bdeboe\Documents\GitHub\hate-speech-dataset\"
47 | %Status
48 | WC")
62 | set tRow = ..%New()
63 | set tRow.FileName = $piece(tLine,",",1)_".txt"
64 | set tRow.UserId = $piece(tLine,",",2)
65 | set tRow.SubforumId = $piece(tLine,",",3)
66 | set tRow.ContextNeeded = $piece(tLine,",",4)
67 | set tRow.Label = $piece(tLine,",",5)
68 |
69 | set tCommentFile = ##class(%Stream.FileCharacter).%New()
70 | set tSC = tCommentFile.LinkToFile(pDir_"all_files"_tSeparator_tRow.FileName)
71 | quit:$$$ISERR(tSC)
72 | set tRow.Comment = tCommentFile.Read()
73 |
74 | if ##class(%File).Exists(pDir_"sampled_train"_tSeparator_tRow.FileName) {
75 | set tRow.Sample = "train"
76 | } elseif ##class(%File).Exists(pDir_"sampled_test"_tSeparator_tRow.FileName) {
77 | set tRow.Sample = "test"
78 | }
79 |
80 | set tSC = tRow.%Save()
81 | quit:$$$ISERR(tSC)
82 | }
83 |
84 | } catch (ex) {
85 | set tSC = ex.AsStatus()
86 | }
87 | quit tSC
88 | ]]>
89 |
90 |
91 |
92 | %Storage.Persistent
93 | ^NLP.HateSpeechD
94 | HateSpeechDefaultData
95 | ^NLP.HateSpeechD
96 | ^NLP.HateSpeechI
97 | ^NLP.HateSpeechS
98 |
99 |
100 | %%CLASSNAME
101 |
102 |
103 | Comment
104 |
105 |
106 | FileName
107 |
108 |
109 | UserId
110 |
111 |
112 | SubforumId
113 |
114 |
115 | ContextNeeded
116 |
117 |
118 | Label
119 |
120 |
121 | Sample
122 |
123 |
124 |
125 |
126 |
127 |
--------------------------------------------------------------------------------
/iris-aa-server/data/IrisDataset.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
7 | The data set contains 3 classes of 50 instances each, where each class refers to a type of iris plant. One class is linearly separable from the other 2; the latter are NOT linearly separable from each other. Each record has 5 attributes:
8 |
9 | - sepal length in cm
10 | - sepal width in cm
11 | - petal length in cm
12 | - petal width in cm
13 | - class (species):
14 |
15 | - Iris Setosa
16 | - Iris Versicolour
17 | - Iris Virginica
18 |
19 |
20 |
21 | The dataset is taken from UCI Machine Learning Repository [http://archive.ics.uci.edu/ml]. Irvine, CA: University of California, School of Information and Computer Science by Frank, A. & Asuncion, A. (2010).]]>
22 | %Persistent
23 | 64926,50261.117
24 | 62312,41576.508001
25 |
26 |
27 | %Double
28 |
29 |
30 |
31 | %Double
32 |
33 |
34 |
35 | %Double
36 |
37 |
38 |
39 | %Double
40 |
41 |
42 |
43 | %String
44 |
45 |
46 |
47 | 1
48 | %Status
49 | "
71 |
72 | set count = $i(count)
73 |
74 | set flower = ..%New()
75 | set flower.SepalLength = $p(line,",",1)
76 | set flower.SepalWidth = $p(line,",",2)
77 | set flower.PetalLength = $p(line,",",3)
78 | set flower.PetalWidth = $p(line,",",4)
79 | set flower.Species = $p(line,",",5)
80 |
81 | set sc = flower.%Save()
82 | Quit:$$$ISERR(sc)
83 | }
84 |
85 | } Catch (ex) {
86 | set sc = ex.AsStatus()
87 | }
88 |
89 | if ($$$ISERR(sc)) {
90 | do $system.OBJ.DisplayError(sc)
91 | }
92 |
93 | Quit sc
94 | ]]>
95 |
96 |
97 |
98 |
100 |
252 |
253 | ]]>
254 |
255 |
256 |
257 | %Storage.Persistent
258 | ^DataMining.IrisDatasetD
259 | IrisDatasetDefaultData
260 | ^DataMining.IrisDatasetD
261 | ^DataMining.IrisDatasetI
262 | ^DataMining.IrisDatasetS
263 |
264 | listnode
265 |
266 |
267 | %%CLASSNAME
268 |
269 |
270 | SepalLength
271 |
272 |
273 | SepalWidth
274 |
275 |
276 | PetalLength
277 |
278 |
279 | PetalWidth
280 |
281 |
282 | Species
283 |
284 |
285 |
286 |
287 |
288 |
289 |
290 | DataMining.IrisDataset.]]>
292 | 1
293 | %DeepSee.PMML.Definition
294 | 65204,51678.214
295 | 62888,42919.529995
296 |
297 |
298 | http://www.intersystems.com/deepsee/pmml
299 |
301 |
302 |
303 | 03/11/2013 11:54:41
304 |
305 |
306 |
307 |
308 |
309 |
310 |
311 |
312 |
313 |
314 |
315 | SELECT ID, PetalLength, PetalWidth, SepalLength, SepalWidth, Species FROM DataMining.IrisDataset
316 |
317 |
318 |
319 |
320 |
321 |
322 |
323 |
324 |
325 |
326 |
327 |
328 |
329 |
330 |
331 |
332 |
333 |
334 |
335 |
336 |
337 |
338 |
339 |
343 |
344 |
345 |
346 |
347 |
348 |
349 |
350 |
351 |
352 |
353 |
354 |
355 |
356 |
357 |
358 |
359 |
360 |
361 |
362 |
363 |
364 |
365 |
366 |
367 |
368 |
369 |
370 |
371 |
372 |
373 |
374 |
375 |
376 |
377 |
378 |
379 |
380 |
381 |
382 |
383 |
384 |
385 |
386 |
387 |
388 |
389 |
390 |
391 |
392 |
393 |
394 |
395 |
396 |
397 |
398 |
399 |
400 |
402 |
403 |
404 |
405 |
406 |
407 |
408 |
409 |
410 |
411 |
412 |
413 |
414 |
415 |
416 |
417 |
418 |
419 |
420 |
421 |
422 |
423 |
424 |
425 |
426 |
427 |
428 |
429 |
430 |
431 |
432 |
433 |
434 |
435 |
436 |
437 |
438 |
439 |
440 |
441 |
442 |
443 | ]]>
444 |
445 |
446 |
447 |
--------------------------------------------------------------------------------
/iris-aa-server/data/Loader.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | 65330,70445.358752
5 | 65330,67823.562348
6 |
7 |
8 | 1
9 | pFile:%String,pClass:%String,pSeparator:%String=","
10 | %Status
11 | WC")
25 | for i = 1:1:$l(tHeader,pSeparator) {
26 | set h = $p(tHeader,pSeparator,i)
27 | quit:h=""
28 | set tColumns($i(tColumns))=tProps($$$UPPER($tr(h,"_"))),
29 | tTypes(tColumns) = tPropTypes($$$UPPER($tr(h,"_")))
30 | }
31 |
32 | while 'tFile.AtEnd {
33 | set tLine = $zstrip(tFile.ReadLine(),"<>WC")
34 | continue:tLine=""
35 |
36 | set tObj = $classmethod(pClass,"%New")
37 | for i=1:1:tColumns {
38 | set tValue = $piece(tLine,pSeparator,i)
39 | set:tTypes(i)="%Library.Date" tValue = $zdateh($tr(tValue,"/","-"),3)
40 | set $property(tObj, tColumns(i)) = tValue
41 | }
42 | set tSC = tObj.%Save()
43 | quit:$$$ISERR(tSC)
44 | set c = $i(c)
45 | }
46 | quit:$$$ISERR(tSC)
47 |
48 | write !,"Read ",c," records for table ",pClass
49 |
50 | } catch (ex) {
51 | set tSC = ex.AsStatus()
52 | }
53 | quit tSC
54 | ]]>
55 |
56 |
57 |
58 |
--------------------------------------------------------------------------------
/iris-aa-server/data/LoanPerformance.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | %Persistent
5 | 65329,33578.231722
6 | 65268,55574.517277
7 |
8 |
9 | %Integer
10 |
11 |
12 |
13 | %Integer
14 |
15 |
16 |
17 | %Double
18 |
19 |
20 |
21 | %Integer
22 |
23 |
24 |
25 | %Integer
26 |
27 |
28 |
29 | %Integer
30 |
31 |
32 |
33 | %Integer
34 |
35 |
36 |
37 | %Date
38 |
39 |
40 |
41 | %String
42 |
43 |
44 |
45 | %Date
46 |
47 |
48 |
49 | %Integer
50 |
51 |
52 |
53 | %Integer
54 |
55 |
56 |
57 | %Boolean
58 |
59 |
60 |
61 | %Boolean
62 |
63 |
64 |
65 | %Boolean
66 |
67 |
68 |
69 | %Boolean
70 |
71 |
72 |
73 | %Boolean
74 |
75 |
76 |
77 | %Boolean
78 |
79 |
80 |
81 | %Integer
82 |
83 |
84 |
85 | %String
86 |
87 |
88 |
89 |
90 | %Integer
91 |
92 |
93 |
94 | %Integer
95 |
96 |
97 |
98 | %Integer
99 |
100 |
101 |
102 | %Integer
103 |
104 |
105 |
106 | %Integer
107 |
108 |
109 |
110 | %Integer
111 |
112 |
113 |
114 | %Integer
115 |
116 |
117 |
118 | %Integer
119 |
120 |
121 |
122 | %Integer
123 |
124 |
125 |
126 | %Integer
127 |
128 |
129 |
130 | %Integer
131 |
132 |
133 |
134 | %Integer
135 |
136 |
137 |
138 | %Integer
139 |
140 |
141 |
142 | %Integer
143 |
144 |
145 |
146 | %Integer
147 |
148 |
149 |
150 | %Integer
151 |
152 |
153 |
154 | %Integer
155 |
156 |
157 |
158 | %Integer
159 |
160 |
161 |
162 | %Integer
163 |
164 |
165 |
166 | %Boolean
167 |
168 |
169 |
170 | bitmap
171 | LoanDefault
172 |
173 |
174 |
175 | AssetCost
176 |
177 |
178 |
179 | 1
180 | csv:%String
181 |
194 |
195 |
196 |
197 | 1
198 | line:%String
199 |
245 |
246 |
247 |
248 | 1
249 | str
250 | %Integer
251 |
257 |
258 |
259 |
260 | 1
261 | str
262 | %Date
263 |
270 |
271 |
272 |
273 | %Storage.Persistent
274 | ^User.LoanPerformanceD
275 | LoanPerformanceDefaultData
276 | ^User.LoanPerformanceD
277 | ^User.LoanPerformanceI
278 | ^User.LoanPerformanceS
279 | sequence
280 |
281 |
282 | %%CLASSNAME
283 |
284 |
285 | DisbursedAmount
286 |
287 |
288 | AssetCost
289 |
290 |
291 | LTV
292 |
293 |
294 | BranchId
295 |
296 |
297 | SupplierId
298 |
299 |
300 | ManufacturerId
301 |
302 |
303 | CurrentPincodeId
304 |
305 |
306 | DateOfBirth
307 |
308 |
309 | EmploymentType
310 |
311 |
312 | DisbursalDate
313 |
314 |
315 | StateId
316 |
317 |
318 | EmployeeCodeId
319 |
320 |
321 | MobileNumberAvailable
322 |
323 |
324 | Aadhar
325 |
326 |
327 | PAN
328 |
329 |
330 | Voter
331 |
332 |
333 | Driving
334 |
335 |
336 | Passport
337 |
338 |
339 | CNSScore
340 |
341 |
342 | PrimaryAccounts
343 |
344 |
345 | PrimaryAccountsActive
346 |
347 |
348 | PrimaryAccountsOverdue
349 |
350 |
351 | PrimaryCurrentBalance
352 |
353 |
354 | PrimarySanctionedAmount
355 |
356 |
357 | PrimaryDisbursedAmount
358 |
359 |
360 | SecondaryAccounts
361 |
362 |
363 | SecondaryAccoutsActive
364 |
365 |
366 | SecondaryAccountsOverdue
367 |
368 |
369 | SecondaryCurrentBalance
370 |
371 |
372 | SecondarySanctionedAmount
373 |
374 |
375 | SecondaryDisbursedAmount
376 |
377 |
378 | PrimaryInstallmentAmount
379 |
380 |
381 | SecondaryInstallmentAmount
382 |
383 |
384 | NewAccountsLastSixMonths
385 |
386 |
387 | DelinquentAccountsLastSixMonths
388 |
389 |
390 | AverageAccountAgeMonths
391 |
392 |
393 | CreditHistoryLengthMonths
394 |
395 |
396 | Inquiries
397 |
398 |
399 | LoanDefault
400 |
401 |
402 | CNSScoreDescription
403 |
404 |
405 | SecondaryAccountsActive
406 |
407 |
408 |
409 |
410 |
411 |
--------------------------------------------------------------------------------
/iris-aa-server/data/NLPUtils.cls:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | %IKInclude
5 | 65407,36216.674515
6 |
7 |
8 | 1
9 | pViewName:%String
10 | %Status
11 | Drop_NLP
12 | 1
13 |
33 |
34 |
35 |
36 | 1
37 |
38 | %Status
39 | Build_NLP
40 | 1
41 | [](){}/\|.;,:=?"),
155 | tPropNames(tEntUniId)=tPropName,
156 | tEntUniIds = tEntUniIds_","_tEntUniId
157 |
158 | if $d(tPropIndex(tPropName)) {
159 | // duplicate! - ignore for now, we'll just sum up frequencies
160 | } else {
161 | do AddColumn(tTableClass,tPropName,"Frequency for '"_tEntValue_"' (ID:"_tEntUniId_")")
162 | }
163 | set tPropIndex(tPropName,tEntUniId)=""
164 | }
165 | set tSC = tTableClass.%Save()
166 | quit:$$$ISERR(tSC)
167 |
168 | set tSC = $system.OBJ.Compile(tTableClass.Name,"ck"_$s(pVerbose:"d",1:"-d"))
169 | quit:$$$ISERR(tSC)
170 |
171 | write:pVerbose !,"Populating feature table"
172 | set tSC = tStatement.%Prepare("SELECT EntUniId, FrequencyAsConcept+FrequencyAsRelation Frequency FROM %iKnow_Objects.EntityInSourceDetails WHERE DomainId = "_tDomainId_" AND SourceId = ? AND EntUniId IN ("_$e(tEntUniIds,2,*)_")")
173 | quit:$$$ISERR(tSC)
174 | set tRS1 = ##class(%SQL.Statement).%ExecDirect(,"SELECT SourceId, LocalReference FROM %iKnow_Objects.Source where domainid = "_tDomainId)
175 | while tRS1.%Next() {
176 | set tRow = $classmethod(tTableClass.Name,"%New")
177 | set tRow.NLPID = tRS1.%Get("LocalReference")
178 | set tRS2 = tStatement.%Execute(tRS1.%Get("SourceId"))
179 | while tRS2.%Next() {
180 | set $property(tRow, tPropNames(tRS2.%Get("EntUniId"))) = $property(tRow, tPropNames(tRS2.%Get("EntUniId"))) + tRS2.%Get("Frequency")
181 | }
182 | set tSC = tRow.%Save()
183 | quit:$$$ISERR(tSC)
184 | }
185 | quit:$$$ISERR(tSC)
186 |
187 | write:pVerbose !
188 |
189 | } catch (ex) {
190 | set tSC = ex.AsStatus()
191 | }
192 | if $$$ISERR(tSC) && pVerbose {
193 | write !
194 | do $system.OBJ.DisplayError(tSC)
195 | write !
196 | }
197 | quit tSC
198 |
199 | AddColumn(cls, name, desc, type="%Integer")
200 | set prop = ##class(%Dictionary.PropertyDefinition).%New()
201 | set prop.parent = cls
202 | if '$zname(name,6) {
203 | set prop.SqlFieldName = name
204 | set prop.Name = $tr(name,"_- ""'+$=<>[](){}/\|.;,:=?")
205 | } else {
206 | set prop.Name = name
207 | }
208 | set prop.Description = desc
209 | set prop.Type = type
210 | quit
211 |
212 | AddParam(cls, name, value)
213 | set param = ##class(%Dictionary.ParameterDefinition).%New()
214 | set param.parent = cls
215 | set param.Name = name
216 | set param.Default = value
217 | ]]>
218 |
219 |
220 |
221 |
--------------------------------------------------------------------------------
/iris-aa-server/data/hate-speech.tar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/intersystems-community/integratedml-demo-template/f7ed655425bed01d5aad595a26554d8cd5757f4b/iris-aa-server/data/hate-speech.tar
--------------------------------------------------------------------------------
/iris-aa-server/data/loans.gof:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/intersystems-community/integratedml-demo-template/f7ed655425bed01d5aad595a26554d8cd5757f4b/iris-aa-server/data/loans.gof
--------------------------------------------------------------------------------
/iris-aa-server/iris.script:
--------------------------------------------------------------------------------
1 | zn "%SYS"
2 | w ##class(Security.Users).UnExpireUserPasswords("*")
3 |
4 | zn "USER"
5 | zpm "install sslclient"
6 | zpm "install csvgen"
7 | zpm "install webterminal"
8 |
9 | // Load all the class definitions from the data directory
10 | do $system.OBJ.LoadDir("/data","cuk",,1)
11 |
12 | // Show how csvgen can load data from csv files locally or remotely
13 | do ##class(community.csvgen).Generate("/data/titanic.csv",",","Titanic.Passenger")
14 | do ##class(community.csvgen).GenerateFromURL("https://raw.githubusercontent.com/datasciencedojo/datasets/master/WorldDBTables/CountryTable.csv",",","SQLUser.Countries")
15 | do ##class(community.csvgen).Generate("/data/readmission.csv",",","Patient.Readmission")
16 | do ##class(community.csvgen).Generate("/data/Campaign.csv",";","Marketing.Campaign")
17 | do ##class(community.csvgen).Generate("/data/breast-cancer.csv",",","Biomedical.BreastCancer")
18 |
19 | // Load globals and build indices for the LoanPerformance table
20 | do $system.OBJ.Load("/data/loans.gof")
21 | do ##class(User.LoanPerformance).%BuildIndices()
22 |
23 | // do $system.OBJ.Load("/data/Loader.xml","cf")
24 |
25 |
26 | halt
27 |
--------------------------------------------------------------------------------
/iris-aa-server/src/Util/Loader.cls:
--------------------------------------------------------------------------------
1 | Class Util.Loader
2 | {
3 |
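/// Loads a delimiter-separated file into the extent of pClass: the header row is matched
/// case-insensitively (ignoring underscores) against the class's property names, values of
/// %Date properties are converted from YYYY/MM/DD, and any existing extent data is cleared first.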
4 | ClassMethod Load(pFile As %String, pClass As %String, pSeparator As %String = ",") As %Status
5 | {
6 | set tSC = $$$OK
7 | try {
8 | do $classmethod(pClass,"%KillExtent")
9 | set tClassDef = ##class(%Dictionary.ClassDefinition).%OpenId(pClass)
10 | for i=1:1:tClassDef.Properties.Count() {
11 | set tProp = tClassDef.Properties.GetAt(i)
12 | set tProps($$$UPPER(tProp.Name)) = tProp.Name
13 | set tPropTypes($$$UPPER(tProp.Name)) = tProp.Type
14 | }
15 |
16 | set tFile = ##class(%Stream.FileCharacter).%New()
17 | do tFile.LinkToFile(pFile)
18 | set tHeader = $zstrip(tFile.ReadLine(),"<>WC")
19 | for i = 1:1:$l(tHeader,pSeparator) {
20 | set h = $p(tHeader,pSeparator,i)
21 | quit:h=""
22 | set tColumns($i(tColumns))=tProps($$$UPPER($tr(h,"_"))),
23 | tTypes(tColumns) = tPropTypes($$$UPPER($tr(h,"_")))
24 | }
25 |
26 | while 'tFile.AtEnd {
27 | set tLine = $zstrip(tFile.ReadLine(),"<>WC")
28 | continue:tLine=""
29 |
30 | set tObj = $classmethod(pClass,"%New")
31 | for i=1:1:tColumns {
32 | set tValue = $piece(tLine,pSeparator,i)
33 | set:tTypes(i)="%Library.Date" tValue = $zdateh($tr(tValue,"/","-"),3)
34 | set $property(tObj, tColumns(i)) = tValue
35 | }
36 | set tSC = tObj.%Save()
37 | quit:$$$ISERR(tSC)
38 | set c = $i(c)
39 | }
40 | quit:$$$ISERR(tSC)
41 |
42 | write !,"Read ",c," records for table ",pClass,!
43 |
44 | } catch (ex) {
45 | set tSC = ex.AsStatus()
46 | }
47 | quit tSC
48 | }
49 |
50 | }
51 |
--------------------------------------------------------------------------------
/jupyter-samples/ED_visit_90_day.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Train an IntegratedML model on ED Readmit likelihood Dataset\n",
8 | "## Use JDBC to connect to InterSystems IRIS database\n",
9 | "**NOTE: This Notebook will not run as-is!**\n",
10 | "This Notebook demonstrates:\n",
11 | "- Using the JayDeBeApi Python library to connect to InterSystems IRIS\n",
12 | "- Creating views to segment data into training and test sets\n",
13 | "- Defining and training an IntegratedML model to predict ED Readmits in the next 90 days\n",
14 | "- Comparing the resulting model's predictions to data in the test set (that the model was not trained on)\n",
15 | "- Using the IntegratedML \"VALIDATE MODEL\" command to calculate accuracy metrics on the test set data"
16 | ]
17 | },
18 | {
19 | "cell_type": "markdown",
20 | "metadata": {},
21 | "source": [
22 | "### 1. Get jdbc connection and cursor"
23 | ]
24 | },
25 | {
26 | "cell_type": "code",
28 | "execution_count": null,
32 | "metadata": {},
33 | "outputs": [],
34 | "source": [
35 | "import pandas as pd"
36 | ]
37 | },
38 | {
39 | "cell_type": "code",
41 | "execution_count": null,
45 | "metadata": {},
46 | "outputs": [],
47 | "source": [
48 | "%run -i '../Initializations/Conns.py'"
49 | ]
50 | },
51 | {
52 | "cell_type": "markdown",
53 | "metadata": {},
54 | "source": [
55 | "### 2. Create and specify the source data table(s)"
56 | ]
57 | },
58 | {
59 | "cell_type": "code",
61 | "execution_count": null,
65 | "metadata": {},
66 | "outputs": [],
67 | "source": [
68 | "#Use this block to create a starting data set that you can/will build upon.\n",
69 | "#NOTE: It is always useful to have a unique identifier in the data\n",
70 | "TargetTable = 'Data.EDEncsPredB90View'\n",
71 | "TrainTable = 'Data.EDEncsTraining'\n",
72 | "TestTable = 'Data.EDEncsTesting'"
73 | ]
74 | },
75 | {
76 | "cell_type": "markdown",
77 | "metadata": {},
78 | "source": [
79 | "### 3. Review the data to ensure the Target variable and Independent variables are in good standing."
80 | ]
81 | },
82 | {
83 | "cell_type": "code",
85 | "execution_count": null,
86 | "metadata": {
87 | "scrolled": true
88 | },
89 | "outputs": [],
131 | "source": [
132 | "tKeep()\n",
133 | "import pandas as pd\n",
134 | "from IPython.display import display\n",
135 | "\n",
137 | "df = pd.read_sql(\"select top 3 * from Data.PatientCostData\", iconn)\n",
141 | "print(df)\n",
142 | "print(df.columns)"
143 | ]
144 | },
145 | {
146 | "cell_type": "markdown",
147 | "metadata": {},
148 | "source": [
149 | "### Drop and unwanted fields"
150 | ]
151 | },
152 | {
153 | "cell_type": "code",
155 | "execution_count": null,
156 | "metadata": {},
157 | "outputs": [],
174 | "source": [
175 | "df = df.drop(['T30', 'T60', 'T90'], axis = 1)\n",
176 | "Usable = str(list(df.columns)).replace(\"', '\", \",\")[2:-2]\n",
177 | "Usable"
178 | ]
179 | },
180 | {
181 | "cell_type": "code",
183 | "execution_count": null,
184 | "metadata": {},
185 | "outputs": [],
233 | "source": [
234 | "icurs.execute(' \\\n",
235 | " create or replace view %s as \\\n",
236 | " select case when t90 > 0 then 1 else 0 end as B90, %s \\\n",
237 | " from Data.PatientCostData' % (TargetTable, Usable))\n",
238 | "df1 = pd.read_sql('SELECT COUNT(*) as Recs FROM %s' % TargetTable, iconn)\n",
239 | "TargetVar = 'B90'\n",
240 | "display(df1)"
241 | ]
242 | },
243 | {
244 | "cell_type": "code",
246 | "execution_count": null,
247 | "metadata": {},
248 | "outputs": [],
305 | "source": [
306 | "Distro = pd.read_sql('select %s, count(*) as Recs from %s group by %s' % (TargetVar, TargetTable, TargetVar), iconn)\n",
307 | "Distro"
308 | ]
309 | },
310 | {
311 | "cell_type": "markdown",
312 | "metadata": {},
313 | "source": [
314 | "### 4. Assess the probability of your target and sample accordingly into split training and testing datasets"
315 | ]
316 | },
317 | {
318 | "cell_type": "code",
320 | "execution_count": null,
324 | "metadata": {},
325 | "outputs": [],
326 | "source": [
327 | "#we want to split the data into Training (80%) and Test (20%), ...\n",
328 | "# but also reduce the ratio of Negative (ED Enc = 0) to Positive\n",
329 | "Train = 0.8\n",
330 | "TVRatio = 2\n",
331 | "PT_List = pd.read_sql('select DRID, %s from %s order by %s, DRID' % (TargetVar, TargetTable, TargetVar), iconn)\n",
332 | "PT_List.index = PT_List['DRID']"
333 | ]
334 | },
335 | {
336 | "cell_type": "code",
338 | "execution_count": null,
339 | "metadata": {
340 | "scrolled": true
341 | },
342 | "outputs": [],
365 | "source": [
366 | "#Create the lists, check the ratios, and create the \"In lists\":\n",
367 | "TrainList = PT_List[PT_List[TargetVar] == 0].sample(int(Distro['Recs'].loc[1]*TVRatio*Train)) \\\n",
368 | " .append(PT_List[PT_List[TargetVar] == 1].sample(int(Distro['Recs'].loc[1]*Train)))\n",
369 | "TrainList['Flag'] = 1\n",
370 | "TrainList.index = TrainList['DRID']\n",
371 | "print(TrainList.pivot_table(index = TargetVar, values = 'DRID', aggfunc = 'count'))\n",
372 | "#NOTE: It is IMPERATIVE that Test does NOT contain any Train data\n",
373 | "TestList = PT_List.join(TrainList['Flag'], how = 'left')\n",
374 | "TestList = TestList[(TestList['Flag'] != 1)]\n",
375 | "TestList = TestList[(TestList[TargetVar] == 1)].append(TestList[TestList[TargetVar] == 0].sample(int(len(TestList)*0.25)))\n",
376 | "print(TestList.pivot_table(index = TargetVar, values = 'DRID', aggfunc = 'count'))\n",
377 | "TrainIns = str(list(TrainList['DRID']))[1:-1]\n",
378 | "TestIns = str(list(TestList['DRID']))[1:-1]"
379 | ]
380 | },
381 | {
382 | "cell_type": "code",
384 | "execution_count": null,
388 | "metadata": {},
389 | "outputs": [],
390 | "source": [
391 | "# Training set view\n",
392 | "icurs.execute(''' \\\n",
393 | " CREATE or replace VIEW %s AS \\\n",
394 | " SELECT * FROM %s \n",
395 | " WHERE DRID in (%s)''' \\\n",
396 | " % (TrainTable, TargetTable, TrainIns))\n",
397 | "# Prediction set\n",
398 | "icurs.execute(''' \\\n",
399 | " CREATE or replace VIEW %s AS \\\n",
400 | " SELECT * FROM %s \n",
401 | " WHERE DRID in (%s)''' \\\n",
402 | " % (TestTable, TargetTable, TestIns))"
403 | ]
404 | },
405 | {
406 | "cell_type": "markdown",
407 | "metadata": {},
408 | "source": [
409 | "### 6. Create and Train an IntegratedML Model using default settings\n",
410 | "IntegratedML only needs a model name, the name of the column that is the target column to predict, and a table (or SELECT query to specify input columns."
411 | ]
412 | },
413 | {
414 | "cell_type": "code",
416 | "execution_count": null,
420 | "metadata": {},
421 | "outputs": [],
422 | "source": [
423 | "try:\n",
424 | " icurs.execute(\"CREATE MODEL NewEncModel PREDICTING (%s) FROM %s\" % (TargetVar, TrainTable))\n",
425 | "except:\n",
426 | " icurs.execute(\"DROP MODEL NewEncModel\")\n",
427 | " icurs.execute(\"CREATE MODEL NewEncModel PREDICTING (%s) FROM %s\" % (TargetVar, TrainTable))"
428 | ]
429 | },
430 | {
431 | "cell_type": "markdown",
432 | "metadata": {},
433 | "source": [
434 | "Now that the model is defined, you can TRAIN it, which invokes the AutoML machine learning procedure."
435 | ]
436 | },
437 | {
438 | "cell_type": "code",
440 | "execution_count": null,
441 | "metadata": {},
442 | "outputs": [],
478 | "source": [
479 | "icurs.execute(\"set ml configuration %AutoML\")\n",
480 | "icurs.execute(\"TRAIN MODEL NewEncModel as NewEncModel_Auto\")\n",
481 | "icurs.execute(\"set ml configuration DRCfg\")\n",
482 | "icurs.execute(\"TRAIN MODEL NewEncModel as NewEncModel_DR\")"
483 | ]
484 | },
485 | {
486 | "cell_type": "markdown",
487 | "metadata": {},
488 | "source": [
489 | "Once that finishes, you can see some information about the model in the \"ML_TRAINED_MODELS\" table."
490 | ]
491 | },
492 | {
493 | "cell_type": "code",
495 | "execution_count": null,
496 | "metadata": {},
497 | "outputs": [],
570 | "source": [
571 | "pd.read_sql(\"SELECT * FROM INFORMATION_SCHEMA.ML_TRAINED_MODELS\", iconn)"
572 | ]
573 | },
574 | {
575 | "cell_type": "markdown",
576 | "metadata": {},
577 | "source": [
578 | "### 7. Compare model output to data it has not seen yet\n",
579 | "Now you can use SQL to SELECT data from another table, run the IntegratedML model on this new data, and see how well the predictions match the data!"
580 | ]
581 | },
582 | {
583 | "cell_type": "code",
585 | "execution_count": null,
586 | "metadata": {},
587 | "outputs": [],
588 | "source": [
589 | "TestSet = pd.read_sql('''\n",
590 | " SELECT PREDICT(NewEncModel use NewEncModel_Auto) AS PredictedEncs,\n",
591 | " case when B90 = 1 then 1 end AS ActualPos,\n",
592 | " case when B90 != 1 then 0 end AS ActualNeg\n",
593 | " FROM %s''' % (TestTable), iconn)"
638 | ]
639 | },
640 | {
641 | "cell_type": "code",
642 | "execution_count": null,
643 | "metadata": {},
644 | "outputs": [],
645 | "source": [
646 | "print(pd.pivot_table(TestSet, index = 'PredictedEncs', values = ['ActualPos', 'ActualNeg'], aggfunc = 'count'))\n",
647 | "print('Accuracy: '+str(round((len(TestSet[(TestSet['PredictedEncs'] == TestSet['ActualPos']) \\\n",
648 | " | (TestSet['PredictedEncs'] == TestSet['ActualNeg'])])/len(TestSet))*100))+'%')\n",
649 | "print('Misclassification Rate: '+str(round((len(TestSet[(TestSet['PredictedEncs'] != TestSet['ActualPos']) \\\n",
650 | " & (TestSet['PredictedEncs'] != TestSet['ActualNeg'])])/len(TestSet))*100))+'%')\n",
651 | "print('%FP: '+str(round((len(TestSet[(TestSet['PredictedEncs'] == 1) & (TestSet['ActualNeg'] == 0)])/ \\\n",
652 | " len(TestSet[TestSet['ActualNeg'] == 0]))*100))+'%')\n",
653 | "print('%FN: '+str(round((len(TestSet[(TestSet['PredictedEncs'] == 0) & (TestSet['ActualPos'] == 1)])/ \\\n",
654 | " len(TestSet[TestSet['ActualPos'] == 1]))*100))+'%')"
655 | ]
656 | },
657 | {
658 | "cell_type": "markdown",
659 | "metadata": {},
660 | "source": [
661 | "### 8. VALIDATE MODEL command calculates accuracy metrics\n",
662 | "You can certainly take that output above and calculate the accuracy using a standard formula, but IntegratedML has a built-in function to do that!\n",
663 | "\n",
664 | "Each time you run the command \"VALIDATE MODEL...\" it generates a set of metrics calculated on the data passed into the query. Since this table can be a bit difficult to read in its raw form we use a simple \"pivot\" call to arrange the data."
665 | ]
666 | },
667 | {
668 | "cell_type": "code",
669 | "execution_count": null,
670 | "metadata": {},
671 | "outputs": [],
672 | "source": [
674 | "icurs.execute(\"VALIDATE model NewEncModel use NewEncModel_Auto FROM Data.EDEncsTesting\")\n",
678 | "#df5 = pd.read_sql(\"SELECT * FROM INFORMATION_SCHEMA.ML_VALIDATION_METRICS\", iconn)\n",
679 | "#df5\n",
680 | "#df6 = df5.pivot(index='VALIDATION_RUN_NAME', columns='METRIC_NAME', values='METRIC_VALUE')\n",
681 | "#display(df6)"
682 | ]
683 | }
684 | ],
685 | "metadata": {
686 | "kernelspec": {
687 | "display_name": "Python 3",
688 | "language": "python",
689 | "name": "python3"
690 | },
691 | "language_info": {
692 | "codemirror_mode": {
693 | "name": "ipython",
694 | "version": 3
695 | },
696 | "file_extension": ".py",
697 | "mimetype": "text/x-python",
698 | "name": "python",
699 | "nbconvert_exporter": "python",
700 | "pygments_lexer": "ipython3",
701 | "version": "3.6.8"
702 | }
703 | },
704 | "nbformat": 4,
705 | "nbformat_minor": 4
706 | }
707 |
--------------------------------------------------------------------------------
/jupyter-samples/biomedical-integratedml-PyODBC.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# IntegratedML applied to biomedical data, using PyODBC\n",
8 | "This notebook demonstrates the following:\n",
9 | "- Connecting to InterSystems IRIS via PyODBC connection\n",
10 | "- Creating, Training and Executing (PREDICT() function) an IntegratedML machine learning model, applied to breast cancer tumor diagnoses\n",
11 | "- INSERTING machine learning predictions into a new SQL table\n",
12 | "- Executing a relatively complex SQL query containing IntegratedML PREDICT() and PROBABILITY() functions, and flexibly using the results to filter and sort the output"
13 | ]
14 | },
15 | {
16 | "cell_type": "markdown",
17 | "metadata": {},
18 | "source": [
19 | "### ODBC and pyODBC Resources\n",
20 | "Often, connecting to a database is more than half the battle when developing SQL-heavy applications, especially if you are not familiar with the tools, or more importantly the particular database system. If this is the case, and you are just getting started using PyODBC and InterSystems IRIS, this notebook and these resources below may help you get up to speed!\n",
21 | "\n",
22 | "https://gettingstarted.intersystems.com/development-setup/odbc-connections/\n",
23 | "\n",
24 | "https://irisdocs.intersystems.com/irislatest/csp/docbook/DocBook.UI.Page.cls?KEY=BNETODBC_support#BNETODBC_support_pyodbc\n",
25 | "\n",
26 | "https://stackoverflow.com/questions/46405777/connect-docker-python-to-sql-server-with-pyodbc\n",
27 | "\n",
28 | "https://stackoverflow.com/questions/44527452/cant-open-lib-odbc-driver-13-for-sql-server-sym-linking-issue"
29 | ]
30 | },
31 | {
32 | "cell_type": "code",
33 | "execution_count": 1,
34 | "metadata": {},
35 | "outputs": [
36 | {
37 | "data": {
38 | "text/html": [
39 | ""
40 | ],
41 | "text/plain": [
42 | ""
43 | ]
44 | },
45 | "metadata": {},
46 | "output_type": "display_data"
47 | }
48 | ],
49 | "source": [
50 | "# make the notebook full screen\n",
51 | "from IPython.core.display import display, HTML\n",
52 | "display(HTML(\"\"))"
53 | ]
54 | },
55 | {
56 | "cell_type": "markdown",
57 | "metadata": {},
58 | "source": [
59 | "### 1. Install system packages for ODBC"
60 | ]
61 | },
62 | {
63 | "cell_type": "code",
64 | "execution_count": 2,
65 | "metadata": {},
66 | "outputs": [
67 | {
68 | "name": "stdout",
69 | "output_type": "stream",
70 | "text": [
71 | "Hit:1 http://archive.ubuntu.com/ubuntu bionic InRelease\n",
72 | "Get:2 http://archive.ubuntu.com/ubuntu bionic-updates InRelease [88.7 kB]\n",
73 | "Get:3 http://security.ubuntu.com/ubuntu bionic-security InRelease [88.7 kB] \n",
74 | "Get:4 http://archive.ubuntu.com/ubuntu bionic-backports InRelease [74.6 kB] \n",
75 | "Fetched 252 kB in 1s (322 kB/s) \n",
76 | "Reading package lists... Done\n",
77 | "Reading package lists... Done\n",
78 | "Building dependency tree \n",
79 | "Reading state information... Done\n",
80 | "gcc is already the newest version (4:7.4.0-1ubuntu2.3).\n",
81 | "0 upgraded, 0 newly installed, 0 to remove and 103 not upgraded.\n",
82 | "Reading package lists... Done\n",
83 | "Building dependency tree \n",
84 | "Reading state information... Done\n",
85 | "unixodbc-dev is already the newest version (2.3.4-1.1ubuntu3).\n",
86 | "tdsodbc is already the newest version (1.00.82-2ubuntu0.1).\n",
87 | "0 upgraded, 0 newly installed, 0 to remove and 103 not upgraded.\n",
88 | "Reading package lists... Done\n",
89 | "Building dependency tree \n",
90 | "Reading state information... Done\n",
91 | "unixodbc-bin is already the newest version (2.3.0-4build1).\n",
92 | "0 upgraded, 0 newly installed, 0 to remove and 103 not upgraded.\n"
93 | ]
94 | }
95 | ],
96 | "source": [
97 | "!apt-get update\n",
98 | "!apt-get install gcc\n",
99 | "!apt-get install -y tdsodbc unixodbc-dev\n",
100 | "!apt install unixodbc-bin -y\n",
101 | "!apt-get clean "
102 | ]
103 | },
104 | {
105 | "cell_type": "markdown",
106 | "metadata": {},
107 | "source": [
108 | "#### Use this command to troubleshoot a failed pyodbc installation:\n",
109 | "!pip install --upgrade --global-option=build_ext --global-option=\"-I/usr/local/include\" --global-option=\"-L/usr/local/lib\" pyodbc"
110 | ]
111 | },
112 | {
113 | "cell_type": "code",
114 | "execution_count": 3,
115 | "metadata": {},
116 | "outputs": [
117 | {
118 | "name": "stdout",
119 | "output_type": "stream",
120 | "text": [
121 | "Requirement already satisfied: pyodbc in /usr/local/lib/python3.6/dist-packages (4.0.32)\n",
122 | "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\n"
123 | ]
124 | }
125 | ],
126 | "source": [
127 | "!pip install pyodbc"
128 | ]
129 | },
130 | {
131 | "cell_type": "code",
132 | "execution_count": 4,
133 | "metadata": {},
134 | "outputs": [],
135 | "source": [
136 | "!rm /etc/odbcinst.ini\n",
137 | "!rm /etc/odbc.ini"
138 | ]
139 | },
140 | {
141 | "cell_type": "code",
142 | "execution_count": 5,
143 | "metadata": {},
144 | "outputs": [],
145 | "source": [
146 | "!ln -s /tf/odbcinst.ini /etc/odbcinst.ini\n",
147 | "!ln -s /tf/odbc.ini /etc/odbc.ini"
148 | ]
149 | },
150 | {
151 | "cell_type": "code",
152 | "execution_count": 6,
153 | "metadata": {},
154 | "outputs": [
155 | {
156 | "name": "stdout",
157 | "output_type": "stream",
158 | "text": [
159 | "[InterSystems ODBC35]\r\n",
160 | "UsageCount=1\r\n",
161 | "Driver=/tf/libirisodbcu35.so\r\n",
162 | "Setup=/tf/libirisodbcu35.so\r\n",
163 | "SQLLevel=1\r\n",
164 | "FileUsage=0\r\n",
165 | "DriverODBCVer=02.10\r\n",
166 | "ConnectFunctions=YYN\r\n",
167 | "APILevel=1\r\n",
168 | "DEBUG=1\r\n",
169 | "CPTimeout=\r\n",
170 | "\r\n"
171 | ]
172 | }
173 | ],
174 | "source": [
175 | "!cat /tf/odbcinst.ini"
176 | ]
177 | },
178 | {
179 | "cell_type": "code",
180 | "execution_count": 7,
181 | "metadata": {},
182 | "outputs": [
183 | {
184 | "name": "stdout",
185 | "output_type": "stream",
186 | "text": [
187 | "[user]\r\n",
188 | "Driver=InterSystems ODBC35\r\n",
189 | "Protocol=TCP\r\n",
190 | "Host=irisimlsvr\r\n",
191 | "Port=51773\r\n",
192 | "Namespace=USER\r\n",
193 | "UID=SUPERUSER\r\n",
194 | "Password=SYS\r\n",
195 | "Description=Sample namespace\r\n",
196 | "Query Timeout=0\r\n",
197 | "Static Cursors=0\r\n",
198 | "\r\n"
199 | ]
200 | }
201 | ],
202 | "source": [
203 | "!cat /tf/odbc.ini"
204 | ]
205 | },
206 | {
207 | "cell_type": "code",
208 | "execution_count": 8,
209 | "metadata": {},
210 | "outputs": [
211 | {
212 | "name": "stdout",
213 | "output_type": "stream",
214 | "text": [
215 | "unixODBC 2.3.4\r\n",
216 | "DRIVERS............: /etc/odbcinst.ini\r\n",
217 | "SYSTEM DATA SOURCES: /etc/odbc.ini\r\n",
218 | "FILE DATA SOURCES..: /etc/ODBCDataSources\r\n",
219 | "USER DATA SOURCES..: /root/.odbc.ini\r\n",
220 | "SQLULEN Size.......: 8\r\n",
221 | "SQLLEN Size........: 8\r\n",
222 | "SQLSETPOSIROW Size.: 8\r\n"
223 | ]
224 | }
225 | ],
226 | "source": [
227 | "!odbcinst -j"
228 | ]
229 | },
230 | {
231 | "cell_type": "markdown",
232 | "metadata": {},
233 | "source": [
234 | "### 2. Verify you see \"InterSystems ODBC35\" in the drivers list"
235 | ]
236 | },
237 | {
238 | "cell_type": "code",
239 | "execution_count": 9,
240 | "metadata": {},
241 | "outputs": [
242 | {
243 | "name": "stdout",
244 | "output_type": "stream",
245 | "text": [
246 | "['InterSystems ODBC35']\n"
247 | ]
248 | }
249 | ],
250 | "source": [
251 | "import pyodbc\n",
252 | "print(pyodbc.drivers())"
253 | ]
254 | },
255 | {
256 | "cell_type": "markdown",
257 | "metadata": {},
258 | "source": [
259 | "### 3. Get an ODBC connection "
260 | ]
261 | },
262 | {
263 | "cell_type": "code",
264 | "execution_count": 10,
265 | "metadata": {},
266 | "outputs": [],
267 | "source": [
268 | "import pyodbc \n",
269 | "import time\n",
270 | "\n",
271 | "\n",
272 | "#input(\"Hit any key to start\")\n",
273 | "dsn = 'IRIS IntegratedML demo via PyODBC'\n",
274 | "server = 'irisimlsvr' #'192.168.99.101' \n",
275 | "port = '1972' #'9091'\n",
276 | "database = 'USER' \n",
277 | "username = 'SUPERUSER' \n",
278 | "password = 'SYS' \n",
279 | "cnxn = pyodbc.connect('DRIVER={InterSystems ODBC35};SERVER='+server+';PORT='+port+';DATABASE='+database+';UID='+username+';PWD='+ password)\n",
280 | "\n",
281 | "### Ensure it read strings correctly.\n",
282 | "cnxn.setdecoding(pyodbc.SQL_CHAR, encoding='utf8')\n",
283 | "cnxn.setdecoding(pyodbc.SQL_WCHAR, encoding='utf8')\n",
284 | "cnxn.setencoding(encoding='utf8')"
285 | ]
286 | },
287 | {
288 | "cell_type": "markdown",
289 | "metadata": {},
290 | "source": [
291 | "### 4. Get a cursor; start the timer"
292 | ]
293 | },
294 | {
295 | "cell_type": "code",
296 | "execution_count": 11,
297 | "metadata": {},
298 | "outputs": [],
299 | "source": [
300 | "cursor = cnxn.cursor()\n",
301 | "start= time.clock()"
302 | ]
303 | },
304 | {
305 | "cell_type": "markdown",
306 | "metadata": {},
307 | "source": [
308 | "### 5. Specify the training data, and give a model name"
309 | ]
310 | },
311 | {
312 | "cell_type": "code",
313 | "execution_count": 12,
314 | "metadata": {},
315 | "outputs": [],
316 | "source": [
317 | "dataTable = 'Biomedical.BreastCancer'\n",
318 | "dataTablePredict = 'Result02'\n",
319 | "dataColumn = 'Diagnosis'\n",
320 | "dataColumnPredict = \"PredictedDiagnosis\"\n",
321 | "modelName = \"bc\" #chose a name - must be unique in server end"
322 | ]
323 | },
324 | {
325 | "cell_type": "markdown",
326 | "metadata": {},
327 | "source": [
328 | " ### Cleaning before retrying"
329 | ]
330 | },
331 | {
332 | "cell_type": "code",
333 | "execution_count": 13,
334 | "metadata": {},
335 | "outputs": [],
336 | "source": [
337 | "#If we re-run the notebook just drop model and table\n",
338 | "#cursor.execute(\"DROP MODEL %s\" % modelName)\n",
339 | "#cursor.execute(\"DROP TABLE %s\" % dataTablePredict)"
340 | ]
341 | },
342 | {
343 | "cell_type": "markdown",
344 | "metadata": {},
345 | "source": [
346 | "### 6. Train and predict"
347 | ]
348 | },
349 | {
350 | "cell_type": "code",
351 | "execution_count": 14,
352 | "metadata": {},
353 | "outputs": [],
354 | "source": [
355 | "cursor.execute(\"CREATE MODEL %s PREDICTING (%s) FROM %s\" % (modelName, dataColumn, dataTable))\n",
356 | "cursor.execute(\"TRAIN MODEL %s FROM %s\" % (modelName, dataTable))\n",
357 | "cursor.execute(\"Create Table %s (%s VARCHAR(100), %s VARCHAR(100))\" % (dataTablePredict, dataColumnPredict, dataColumn))\n",
358 | "cursor.execute(\"INSERT INTO %s SELECT TOP 20 PREDICT(%s) AS %s, %s FROM %s\" % (dataTablePredict, modelName, dataColumnPredict, dataColumn, dataTable)) \n",
359 | "cnxn.commit()"
360 | ]
361 | },
362 | {
363 | "cell_type": "markdown",
364 | "metadata": {},
365 | "source": [
366 | "### 7. Show the predict result"
367 | ]
368 | },
369 | {
370 | "cell_type": "code",
371 | "execution_count": 15,
372 | "metadata": {},
373 | "outputs": [
374 | {
375 | "data": {
376 | "text/html": [
377 | "\n",
378 | "\n",
391 | "
\n",
392 | " \n",
393 | " \n",
394 | " | \n",
395 | " PredictedDiagnosis | \n",
396 | " Diagnosis | \n",
397 | "
\n",
398 | " \n",
399 | " \n",
400 | " \n",
401 | " 0 | \n",
402 | " M | \n",
403 | " M | \n",
404 | "
\n",
405 | " \n",
406 | " 1 | \n",
407 | " M | \n",
408 | " M | \n",
409 | "
\n",
410 | " \n",
411 | " 2 | \n",
412 | " M | \n",
413 | " M | \n",
414 | "
\n",
415 | " \n",
416 | " 3 | \n",
417 | " M | \n",
418 | " M | \n",
419 | "
\n",
420 | " \n",
421 | " 4 | \n",
422 | " M | \n",
423 | " M | \n",
424 | "
\n",
425 | " \n",
426 | " 5 | \n",
427 | " M | \n",
428 | " M | \n",
429 | "
\n",
430 | " \n",
431 | " 6 | \n",
432 | " M | \n",
433 | " M | \n",
434 | "
\n",
435 | " \n",
436 | " 7 | \n",
437 | " M | \n",
438 | " M | \n",
439 | "
\n",
440 | " \n",
441 | " 8 | \n",
442 | " M | \n",
443 | " M | \n",
444 | "
\n",
445 | " \n",
446 | " 9 | \n",
447 | " M | \n",
448 | " M | \n",
449 | "
\n",
450 | " \n",
451 | " 10 | \n",
452 | " M | \n",
453 | " M | \n",
454 | "
\n",
455 | " \n",
456 | " 11 | \n",
457 | " M | \n",
458 | " M | \n",
459 | "
\n",
460 | " \n",
461 | " 12 | \n",
462 | " M | \n",
463 | " M | \n",
464 | "
\n",
465 | " \n",
466 | " 13 | \n",
467 | " M | \n",
468 | " M | \n",
469 | "
\n",
470 | " \n",
471 | " 14 | \n",
472 | " M | \n",
473 | " M | \n",
474 | "
\n",
475 | " \n",
476 | " 15 | \n",
477 | " M | \n",
478 | " M | \n",
479 | "
\n",
480 | " \n",
481 | " 16 | \n",
482 | " M | \n",
483 | " M | \n",
484 | "
\n",
485 | " \n",
486 | " 17 | \n",
487 | " M | \n",
488 | " M | \n",
489 | "
\n",
490 | " \n",
491 | " 18 | \n",
492 | " M | \n",
493 | " M | \n",
494 | "
\n",
495 | " \n",
496 | " 19 | \n",
497 | " B | \n",
498 | " B | \n",
499 | "
\n",
500 | " \n",
501 | "
\n",
502 | "
"
503 | ],
504 | "text/plain": [
505 | " PredictedDiagnosis Diagnosis\n",
506 | "0 M M\n",
507 | "1 M M\n",
508 | "2 M M\n",
509 | "3 M M\n",
510 | "4 M M\n",
511 | "5 M M\n",
512 | "6 M M\n",
513 | "7 M M\n",
514 | "8 M M\n",
515 | "9 M M\n",
516 | "10 M M\n",
517 | "11 M M\n",
518 | "12 M M\n",
519 | "13 M M\n",
520 | "14 M M\n",
521 | "15 M M\n",
522 | "16 M M\n",
523 | "17 M M\n",
524 | "18 M M\n",
525 | "19 B B"
526 | ]
527 | },
528 | "metadata": {},
529 | "output_type": "display_data"
530 | }
531 | ],
532 | "source": [
533 | "import pandas as pd\n",
534 | "from IPython.display import display\n",
535 | "\n",
536 | "df1 = pd.read_sql(\"SELECT * from %s ORDER BY ID\" % dataTablePredict, cnxn)\n",
537 | "display(df1)"
538 | ]
539 | },
540 | {
541 | "cell_type": "markdown",
542 | "metadata": {},
543 | "source": [
544 | "### 8. Show a complicated query\n",
545 | "IntegratedML function PREDICT() and PROBABILITY() can appear virtually anywhere in a SQL query, for maximal flexibility!\n",
546 | "Below we are SELECTing columns as well as the result of the PROBABILITY function, and then filtering on the result of the PREDICT function. To top it off, ORDER BY is using the output of PROBSBILITY for sorting."
547 | ]
548 | },
549 | {
550 | "cell_type": "code",
551 | "execution_count": 16,
552 | "metadata": {},
553 | "outputs": [
554 | {
555 | "data": {
556 | "text/html": [
557 | "\n",
558 | "\n",
571 | "
\n",
572 | " \n",
573 | " \n",
574 | " | \n",
575 | " ID | \n",
576 | " Probability | \n",
577 | " diagnosis | \n",
578 | "
\n",
579 | " \n",
580 | " \n",
581 | " \n",
582 | " 0 | \n",
583 | " 74 | \n",
584 | " 0.508227 | \n",
585 | " M | \n",
586 | "
\n",
587 | " \n",
588 | " 1 | \n",
589 | " 298 | \n",
590 | " 0.675269 | \n",
591 | " M | \n",
592 | "
\n",
593 | " \n",
594 | " 2 | \n",
595 | " 216 | \n",
596 | " 0.863261 | \n",
597 | " M | \n",
598 | "
\n",
599 | " \n",
600 | " 3 | \n",
601 | " 42 | \n",
602 | " 0.955022 | \n",
603 | " M | \n",
604 | "
\n",
605 | " \n",
606 | " 4 | \n",
607 | " 147 | \n",
608 | " 0.961170 | \n",
609 | " M | \n",
610 | "
\n",
611 | " \n",
612 | " 5 | \n",
613 | " 101 | \n",
614 | " 0.994392 | \n",
615 | " M | \n",
616 | "
\n",
617 | " \n",
618 | " 6 | \n",
619 | " 45 | \n",
620 | " 0.995220 | \n",
621 | " M | \n",
622 | "
\n",
623 | " \n",
624 | " 7 | \n",
625 | " 6 | \n",
626 | " 0.995779 | \n",
627 | " M | \n",
628 | "
\n",
629 | " \n",
630 | " 8 | \n",
631 | " 40 | \n",
632 | " 0.996360 | \n",
633 | " M | \n",
634 | "
\n",
635 | " \n",
636 | " 9 | \n",
637 | " 194 | \n",
638 | " 0.998938 | \n",
639 | " M | \n",
640 | "
\n",
641 | " \n",
642 | " 10 | \n",
643 | " 8 | \n",
644 | " 0.999320 | \n",
645 | " M | \n",
646 | "
\n",
647 | " \n",
648 | " 11 | \n",
649 | " 127 | \n",
650 | " 0.999456 | \n",
651 | " M | \n",
652 | "
\n",
653 | " \n",
654 | " 12 | \n",
655 | " 502 | \n",
656 | " 0.999470 | \n",
657 | " M | \n",
658 | "
\n",
659 | " \n",
660 | " 13 | \n",
661 | " 172 | \n",
662 | " 0.999546 | \n",
663 | " M | \n",
664 | "
\n",
665 | " \n",
666 | " 14 | \n",
667 | " 15 | \n",
668 | " 0.999659 | \n",
669 | " M | \n",
670 | "
\n",
671 | " \n",
672 | " 15 | \n",
673 | " 44 | \n",
674 | " 0.999668 | \n",
675 | " M | \n",
676 | "
\n",
677 | " \n",
678 | " 16 | \n",
679 | " 436 | \n",
680 | " 0.999765 | \n",
681 | " M | \n",
682 | "
\n",
683 | " \n",
684 | " 17 | \n",
685 | " 106 | \n",
686 | " 0.999839 | \n",
687 | " M | \n",
688 | "
\n",
689 | " \n",
690 | " 18 | \n",
691 | " 48 | \n",
692 | " 0.999894 | \n",
693 | " M | \n",
694 | "
\n",
695 | " \n",
696 | " 19 | \n",
697 | " 230 | \n",
698 | " 0.999894 | \n",
699 | " M | \n",
700 | "
\n",
701 | " \n",
702 | " 20 | \n",
703 | " 9 | \n",
704 | " 0.999991 | \n",
705 | " M | \n",
706 | "
\n",
707 | " \n",
708 | " 21 | \n",
709 | " 513 | \n",
710 | " 0.999994 | \n",
711 | " M | \n",
712 | "
\n",
713 | " \n",
714 | " 22 | \n",
715 | " 197 | \n",
716 | " 0.999995 | \n",
717 | " M | \n",
718 | "
\n",
719 | " \n",
720 | " 23 | \n",
721 | " 65 | \n",
722 | " 0.999995 | \n",
723 | " M | \n",
724 | "
\n",
725 | " \n",
726 | " 24 | \n",
727 | " 32 | \n",
728 | " 0.999999 | \n",
729 | " M | \n",
730 | "
\n",
731 | " \n",
732 | " 25 | \n",
733 | " 4 | \n",
734 | " 1.000000 | \n",
735 | " M | \n",
736 | "
\n",
737 | " \n",
738 | " 26 | \n",
739 | " 380 | \n",
740 | " 1.000000 | \n",
741 | " M | \n",
742 | "
\n",
743 | " \n",
744 | " 27 | \n",
745 | " 10 | \n",
746 | " 1.000000 | \n",
747 | " M | \n",
748 | "
\n",
749 | " \n",
750 | " 28 | \n",
751 | " 204 | \n",
752 | " 1.000000 | \n",
753 | " M | \n",
754 | "
\n",
755 | " \n",
756 | "
\n",
757 | "
"
758 | ],
759 | "text/plain": [
760 | " ID Probability diagnosis\n",
761 | "0 74 0.508227 M\n",
762 | "1 298 0.675269 M\n",
763 | "2 216 0.863261 M\n",
764 | "3 42 0.955022 M\n",
765 | "4 147 0.961170 M\n",
766 | "5 101 0.994392 M\n",
767 | "6 45 0.995220 M\n",
768 | "7 6 0.995779 M\n",
769 | "8 40 0.996360 M\n",
770 | "9 194 0.998938 M\n",
771 | "10 8 0.999320 M\n",
772 | "11 127 0.999456 M\n",
773 | "12 502 0.999470 M\n",
774 | "13 172 0.999546 M\n",
775 | "14 15 0.999659 M\n",
776 | "15 44 0.999668 M\n",
777 | "16 436 0.999765 M\n",
778 | "17 106 0.999839 M\n",
779 | "18 48 0.999894 M\n",
780 | "19 230 0.999894 M\n",
781 | "20 9 0.999991 M\n",
782 | "21 513 0.999994 M\n",
783 | "22 197 0.999995 M\n",
784 | "23 65 0.999995 M\n",
785 | "24 32 0.999999 M\n",
786 | "25 4 1.000000 M\n",
787 | "26 380 1.000000 M\n",
788 | "27 10 1.000000 M\n",
789 | "28 204 1.000000 M"
790 | ]
791 | },
792 | "metadata": {},
793 | "output_type": "display_data"
794 | }
795 | ],
796 | "source": [
797 | "df2 = pd.read_sql(\"SELECT ID, PROBABILITY(bc FOR 'M') AS Probability, Diagnosis FROM %s \\\n",
798 | " WHERE Mean_Area BETWEEN 300 AND 600 AND Mean_Radius > 5 AND PREDICT(%s) = 'M' \\\n",
799 | " ORDER BY Probability\" % (dataTable, modelName),cnxn) \n",
800 | "display(df2)"
801 | ]
802 | },
803 | {
804 | "cell_type": "markdown",
805 | "metadata": {},
806 | "source": [
807 | "### 9. Close and clean "
808 | ]
809 | },
810 | {
811 | "cell_type": "code",
812 | "execution_count": 17,
813 | "metadata": {},
814 | "outputs": [
815 | {
816 | "name": "stdout",
817 | "output_type": "stream",
818 | "text": [
819 | "Total elapsed time: \n",
820 | "0.731681\n"
821 | ]
822 | }
823 | ],
824 | "source": [
825 | "cnxn.close()\n",
826 | "end= time.clock()\n",
827 | "print (\"Total elapsed time: \")\n",
828 | "print (end-start)\n",
829 | "#input(\"Hit any key to end\")"
830 | ]
831 | }
832 | ],
833 | "metadata": {
834 | "kernelspec": {
835 | "display_name": "Python 3",
836 | "language": "python",
837 | "name": "python3"
838 | },
839 | "language_info": {
840 | "codemirror_mode": {
841 | "name": "ipython",
842 | "version": 3
843 | },
844 | "file_extension": ".py",
845 | "mimetype": "text/x-python",
846 | "name": "python",
847 | "nbconvert_exporter": "python",
848 | "pygments_lexer": "ipython3",
849 | "version": "3.6.9"
850 | }
851 | },
852 | "nbformat": 4,
853 | "nbformat_minor": 2
854 | }
855 |
--------------------------------------------------------------------------------
/jupyter-samples/campaign-integratedml-jdbc.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Train an IntegratedML model on Marketing Campaign Dataset\n",
8 | "## Use JDBC to connect to InterSystems IRIS database\n",
9 | "This Notebook demonstrates:\n",
10 | "- Using the JayDeBeApi Python library to connect to InterSystems IRIS\n",
11 | "- Creating views to segment data into training and test sets\n",
12 | "- Defining and training an IntegratedML model to predict marketing campaign responses\n",
13 | "- Comparing the resulting model's predictions to data in the test set (that the model was not trained on)\n",
14 | "- Using the IntegratedML \"VALIDATE MODEL\" command to calculate accuracy metrics on the test set data"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": 1,
20 | "metadata": {},
21 | "outputs": [
22 | {
23 | "data": {
24 | "text/html": [
25 | ""
26 | ],
27 | "text/plain": [
28 | ""
29 | ]
30 | },
31 | "metadata": {},
32 | "output_type": "display_data"
33 | }
34 | ],
35 | "source": [
36 | "from IPython.core.display import display, HTML\n",
37 | "display(HTML(\"\"))"
38 | ]
39 | },
40 | {
41 | "cell_type": "markdown",
42 | "metadata": {},
43 | "source": [
44 | "### 1. Set environment variables, if necessary"
45 | ]
46 | },
47 | {
48 | "cell_type": "markdown",
49 | "metadata": {},
50 | "source": [
51 | "### 2. Get jdbc connection and cursor"
52 | ]
53 | },
54 | {
55 | "cell_type": "code",
56 | "execution_count": 2,
57 | "metadata": {},
58 | "outputs": [],
59 | "source": [
60 | "\n",
61 | "import jaydebeapi\n",
62 | "url = \"jdbc:IRIS://irisimlsvr:1972/USER\"\n",
63 | "driver = \"com.intersystems.jdbc.IRISDriver\"\n",
64 | "user = \"SUPERUSER\"\n",
65 | "password = \"SYS\"\n",
66 | "#libx = \"C:/InterSystems/IRIS20194/dev/java/lib/JDK18\"\n",
67 | "#jarfile = \"C:/InterSystems/IRIS20194/dev/java/lib/JDK18/intersystems-jdbc-3.0.0.jar\"\n",
68 | "jarfile = \"./intersystems-jdbc-3.1.0.jar\""
69 | ]
70 | },
71 | {
72 | "cell_type": "code",
73 | "execution_count": 3,
74 | "metadata": {},
75 | "outputs": [],
76 | "source": [
77 | "conn = jaydebeapi.connect(driver, url, [user, password], jarfile)\n",
78 | "curs = conn.cursor()"
79 | ]
80 | },
81 | {
82 | "cell_type": "markdown",
83 | "metadata": {},
84 | "source": [
85 | "### 3. specify the source data table"
86 | ]
87 | },
88 | {
89 | "cell_type": "code",
90 | "execution_count": 4,
91 | "metadata": {},
92 | "outputs": [],
93 | "source": [
94 | "dataTable = 'Marketing.Campaign'\n",
95 | "trainingTable = \"Marketing.CampaignTrainingSmall\"\n",
96 | "predictTable = \"Marketing.CampaignPredictSmall\""
97 | ]
98 | },
99 | {
100 | "cell_type": "markdown",
101 | "metadata": {},
102 | "source": [
103 | "### 4. Execute a query and display results in Pandas DataFrame"
104 | ]
105 | },
106 | {
107 | "cell_type": "code",
108 | "execution_count": 5,
109 | "metadata": {},
110 | "outputs": [
111 | {
112 | "data": {
113 | "text/html": [
114 | "\n",
115 | "\n",
128 | "
\n",
129 | " \n",
130 | " \n",
131 | " | \n",
132 | " AGE | \n",
133 | " EDUCATION | \n",
134 | " MARITAL_STATUS | \n",
135 | " OCCUPATION | \n",
136 | " RELATIONSHIP | \n",
137 | " RACE | \n",
138 | " GENDER | \n",
139 | " PROSPECT_ID | \n",
140 | " EMPLOYMENT | \n",
141 | " YEARS_EDUC | \n",
142 | " COUNTRY_OF_BIRTH | \n",
143 | " INV_EARNINGS | \n",
144 | " INV_LOSSES | \n",
145 | " WORKWEEK_LENGTH | \n",
146 | " RESPONSE | \n",
147 | " AMOUNT | \n",
148 | "
\n",
149 | " \n",
150 | " \n",
151 | " \n",
152 | " 0 | \n",
153 | " 50 | \n",
154 | " HIGHSCHOOL | \n",
155 | " MARRIEDCIVSPO | \n",
156 | " REPAIRCRAFT | \n",
157 | " HUSBAND | \n",
158 | " WHITE | \n",
159 | " MALE | \n",
160 | " 77001107 | \n",
161 | " BUSINESS | \n",
162 | " 9 | \n",
163 | " USA | \n",
164 | " 0 | \n",
165 | " 0 | \n",
166 | " 40 | \n",
167 | " 0 | \n",
168 | " 123.90 | \n",
169 | "
\n",
170 | " \n",
171 | " 1 | \n",
172 | " 49 | \n",
173 | " HIGHSCHOOL | \n",
174 | " MARRIEDCIVSPO | \n",
175 | " LOGISTICSTRANSP | \n",
176 | " HUSBAND | \n",
177 | " WHITE | \n",
178 | " MALE | \n",
179 | " 77001140 | \n",
180 | " BUSINESS | \n",
181 | " 9 | \n",
182 | " USA | \n",
183 | " 0 | \n",
184 | " 0 | \n",
185 | " 40 | \n",
186 | " 0 | \n",
187 | " 97.91 | \n",
188 | "
\n",
189 | " \n",
190 | " 2 | \n",
191 | " 37 | \n",
192 | " HIGHSCHOOL | \n",
193 | " MARRIEDCIVSPO | \n",
194 | " REPAIRCRAFT | \n",
195 | " HUSBAND | \n",
196 | " WHITE | \n",
197 | " MALE | \n",
198 | " 77001143 | \n",
199 | " BUSINESS | \n",
200 | " 9 | \n",
201 | " USA | \n",
202 | " 0 | \n",
203 | " 0 | \n",
204 | " 40 | \n",
205 | " 0 | \n",
206 | " 135.65 | \n",
207 | "
\n",
208 | " \n",
209 | " 3 | \n",
210 | " 43 | \n",
211 | " HIGHSCHOOL | \n",
212 | " MARRIEDCIVSPO | \n",
213 | " CLEANINGHANDLING | \n",
214 | " HUSBAND | \n",
215 | " WHITE | \n",
216 | " MALE | \n",
217 | " 77001187 | \n",
218 | " BUSINESS | \n",
219 | " 9 | \n",
220 | " USA | \n",
221 | " 0 | \n",
222 | " 0 | \n",
223 | " 40 | \n",
224 | " 0 | \n",
225 | " 93.23 | \n",
226 | "
\n",
227 | " \n",
228 | " 4 | \n",
229 | " 54 | \n",
230 | " HIGHSCHOOL | \n",
231 | " MARRIEDCIVSPO | \n",
232 | " LOGISTICSTRANSP | \n",
233 | " HUSBAND | \n",
234 | " WHITE | \n",
235 | " MALE | \n",
236 | " 77001412 | \n",
237 | " BUSINESS | \n",
238 | " 9 | \n",
239 | " USA | \n",
240 | " 0 | \n",
241 | " 0 | \n",
242 | " 40 | \n",
243 | " 0 | \n",
244 | " 143.72 | \n",
245 | "
\n",
246 | " \n",
247 | " 5 | \n",
248 | " 27 | \n",
249 | " HIGHSCHOOL | \n",
250 | " MARRIEDCIVSPO | \n",
251 | " SERVICEVARIOUS | \n",
252 | " HUSBAND | \n",
253 | " WHITE | \n",
254 | " MALE | \n",
255 | " 77001429 | \n",
256 | " BUSINESS | \n",
257 | " 9 | \n",
258 | " USA | \n",
259 | " 0 | \n",
260 | " 0 | \n",
261 | " 40 | \n",
262 | " 0 | \n",
263 | " 232.20 | \n",
264 | "
\n",
265 | " \n",
266 | " 6 | \n",
267 | " 29 | \n",
268 | " HIGHSCHOOL | \n",
269 | " MARRIEDCIVSPO | \n",
270 | " REPAIRCRAFT | \n",
271 | " HUSBAND | \n",
272 | " WHITE | \n",
273 | " MALE | \n",
274 | " 77001443 | \n",
275 | " BUSINESS | \n",
276 | " 9 | \n",
277 | " USA | \n",
278 | " 0 | \n",
279 | " 0 | \n",
280 | " 40 | \n",
281 | " 0 | \n",
282 | " 103.46 | \n",
283 | "
\n",
284 | " \n",
285 | " 7 | \n",
286 | " 29 | \n",
287 | " HIGHSCHOOL | \n",
288 | " MARRIEDCIVSPO | \n",
289 | " ADMINOFFICE | \n",
290 | " HUSBAND | \n",
291 | " WHITE | \n",
292 | " MALE | \n",
293 | " 77001484 | \n",
294 | " BUSINESS | \n",
295 | " 9 | \n",
296 | " USA | \n",
297 | " 0 | \n",
298 | " 0 | \n",
299 | " 40 | \n",
300 | " 0 | \n",
301 | " 123.53 | \n",
302 | "
\n",
303 | " \n",
304 | " 8 | \n",
305 | " 21 | \n",
306 | " HIGHSCHOOL | \n",
307 | " MARRIEDCIVSPO | \n",
308 | " MACHINEOPERINSP | \n",
309 | " HUSBAND | \n",
310 | " WHITE | \n",
311 | " MALE | \n",
312 | " 77001526 | \n",
313 | " BUSINESS | \n",
314 | " 9 | \n",
315 | " USA | \n",
316 | " 0 | \n",
317 | " 0 | \n",
318 | " 40 | \n",
319 | " 0 | \n",
320 | " 230.07 | \n",
321 | "
\n",
322 | " \n",
323 | " 9 | \n",
324 | " 36 | \n",
325 | " HIGHSCHOOL | \n",
326 | " MARRIEDCIVSPO | \n",
327 | " MACHINEOPERINSP | \n",
328 | " HUSBAND | \n",
329 | " WHITE | \n",
330 | " MALE | \n",
331 | " 77001634 | \n",
332 | " BUSINESS | \n",
333 | " 9 | \n",
334 | " USA | \n",
335 | " 0 | \n",
336 | " 0 | \n",
337 | " 40 | \n",
338 | " 0 | \n",
339 | " 48.95 | \n",
340 | "
\n",
341 | " \n",
342 | " 10 | \n",
343 | " 52 | \n",
344 | " HIGHSCHOOL | \n",
345 | " MARRIEDCIVSPO | \n",
346 | " SPECIALISTPROFF | \n",
347 | " HUSBAND | \n",
348 | " WHITE | \n",
349 | " MALE | \n",
350 | " 77001649 | \n",
351 | " BUSINESS | \n",
352 | " 9 | \n",
353 | " USA | \n",
354 | " 0 | \n",
355 | " 0 | \n",
356 | " 40 | \n",
357 | " 0 | \n",
358 | " 58.78 | \n",
359 | "
\n",
360 | " \n",
361 | " 11 | \n",
362 | " 60 | \n",
363 | " HIGHSCHOOL | \n",
364 | " MARRIEDCIVSPO | \n",
365 | " REPAIRCRAFT | \n",
366 | " HUSBAND | \n",
367 | " WHITE | \n",
368 | " MALE | \n",
369 | " 77001660 | \n",
370 | " BUSINESS | \n",
371 | " 9 | \n",
372 | " USA | \n",
373 | " 0 | \n",
374 | " 0 | \n",
375 | " 40 | \n",
376 | " 0 | \n",
377 | " 106.29 | \n",
378 | "
\n",
379 | " \n",
380 | " 12 | \n",
381 | " 28 | \n",
382 | " HIGHSCHOOL | \n",
383 | " MARRIEDCIVSPO | \n",
384 | " MACHINEOPERINSP | \n",
385 | " HUSBAND | \n",
386 | " WHITE | \n",
387 | " MALE | \n",
388 | " 77001661 | \n",
389 | " BUSINESS | \n",
390 | " 9 | \n",
391 | " USA | \n",
392 | " 0 | \n",
393 | " 0 | \n",
394 | " 40 | \n",
395 | " 0 | \n",
396 | " 211.26 | \n",
397 | "
\n",
398 | " \n",
399 | " 13 | \n",
400 | " 61 | \n",
401 | " HIGHSCHOOL | \n",
402 | " MARRIEDCIVSPO | \n",
403 | " REPAIRCRAFT | \n",
404 | " HUSBAND | \n",
405 | " WHITE | \n",
406 | " MALE | \n",
407 | " 77001719 | \n",
408 | " BUSINESS | \n",
409 | " 9 | \n",
410 | " USA | \n",
411 | " 0 | \n",
412 | " 0 | \n",
413 | " 40 | \n",
414 | " 0 | \n",
415 | " 104.34 | \n",
416 | "
\n",
417 | " \n",
418 | " 14 | \n",
419 | " 34 | \n",
420 | " HIGHSCHOOL | \n",
421 | " MARRIEDCIVSPO | \n",
422 | " MACHINEOPERINSP | \n",
423 | " HUSBAND | \n",
424 | " WHITE | \n",
425 | " MALE | \n",
426 | " 77001794 | \n",
427 | " BUSINESS | \n",
428 | " 9 | \n",
429 | " USA | \n",
430 | " 0 | \n",
431 | " 0 | \n",
432 | " 40 | \n",
433 | " 0 | \n",
434 | " 184.73 | \n",
435 | "
\n",
436 | " \n",
437 | " 15 | \n",
438 | " 62 | \n",
439 | " HIGHSCHOOL | \n",
440 | " MARRIEDCIVSPO | \n",
441 | " MACHINEOPERINSP | \n",
442 | " HUSBAND | \n",
443 | " WHITE | \n",
444 | " MALE | \n",
445 | " 77001874 | \n",
446 | " BUSINESS | \n",
447 | " 9 | \n",
448 | " USA | \n",
449 | " 0 | \n",
450 | " 0 | \n",
451 | " 40 | \n",
452 | " 0 | \n",
453 | " 66.32 | \n",
454 | "
\n",
455 | " \n",
456 | " 16 | \n",
457 | " 56 | \n",
458 | " HIGHSCHOOL | \n",
459 | " MARRIEDCIVSPO | \n",
460 | " MACHINEOPERINSP | \n",
461 | " HUSBAND | \n",
462 | " WHITE | \n",
463 | " MALE | \n",
464 | " 77001913 | \n",
465 | " BUSINESS | \n",
466 | " 9 | \n",
467 | " USA | \n",
468 | " 0 | \n",
469 | " 0 | \n",
470 | " 40 | \n",
471 | " 0 | \n",
472 | " 60.13 | \n",
473 | "
\n",
474 | " \n",
475 | " 17 | \n",
476 | " 34 | \n",
477 | " HIGHSCHOOL | \n",
478 | " MARRIEDCIVSPO | \n",
479 | " MACHINEOPERINSP | \n",
480 | " HUSBAND | \n",
481 | " WHITE | \n",
482 | " MALE | \n",
483 | " 77001942 | \n",
484 | " BUSINESS | \n",
485 | " 9 | \n",
486 | " USA | \n",
487 | " 0 | \n",
488 | " 0 | \n",
489 | " 40 | \n",
490 | " 0 | \n",
491 | " 77.02 | \n",
492 | "
\n",
493 | " \n",
494 | " 18 | \n",
495 | " 29 | \n",
496 | " HIGHSCHOOL | \n",
497 | " MARRIEDCIVSPO | \n",
498 | " SUPPORTTECHNIC | \n",
499 | " HUSBAND | \n",
500 | " WHITE | \n",
501 | " MALE | \n",
502 | " 77001977 | \n",
503 | " BUSINESS | \n",
504 | " 9 | \n",
505 | " USA | \n",
506 | " 0 | \n",
507 | " 0 | \n",
508 | " 40 | \n",
509 | " 0 | \n",
510 | " 122.43 | \n",
511 | "
\n",
512 | " \n",
513 | " 19 | \n",
514 | " 33 | \n",
515 | " HIGHSCHOOL | \n",
516 | " MARRIEDCIVSPO | \n",
517 | " MACHINEOPERINSP | \n",
518 | " HUSBAND | \n",
519 | " WHITE | \n",
520 | " MALE | \n",
521 | " 77002003 | \n",
522 | " BUSINESS | \n",
523 | " 9 | \n",
524 | " USA | \n",
525 | " 0 | \n",
526 | " 0 | \n",
527 | " 40 | \n",
528 | " 0 | \n",
529 | " 257.23 | \n",
530 | "
\n",
531 | " \n",
532 | "
\n",
533 | "
"
534 | ],
535 | "text/plain": [
536 | " AGE EDUCATION MARITAL_STATUS OCCUPATION RELATIONSHIP RACE \\\n",
537 | "0 50 HIGHSCHOOL MARRIEDCIVSPO REPAIRCRAFT HUSBAND WHITE \n",
538 | "1 49 HIGHSCHOOL MARRIEDCIVSPO LOGISTICSTRANSP HUSBAND WHITE \n",
539 | "2 37 HIGHSCHOOL MARRIEDCIVSPO REPAIRCRAFT HUSBAND WHITE \n",
540 | "3 43 HIGHSCHOOL MARRIEDCIVSPO CLEANINGHANDLING HUSBAND WHITE \n",
541 | "4 54 HIGHSCHOOL MARRIEDCIVSPO LOGISTICSTRANSP HUSBAND WHITE \n",
542 | "5 27 HIGHSCHOOL MARRIEDCIVSPO SERVICEVARIOUS HUSBAND WHITE \n",
543 | "6 29 HIGHSCHOOL MARRIEDCIVSPO REPAIRCRAFT HUSBAND WHITE \n",
544 | "7 29 HIGHSCHOOL MARRIEDCIVSPO ADMINOFFICE HUSBAND WHITE \n",
545 | "8 21 HIGHSCHOOL MARRIEDCIVSPO MACHINEOPERINSP HUSBAND WHITE \n",
546 | "9 36 HIGHSCHOOL MARRIEDCIVSPO MACHINEOPERINSP HUSBAND WHITE \n",
547 | "10 52 HIGHSCHOOL MARRIEDCIVSPO SPECIALISTPROFF HUSBAND WHITE \n",
548 | "11 60 HIGHSCHOOL MARRIEDCIVSPO REPAIRCRAFT HUSBAND WHITE \n",
549 | "12 28 HIGHSCHOOL MARRIEDCIVSPO MACHINEOPERINSP HUSBAND WHITE \n",
550 | "13 61 HIGHSCHOOL MARRIEDCIVSPO REPAIRCRAFT HUSBAND WHITE \n",
551 | "14 34 HIGHSCHOOL MARRIEDCIVSPO MACHINEOPERINSP HUSBAND WHITE \n",
552 | "15 62 HIGHSCHOOL MARRIEDCIVSPO MACHINEOPERINSP HUSBAND WHITE \n",
553 | "16 56 HIGHSCHOOL MARRIEDCIVSPO MACHINEOPERINSP HUSBAND WHITE \n",
554 | "17 34 HIGHSCHOOL MARRIEDCIVSPO MACHINEOPERINSP HUSBAND WHITE \n",
555 | "18 29 HIGHSCHOOL MARRIEDCIVSPO SUPPORTTECHNIC HUSBAND WHITE \n",
556 | "19 33 HIGHSCHOOL MARRIEDCIVSPO MACHINEOPERINSP HUSBAND WHITE \n",
557 | "\n",
558 | " GENDER PROSPECT_ID EMPLOYMENT YEARS_EDUC COUNTRY_OF_BIRTH INV_EARNINGS \\\n",
559 | "0 MALE 77001107 BUSINESS 9 USA 0 \n",
560 | "1 MALE 77001140 BUSINESS 9 USA 0 \n",
561 | "2 MALE 77001143 BUSINESS 9 USA 0 \n",
562 | "3 MALE 77001187 BUSINESS 9 USA 0 \n",
563 | "4 MALE 77001412 BUSINESS 9 USA 0 \n",
564 | "5 MALE 77001429 BUSINESS 9 USA 0 \n",
565 | "6 MALE 77001443 BUSINESS 9 USA 0 \n",
566 | "7 MALE 77001484 BUSINESS 9 USA 0 \n",
567 | "8 MALE 77001526 BUSINESS 9 USA 0 \n",
568 | "9 MALE 77001634 BUSINESS 9 USA 0 \n",
569 | "10 MALE 77001649 BUSINESS 9 USA 0 \n",
570 | "11 MALE 77001660 BUSINESS 9 USA 0 \n",
571 | "12 MALE 77001661 BUSINESS 9 USA 0 \n",
572 | "13 MALE 77001719 BUSINESS 9 USA 0 \n",
573 | "14 MALE 77001794 BUSINESS 9 USA 0 \n",
574 | "15 MALE 77001874 BUSINESS 9 USA 0 \n",
575 | "16 MALE 77001913 BUSINESS 9 USA 0 \n",
576 | "17 MALE 77001942 BUSINESS 9 USA 0 \n",
577 | "18 MALE 77001977 BUSINESS 9 USA 0 \n",
578 | "19 MALE 77002003 BUSINESS 9 USA 0 \n",
579 | "\n",
580 | " INV_LOSSES WORKWEEK_LENGTH RESPONSE AMOUNT \n",
581 | "0 0 40 0 123.90 \n",
582 | "1 0 40 0 97.91 \n",
583 | "2 0 40 0 135.65 \n",
584 | "3 0 40 0 93.23 \n",
585 | "4 0 40 0 143.72 \n",
586 | "5 0 40 0 232.20 \n",
587 | "6 0 40 0 103.46 \n",
588 | "7 0 40 0 123.53 \n",
589 | "8 0 40 0 230.07 \n",
590 | "9 0 40 0 48.95 \n",
591 | "10 0 40 0 58.78 \n",
592 | "11 0 40 0 106.29 \n",
593 | "12 0 40 0 211.26 \n",
594 | "13 0 40 0 104.34 \n",
595 | "14 0 40 0 184.73 \n",
596 | "15 0 40 0 66.32 \n",
597 | "16 0 40 0 60.13 \n",
598 | "17 0 40 0 77.02 \n",
599 | "18 0 40 0 122.43 \n",
600 | "19 0 40 0 257.23 "
601 | ]
602 | },
603 | "metadata": {},
604 | "output_type": "display_data"
605 | }
606 | ],
607 | "source": [
608 | "import pandas as pd\n",
609 | "from IPython.display import display\n",
610 | "\n",
611 | "df = pd.read_sql(\"select TOP 20 * from %s\" % dataTable, conn)\n",
612 | "display(df)"
613 | ]
614 | },
615 | {
616 | "cell_type": "code",
617 | "execution_count": 6,
618 | "metadata": {},
619 | "outputs": [
620 | {
621 | "data": {
622 | "text/html": [
623 | "\n",
624 | "\n",
637 | "
\n",
638 | " \n",
639 | " \n",
640 | " | \n",
641 | " Aggregate_1 | \n",
642 | "
\n",
643 | " \n",
644 | " \n",
645 | " \n",
646 | " 0 | \n",
647 | " 48842 | \n",
648 | "
\n",
649 | " \n",
650 | "
\n",
651 | "
"
652 | ],
653 | "text/plain": [
654 | " Aggregate_1\n",
655 | "0 48842"
656 | ]
657 | },
658 | "metadata": {},
659 | "output_type": "display_data"
660 | }
661 | ],
662 | "source": [
663 | "# Show number rows\n",
664 | "df1 = pd.read_sql(\"SELECT COUNT(*) FROM %s\" % dataTable, conn)\n",
665 | "display(df1)"
666 | ]
667 | },
668 | {
669 | "cell_type": "markdown",
670 | "metadata": {},
671 | "source": [
672 | "### Cleaning before retrying"
673 | ]
674 | },
675 | {
676 | "cell_type": "code",
677 | "execution_count": 7,
678 | "metadata": {},
679 | "outputs": [],
680 | "source": [
681 | "# Before executing the notebook again, drop model, tables, views,... previously created\n",
682 | "#curs.execute(\"DROP VIEW %s\" % trainingTable)\n",
683 | "#curs.execute(\"DROP VIEW %s\" % predictTable)\n",
684 | "#curs.execute(\"DROP MODEL CampaignModel\")"
685 | ]
686 | },
687 | {
688 | "cell_type": "markdown",
689 | "metadata": {},
690 | "source": [
691 | "### 5. Make some views to split training and testing datasets"
692 | ]
693 | },
694 | {
695 | "cell_type": "code",
696 | "execution_count": 8,
697 | "metadata": {},
698 | "outputs": [],
699 | "source": [
700 | "# Small training set view\n",
701 | "curs.execute(\"CREATE VIEW %s AS SELECT * FROM %s WHERE ID<3001\" % (trainingTable,dataTable))\n",
702 | "# Small prediction set\n",
703 | "curs.execute(\"CREATE VIEW %s AS SELECT * FROM %s WHERE ID>47000\" % (predictTable,dataTable))"
704 | ]
705 | },
706 | {
707 | "cell_type": "markdown",
708 | "metadata": {},
709 | "source": [
710 | "### 6. Create and Train an IntegratedML Model using default settings\n",
711 | "IntegratedML only needs a model name, the name of the column that is the target column to predict, and a table (or SELECT query to specify input columns."
712 | ]
713 | },
714 | {
715 | "cell_type": "code",
716 | "execution_count": 9,
717 | "metadata": {},
718 | "outputs": [],
719 | "source": [
720 | "curs.execute(\"CREATE MODEL CampaignModel PREDICTING (RESPONSE) FROM %s\" % trainingTable)"
721 | ]
722 | },
723 | {
724 | "cell_type": "markdown",
725 | "metadata": {},
726 | "source": [
727 | "Now that the model is defined, you can TRAIN it, which invokes the AutoML machine learning procedure."
728 | ]
729 | },
730 | {
731 | "cell_type": "code",
732 | "execution_count": 10,
733 | "metadata": {},
734 | "outputs": [],
735 | "source": [
736 | "curs.execute(\"TRAIN MODEL CampaignModel\")"
737 | ]
738 | },
739 | {
740 | "cell_type": "markdown",
741 | "metadata": {},
742 | "source": [
743 | "Once that finishes, you can see some information about the model in the \"ML_TRAINED_MODELS\" table."
744 | ]
745 | },
746 | {
747 | "cell_type": "code",
748 | "execution_count": 11,
749 | "metadata": {},
750 | "outputs": [
751 | {
752 | "data": {
753 | "text/html": [
754 | "\n",
755 | "\n",
768 | "
\n",
769 | " \n",
770 | " \n",
771 | " | \n",
772 | " MODEL_NAME | \n",
773 | " TRAINED_MODEL_NAME | \n",
774 | " PROVIDER | \n",
775 | " TRAINED_TIMESTAMP | \n",
776 | " MODEL_TYPE | \n",
777 | " MODEL_INFO | \n",
778 | "
\n",
779 | " \n",
780 | " \n",
781 | " \n",
782 | " 0 | \n",
783 | " CampaignModel | \n",
784 | " CampaignModel_t1 | \n",
785 | " AutoML | \n",
786 | " 2022-04-22 08:01:09.859000 | \n",
787 | " classification | \n",
788 | " ModelType:TensorFlow Neural Network, Package:T... | \n",
789 | "
\n",
790 | " \n",
791 | " 1 | \n",
792 | " bc | \n",
793 | " bc_t1 | \n",
794 | " AutoML | \n",
795 | " 2022-04-22 07:54:38.592000 | \n",
796 | " classification | \n",
797 | " ModelType:TensorFlow Neural Network, Package:T... | \n",
798 | "
\n",
799 | " \n",
800 | "
\n",
801 | "
"
802 | ],
803 | "text/plain": [
804 | " MODEL_NAME TRAINED_MODEL_NAME PROVIDER TRAINED_TIMESTAMP \\\n",
805 | "0 CampaignModel CampaignModel_t1 AutoML 2022-04-22 08:01:09.859000 \n",
806 | "1 bc bc_t1 AutoML 2022-04-22 07:54:38.592000 \n",
807 | "\n",
808 | " MODEL_TYPE MODEL_INFO \n",
809 | "0 classification ModelType:TensorFlow Neural Network, Package:T... \n",
810 | "1 classification ModelType:TensorFlow Neural Network, Package:T... "
811 | ]
812 | },
813 | "metadata": {},
814 | "output_type": "display_data"
815 | }
816 | ],
817 | "source": [
818 | "df3 = pd.read_sql(\"SELECT * FROM INFORMATION_SCHEMA.ML_TRAINED_MODELS\", conn)\n",
819 | "display(df3)"
820 | ]
821 | },
822 | {
823 | "cell_type": "markdown",
824 | "metadata": {},
825 | "source": [
826 | "### 7. Compare model output to data it has not seen yet\n",
827 | "Now you can use SQL to SELECT data from another table, run the IntegratedML model on this new data, and see how well the predictions match the data!"
828 | ]
829 | },
830 | {
831 | "cell_type": "code",
832 | "execution_count": 12,
833 | "metadata": {},
834 | "outputs": [
835 | {
836 | "data": {
837 | "text/html": [
838 | "\n",
839 | "\n",
852 | "
\n",
853 | " \n",
854 | " \n",
855 | " | \n",
856 | " PredictedResponse | \n",
857 | " ActualResponse | \n",
858 | "
\n",
859 | " \n",
860 | " \n",
861 | " \n",
862 | " 0 | \n",
863 | " 0 | \n",
864 | " 0 | \n",
865 | "
\n",
866 | " \n",
867 | " 1 | \n",
868 | " 0 | \n",
869 | " 0 | \n",
870 | "
\n",
871 | " \n",
872 | " 2 | \n",
873 | " 0 | \n",
874 | " 1 | \n",
875 | "
\n",
876 | " \n",
877 | " 3 | \n",
878 | " 0 | \n",
879 | " 0 | \n",
880 | "
\n",
881 | " \n",
882 | " 4 | \n",
883 | " 0 | \n",
884 | " 1 | \n",
885 | "
\n",
886 | " \n",
887 | " ... | \n",
888 | " ... | \n",
889 | " ... | \n",
890 | "
\n",
891 | " \n",
892 | " 1837 | \n",
893 | " 0 | \n",
894 | " 0 | \n",
895 | "
\n",
896 | " \n",
897 | " 1838 | \n",
898 | " 0 | \n",
899 | " 0 | \n",
900 | "
\n",
901 | " \n",
902 | " 1839 | \n",
903 | " 0 | \n",
904 | " 0 | \n",
905 | "
\n",
906 | " \n",
907 | " 1840 | \n",
908 | " 0 | \n",
909 | " 0 | \n",
910 | "
\n",
911 | " \n",
912 | " 1841 | \n",
913 | " 0 | \n",
914 | " 0 | \n",
915 | "
\n",
916 | " \n",
917 | "
\n",
918 | "
1842 rows × 2 columns
\n",
919 | "
"
920 | ],
921 | "text/plain": [
922 | " PredictedResponse ActualResponse\n",
923 | "0 0 0\n",
924 | "1 0 0\n",
925 | "2 0 1\n",
926 | "3 0 0\n",
927 | "4 0 1\n",
928 | "... ... ...\n",
929 | "1837 0 0\n",
930 | "1838 0 0\n",
931 | "1839 0 0\n",
932 | "1840 0 0\n",
933 | "1841 0 0\n",
934 | "\n",
935 | "[1842 rows x 2 columns]"
936 | ]
937 | },
938 | "metadata": {},
939 | "output_type": "display_data"
940 | }
941 | ],
942 | "source": [
943 | "df4 = pd.read_sql(\"SELECT PREDICT(CampaignModel) AS PredictedResponse, \\\n",
944 | " response AS ActualResponse FROM %s\" % predictTable, conn)\n",
945 | "display(df4)"
946 | ]
947 | },
948 | {
949 | "cell_type": "markdown",
950 | "metadata": {},
951 | "source": [
952 | "### 8. VALIDATE MODEL command calculates accuracy metrics\n",
953 | "You can certainly take that output above and calculate the accuracy using a standard formula, but IntegratedML has a built-in function to do that!\n",
954 | "\n",
955 | "Each time you run the command \"VALIDATE MODEL...\" it generates a set of metrics calculated on the data passed into the query. Since this table can be a bit difficult to read in its raw form we use a simple \"pivot\" call to arrange the data."
956 | ]
957 | },
958 | {
959 | "cell_type": "code",
960 | "execution_count": 13,
961 | "metadata": {},
962 | "outputs": [
963 | {
964 | "data": {
965 | "text/html": [
966 | "\n",
967 | "\n",
980 | "
\n",
981 | " \n",
982 | " \n",
983 | " METRIC_NAME | \n",
984 | " Accuracy | \n",
985 | " F-Measure | \n",
986 | " Precision | \n",
987 | " Recall | \n",
988 | "
\n",
989 | " \n",
990 | " VALIDATION_RUN_NAME | \n",
991 | " | \n",
992 | " | \n",
993 | " | \n",
994 | " | \n",
995 | "
\n",
996 | " \n",
997 | " \n",
998 | " \n",
999 | " CampaignModel_t1_v1 | \n",
1000 | " 0.6 | \n",
1001 | " 0.2 | \n",
1002 | " 0.81 | \n",
1003 | " 0.11 | \n",
1004 | "
\n",
1005 | " \n",
1006 | "
\n",
1007 | "
"
1008 | ],
1009 | "text/plain": [
1010 | "METRIC_NAME Accuracy F-Measure Precision Recall\n",
1011 | "VALIDATION_RUN_NAME \n",
1012 | "CampaignModel_t1_v1 0.6 0.2 0.81 0.11"
1013 | ]
1014 | },
1015 | "metadata": {},
1016 | "output_type": "display_data"
1017 | }
1018 | ],
1019 | "source": [
1020 | "curs.execute(\"VALIDATE MODEL CampaignModel FROM %s\" % predictTable)\n",
1021 | "df5 = pd.read_sql(\"SELECT * FROM INFORMATION_SCHEMA.ML_VALIDATION_METRICS\", conn)\n",
1022 | "df6 = df5.pivot(index='VALIDATION_RUN_NAME', columns='METRIC_NAME', values='METRIC_VALUE')\n",
1023 | "display(df6)"
1024 | ]
1025 | }
1026 | ],
1027 | "metadata": {
1028 | "kernelspec": {
1029 | "display_name": "Python 3",
1030 | "language": "python",
1031 | "name": "python3"
1032 | },
1033 | "language_info": {
1034 | "codemirror_mode": {
1035 | "name": "ipython",
1036 | "version": 3
1037 | },
1038 | "file_extension": ".py",
1039 | "mimetype": "text/x-python",
1040 | "name": "python",
1041 | "nbconvert_exporter": "python",
1042 | "pygments_lexer": "ipython3",
1043 | "version": "3.6.9"
1044 | }
1045 | },
1046 | "nbformat": 4,
1047 | "nbformat_minor": 2
1048 | }
1049 |
--------------------------------------------------------------------------------
/jupyter-samples/intersystems-jdbc-3.1.0.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/intersystems-community/integratedml-demo-template/f7ed655425bed01d5aad595a26554d8cd5757f4b/jupyter-samples/intersystems-jdbc-3.1.0.jar
--------------------------------------------------------------------------------
/jupyter-samples/libirisodbcu35.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/intersystems-community/integratedml-demo-template/f7ed655425bed01d5aad595a26554d8cd5757f4b/jupyter-samples/libirisodbcu35.so
--------------------------------------------------------------------------------
/jupyter-samples/odbc.ini:
--------------------------------------------------------------------------------
1 | [user]
2 | Driver=InterSystems ODBC35
3 | Protocol=TCP
4 | Host=irisimlsvr
5 | Port=51773
6 | Namespace=USER
7 | UID=SUPERUSER
8 | Password=SYS
9 | Description=Sample namespace
10 | Query Timeout=0
11 | Static Cursors=0
12 |
13 |
--------------------------------------------------------------------------------
/jupyter-samples/odbcinst.ini:
--------------------------------------------------------------------------------
1 | [InterSystems ODBC35]
2 | UsageCount=1
3 | Driver=/tf/libirisodbcu35.so
4 | Setup=/tf/libirisodbcu35.so
5 | SQLLevel=1
6 | FileUsage=0
7 | DriverODBCVer=02.10
8 | ConnectFunctions=YYN
9 | APILevel=1
10 | DEBUG=1
11 | CPTimeout=
12 |
13 |
--------------------------------------------------------------------------------
/tf2-jupyter-jdbc/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM tensorflow/tensorflow:2.2.0-jupyter
2 | LABEL maintainer="Zhong Li "
3 |
4 | RUN apt-get update && apt-get install -y default-jre-headless && apt-get clean && rm -rf /var/lib/apt/lists/*
5 |
6 | COPY requirements.txt ./
7 |
8 | RUN pip install --upgrade pip && \
9 | pip install --no-cache-dir -r requirements.txt
10 |
11 | CMD /bin/bash -c "source /etc/bash.bashrc && jupyter notebook --notebook-dir=/tf --ip 0.0.0.0 --no-browser --allow-root --NotebookApp.token=''"
--------------------------------------------------------------------------------
/tf2-jupyter-jdbc/requirements.txt:
--------------------------------------------------------------------------------
1 | jupyterthemes
2 | JayDeBeApi
3 | pandas
4 | scikit-learn
5 |
6 |
--------------------------------------------------------------------------------