├── .gitignore
├── Makefile
├── README.md
├── package.json
├── s3.exclude
├── tools
    ├── fuse.coffee
    ├── server.coffee
    └── site-src
    │   ├── 404.jade
    │   ├── _templates
    │       ├── default.jade
    │       ├── footer.jade
    │       ├── head.jade
    │       ├── header.jade
    │       └── scripts.jade
    │   ├── assets
    │       ├── images
    │       │   └── h2o-home.png
    │       ├── styles
    │       │   ├── font-awesome.min.css
    │       │   ├── futura-mock.css
    │       │   ├── github.css
    │       │   ├── icomoon.css
    │       │   └── screen.css
    │       └── vendor
    │       │   ├── bootstrap
    │       │       ├── css
    │       │       │   ├── bootstrap-theme.css
    │       │       │   ├── bootstrap-theme.css.map
    │       │       │   ├── bootstrap-theme.min.css
    │       │       │   ├── bootstrap.css
    │       │       │   ├── bootstrap.css.map
    │       │       │   └── bootstrap.min.css
    │       │       ├── fonts
    │       │       │   ├── glyphicons-halflings-regular.eot
    │       │       │   ├── glyphicons-halflings-regular.svg
    │       │       │   ├── glyphicons-halflings-regular.ttf
    │       │       │   └── glyphicons-halflings-regular.woff
    │       │       └── js
    │       │       │   ├── bootstrap.js
    │       │       │   └── bootstrap.min.js
    │       │   ├── fastclick.min.js
    │       │   ├── imagesloaded.pkgd.js
    │       │   ├── jquery-1.11.1.js
    │       │   ├── jquery-1.11.1.min.js
    │       │   ├── jquery-1.11.1.min.map
    │       │   ├── jquery.actual.min.js
    │       │   ├── jquery.html5support.min.js
    │       │   ├── jquery.slimscroll.min.js
    │       │   ├── jquery.touchwipe.min.js
    │       │   ├── jquery.vticker.min.js
    │       │   └── modernizr.min.js
    │   └── index.md
└── tutorials
    ├── Training_img
        ├── AUC1.png
        ├── AUC2.png
        ├── ClusterStatus.png
        ├── ConfMtx1.png
        ├── ConfMtx2.png
        ├── CreateFrame1.png
        ├── CreateFrame2.png
        ├── Export.png
        ├── GLMResults.png
        ├── GainsLift.png
        ├── GainsLift1.png
        ├── GainsLift2.png
        ├── HitRatio.png
        ├── IOStatus.png
        ├── Import.png
        ├── Import2.png
        ├── Impute1.png
        ├── Impute2.png
        ├── Inspect1.png
        ├── Inspect2.png
        ├── Interaction1.png
        ├── Interaction2.png
        ├── Jobs.png
        ├── Logs.png
        ├── NetworkTest.png
        ├── PCAScore.png
        ├── POJO.png
        ├── Parse.png
        ├── PerfBar.png
        ├── Predict1.png
        ├── PredictModel.png
        ├── PredictResults.png
        ├── Profiler.png
        ├── Profiler2.png
        ├── Quantiles1.png
        ├── Quantiles2.png
        ├── STEAMtabular.png
        ├── SplitFrame.png
        ├── SplitFrame2.png
        ├── StackDump.png
        ├── Summary1.png
        ├── Summary2.png
        ├── Tasks.png
        ├── Timeline.png
        ├── Training.md
        ├── UDPDrop.png
        ├── Upload1.png
        ├── Upload2.png
        └── View.png
    ├── advanced
        ├── binaryClassificationHelper.R.md
        ├── features
        │   └── features.R.md
        ├── higgs
        │   ├── .DS_Store
        │   ├── higgs.R.md
        │   └── images
        │   │   └── higgs.png
        └── tools
        │   └── tools.R.md
    ├── basics
        └── basics.R.md
    ├── bigdataenv
        └── H2OinBigDataEnvironments.pdf
    ├── devel
        ├── droplets
        │   ├── images
        │   │   ├── 1-OpenProject.png
        │   │   ├── 2-JavaDroplet.png
        │   │   ├── 3-RebuildProject.png
        │   │   ├── 4-RunTest.png
        │   │   └── 5-TestPassed.png
        │   └── tutorial.md
        ├── hacking
        │   ├── Grep.md
        │   ├── KMeans.md
        │   └── Quantiles.md
        └── sparkling_water
        │   ├── images
        │       ├── h2o-ui.png
        │       ├── rstudio.png
        │       └── spark-ui.png
        │   └── tutorial.md
    ├── extab
        ├── excel.md
        ├── images
        │   ├── excel01.png
        │   ├── excel02.png
        │   ├── excel03.png
        │   ├── excel04.png
        │   ├── tableau_dashboard.png
        │   ├── tableau_data_connection1.png
        │   ├── tableau_data_connection2.png
        │   ├── tableau_execute.png
        │   ├── tableau_execute2.png
        │   ├── tableau_execute3.png
        │   ├── tableau_execute4.png
        │   ├── tableau_h2o_parameters.png
        │   ├── tableau_r_connection1.png
        │   ├── tableau_r_connection2.png
        │   └── workflow.png
        └── tableau.md
    ├── hive_udf_template
        ├── GBM-example.R
        ├── README.md
        ├── localjars
        │   └── h2o-model.jar
        ├── pom.xml
        └── src
        │   ├── main
        │       └── java
        │       │   └── com
        │       │       └── h2o
        │       │           └── hive
        │       │               └── udf
        │       │                   ├── GBMPojo.java
        │       │                   └── ScoreDataUDF.java
        │   └── test
        │       └── java
        │           └── com
        │               └── h2o
        │                   └── hive
        │                       └── udf
        │                           └── UDFExampleTest.java
    ├── marketing_usecases
        ├── h2o_training_yan_2014.pdf
        ├── h2o_world_Vinod.pdf
        └── marketing_usecases.R.md
    ├── setup
        ├── images
        │   ├── 01_virtualbox.png
        │   ├── 02_vb_menu.png
        │   ├── 03_select_file.png
        │   └── 04_import_wizard.png
        └── install.md
    ├── streaming
        └── storm
        │   ├── H2OStormStarter.java
        │   ├── README.md
        │   ├── TestH2ODataSpout.java
        │   ├── example.R
        │   ├── images
        │       ├── cats_n_dogs.png
        │       ├── h2o_storm.png
        │       ├── ij_1.png
        │       ├── ij_10.png
        │       ├── ij_11.png
        │       ├── ij_2.png
        │       ├── ij_3.png
        │       ├── ij_4.png
        │       ├── ij_6.png
        │       ├── ij_7.png
        │       ├── ij_8.png
        │       └── ij_9.png
        │   ├── live_data.csv
        │   ├── premade_generated_model
        │       ├── GBMPojo.java
        │       └── h2o-genmodel.jar
        │   ├── training_data.csv
        │   └── web
        │       ├── cat.png
        │       ├── cloud.png
        │       ├── dog.png
        │       ├── index.html
        │       └── out
    ├── supervised
        ├── classification
        │   ├── classification.R.md
        │   └── images
        │   │   ├── glm_f1_cutoff_0.png
        │   │   ├── glm_f1_cutoff_1.png
        │   │   ├── glm_roc_0.png
        │   │   ├── glm_roc_1.png
        │   │   └── metrics.png
        ├── deeplearning
        │   └── deeplearning.R.md
        ├── gbm
        │   └── gbm.R.md
        ├── glm
        │   └── glm.R.md
        ├── randomforest
        │   └── randomforest.R.md
        └── regression
        │   ├── images
        │       └── rand_glm_coef.png
        │   └── regression.R.md
    ├── troubleshooting
        ├── images
        │   ├── Clusterstattunnel.png
        │   ├── TroubleshootingHadoopAmbariNodeMgr.png
        │   ├── TroubleshootingHadoopAmbariyarnscheduler.png
        │   ├── TroubleshootingHadoopClouderayarnnodemgr.png
        │   ├── TroubleshootingHadoopClouderayarnscheduler.png
        │   └── UpdateR.png
        └── troubleshooting.md
    ├── unsupervised
        ├── anomaly
        │   ├── anomaly.R.md
        │   └── images
        │   │   ├── autoencoder.png
        │   │   ├── bad_both.png
        │   │   ├── good_both.png
        │   │   └── ugly_both.png
        ├── clustering
        │   └── clustering.R.md
        └── dimreduction
        │   ├── dimreduction.R.md
        │   └── images
        │       └── mnist_pca_sdev.png
    └── web_ui
        └── tutorial.md


/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules/
2 | build/
3 | 
4 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | build: node_modules build/site-src
 2 | 	node_modules/.bin/coffee tools/fuse.coffee build/site-src build/site
 3 | 
 4 | run: node_modules
 5 | 	@echo "Go to http://localhost:8080/"
 6 | 	node_modules/.bin/coffee tools/server.coffee build/site
 7 | 
 8 | # After 'make run' you can do this to get to the web site.
 9 | browser_on_mac:
10 | 	open http://localhost:8080
11 | 
12 | clean:
13 | 	rm -rf build
14 | 
15 | node_modules: package.json
16 | 	npm install
17 | 
18 | build/site-src: 
19 | 	rsync -rupE tools/site-src build/
20 | 	rsync -rupE tutorials/ build/site-src/
21 | 
22 | mrproper: clean
23 | 	rm -rf node_modules/
24 | 
25 | install:
26 | 	rm -rf /opt/h2o-training
27 | 	cp -r build/site/ /opt/h2o-training
28 | 
29 | test:
30 | 	s3cmd sync --dry-run --delete-removed --acl-public --exclude-from s3.exclude build/site/ s3://train.h2o.ai/
31 | 
32 | push:
33 | 	s3cmd sync --delete-removed --acl-public --exclude-from s3.exclude build/site/ s3://train.h2o.ai/
34 | 
35 | .PHONY: build run clean test push build/site-src test
36 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Please go to the h2o-tutorials repository instead!
 2 | 
 3 | ## This repository is now outdated, and contains material for H2O World 2014 training using H2O 2.8.
 4 | 
 5 | ---
 6 | 
 7 | H<sub>2</sub>O Training
 8 | ============
 9 | 
10 | ## Follow tutorials
11 | 
12 | 
13 | ## Build a site
14 | 
15 |   * Run `make build` to build a site
16 |   * Run `make run` to run a web server
17 |   * Go to [http://localhost:8080/](http://localhost:8080/) to see generated site
18 | 
19 | 


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "0xdata.com",
 3 |   "version": "0.0.0",
 4 |   "description": "0xdata Public Website",
 5 |   "main": "index.js",
 6 |   "scripts": {
 7 |     "test": "echo \"Error: no test specified\" && exit 1"
 8 |   },
 9 |   "repository": {
10 |     "type": "git",
11 |     "url": "https://github.com/0xdata/0xdata.com.git"
12 |   },
13 |   "keywords": [
14 |     "0xdata.com",
15 |     "website"
16 |   ],
17 |   "author": "Prithvi Prabhu <prithvi@0xdata.com>",
18 |   "license": "ISC",
19 |   "bugs": {
20 |     "url": "https://github.com/0xdata/0xdata.com/issues"
21 |   },
22 |   "homepage": "https://github.com/0xdata/0xdata.com",
23 |   "devDependencies": {
24 |     "highlight.js": "~8.2.0"
25 |   },
26 |   "dependencies": {
27 |     "escape-html": "~1.0.1",
28 |     "twit": "~1.1.18",
29 |     "coffee-script": "~1.7.1",
30 |     "jade": "~1.5.0",
31 |     "js-yaml": "~3.1.0",
32 |     "marked": "~0.3.2",
33 |     "fs-extra": "~0.10.0",
34 |     "connect": "~3.1.0",
35 |     "serve-static": "~1.5.0",
36 |     "dateformat": "~1.0.8-1.2.3",
37 |     "underscore": "~1.6.0"
38 |   }
39 | }
40 | 


--------------------------------------------------------------------------------
/s3.exclude:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | .git/*
3 | *.swp
4 | 


--------------------------------------------------------------------------------
/tools/fuse.coffee:
--------------------------------------------------------------------------------
  1 | fs = require 'fs'
  2 | fp = require 'path'
  3 | fse = require 'fs-extra'
  4 | jade = require 'jade'
  5 | marked = require 'marked'
  6 | yaml = require 'js-yaml' 
  7 | dateformat = require 'dateformat'
  8 | underscore = require 'underscore'
  9 | highlight = require 'highlight.js'
 10 | 
 11 | SIM = no
 12 | 
 13 | SITEMAP_XML = '''
 14 | <?xml version="1.0" encoding="UTF-8"?>
 15 | <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
 16 | {{urls}}
 17 | </urlset>
 18 | '''
 19 | 
 20 | marked.setOptions
 21 |   smartypants: yes
 22 |   highlight: (code, lang) ->
 23 |     (highlight.highlightAuto code, [ lang ]).value
 24 | 
 25 | isYaml = (ext) -> ext.toLowerCase() is '.yml'
 26 | isJade = (ext) -> ext.toLowerCase() is '.jade'
 27 | isMarkdown = (ext) -> ext.toLowerCase() is '.md'
 28 | isContent = (ext) -> (isMarkdown ext) or (isJade ext)
 29 | readFile = (path) -> fs.readFileSync path, 'utf8'
 30 | writeFile = fse.outputFileSync
 31 | copyFile = fse.copySync
 32 | 
 33 | formBin = (src) ->
 34 |   if isContent src.ext
 35 |     if src.dir is '.' and (src.slug is 'index' or src.slug is '404')
 36 |       path: fp.join src.dir, "#{src.slug}.html"
 37 |     else
 38 |       if 0 < src.slug.indexOf '_'
 39 |         tokens = src.slug.split /_+/g
 40 |         tokens.unshift src.dir
 41 |         tokens.push 'index.html'
 42 |         path: fp.join.apply null, tokens
 43 |       else
 44 |         path: fp.join src.dir, "#{src.slug}.html"
 45 |         #path: fp.join src.dir, src.slug, 'index.html'
 46 |   else
 47 |     path: src.path
 48 | 
 49 | #TODO turn this into a plugin
 50 | createSitemapTxt = (targetDir, urls) ->
 51 |   writeFile (fp.join targetDir, 'sitemap.txt'), urls.join '\n'
 52 | 
 53 | #TODO turn this into a plugin
 54 | createSitemapXml = (targetDir, urls) ->
 55 |   writeFile (fp.join targetDir, 'sitemap.xml'), SITEMAP_XML.replace '{{urls}}', (urls.map (url) -> "<url><loc>#{url}</loc></url>").join '\n'
 56 | 
 57 | createCategorySlug = (category) ->
 58 |   category
 59 |     .toLowerCase()
 60 |     .replace /[^a-z0-9 ]/g, ''
 61 |     .replace /\s+/g, '-'
 62 | 
 63 | _templates = {}
 64 | loadTemplate = (path, cache=yes) ->
 65 |   if cache and template = _templates[path]
 66 |     template
 67 |   else
 68 |     template = jade.compileFile path,
 69 |       filename: path
 70 |     if cache
 71 |       _templates[path] = template
 72 |     template
 73 | 
 74 | forEachPage = (node, go) ->
 75 |   for name, child of node
 76 |     if child.__fuse__
 77 |       go node, child
 78 |     else
 79 |       forEachPage child, go
 80 |   return
 81 | 
 82 | walkSources = (sourceDir, currentDir, node) ->
 83 |   for name in (fs.readdirSync currentDir) when name[0] isnt '.'
 84 |     path = fp.join currentDir, name
 85 |     stat = fs.statSync path
 86 |     if stat.isDirectory() or stat.isFile()
 87 |       relpath = fp.relative sourceDir, path
 88 |       if stat.isDirectory()
 89 |         if name isnt '_templates'
 90 |           node[name] = leaf = {}
 91 |           walkSources sourceDir, path, leaf
 92 |       else
 93 |         ext = fp.extname path
 94 |         slug = fp.basename path, ext
 95 |         dir = fp.dirname relpath
 96 | 
 97 |         src =
 98 |           dir: dir
 99 |           slug: slug 
100 |           ext: ext
101 |           path: relpath
102 | 
103 |         bin = formBin src
104 |         path = bin.path.split fp.sep
105 |         path.pop() if path[path.length - 1] is 'index.html'
106 | 
107 |         unless (isContent ext) and slug[0] is '_' and not slug is '_sidebar'
108 |           node[name] =
109 |             __fuse__: yes
110 |             src: src
111 |             bin: bin
112 |             ext: if ext[0] is '.' then (ext.substr 1).toLowerCase() else ext.toLowerCase()
113 |             url: '/' + path.join '/' 
114 |             path: path
115 |   node
116 | 
117 | fuse = (context, sourceDir, targetDir) ->
118 |   unless fs.existsSync sourceDir
119 |     throw new Error 'Source directory does not exist: ' + sourceDir
120 | 
121 |   unless fs.statSync(sourceDir).isDirectory()
122 |     throw new Error 'Not a directory: ' + sourceDir
123 | 
124 |   tree = walkSources sourceDir, sourceDir, {}
125 | 
126 |   tree.find = (path) ->
127 |     node = tree
128 |     for slug in path
129 |       unless node = node[slug]
130 |         return null
131 |     node
132 | 
133 |   console.log 'Parsing files...'
134 | 
135 |   forEachPage tree, (parent, item) ->
136 |     sourcePath = fp.join sourceDir, item.src.path
137 |     if isMarkdown item.src.ext
138 |       console.log 'Parsing ' + sourcePath
139 |       content = readFile sourcePath
140 |       if content[0 ... 3] is '---'
141 |         result = content.match /^-{3,}\s([\s\S]*?)-{3,}(\s[\s\S]*|\s?)$/
142 |         if result?.length is 3
143 |           [ match, metadata, markdown ] = result
144 |         else
145 |           markdown = content
146 |       else
147 |         markdown = content
148 | 
149 |       if metadata
150 |         properties = yaml.safeLoad metadata
151 |         for k, v of properties
152 |           item[k] = if k is 'date' then new Date v else v
153 | 
154 |       if markdown
155 |         item.content = marked markdown
156 |       
157 |     else if isYaml item.src.ext
158 |       console.log 'Parsing ' + sourcePath
159 |       item.content = yaml.safeLoad readFile sourcePath
160 | 
161 |   console.log 'Building site...'
162 | 
163 |   forEachPage tree, (parent, page) ->
164 |     if isMarkdown page.src.ext
165 |       if page.src.slug[0] isnt '_'
166 |         console.log 'Processing: ' + page.src.path
167 |         template = loadTemplate fp.join sourceDir, '_templates', "#{page.template or 'default'}.jade" #TODO
168 |         html = template
169 |           context: context
170 |           pages: tree
171 |           page: page
172 | 
173 |         binPath = fp.join targetDir, page.bin.path
174 |         console.log "#{page.src.path} --> #{binPath}"
175 |         writeFile binPath, html unless SIM
176 | 
177 |     else if isJade page.src.ext 
178 |       if page.src.slug[0] isnt '_'
179 |         console.log 'Processing: ' + page.src.path
180 |         
181 |         sourcePath = fp.join sourceDir, page.src.path
182 |         render = loadTemplate sourcePath, no
183 |         page.content = render
184 |           context: context
185 |           pages: tree
186 |           page: page
187 | 
188 |         template = loadTemplate fp.join sourceDir, '_templates', 'default.jade' #TODO
189 |         html = template
190 |           context: context
191 |           pages: tree
192 |           page: page
193 |           
194 |         binPath = fp.join targetDir, page.bin.path
195 |         console.log "#{page.src.path} --> #{binPath}"
196 |         writeFile binPath, html unless SIM
197 |     else
198 |       console.log 'Copying: ' + page.src.path
199 |       srcPath = fp.join sourceDir, page.src.path
200 |       binPath = fp.join targetDir, page.bin.path
201 |       console.log "#{srcPath} --> #{binPath}"
202 |       copyFile srcPath, binPath unless SIM
203 | 
204 |   if SIM
205 |     console.log 'Dumping...'
206 |     writeFile 'fuse.dump', JSON.stringify tree, null, 2 
207 | 
208 | 
209 |   urls = []
210 |   forEachPage tree, (parent, page) ->
211 |     if isContent page.src.ext
212 |       urls.push 'http://train.h2o.ai' + page.url
213 | 
214 |   console.log 'Creating sitemaps...'
215 |   createSitemapTxt targetDir, urls
216 |   createSitemapXml targetDir, urls
217 | 
218 |   console.log 'Done!'
219 | 
220 | [ runtime, script, sourceDir, targetDir ] = process.argv
221 | 
222 | context =
223 |   underscore: underscore
224 |   formatDate: dateformat
225 | 
226 | fuse context, sourceDir, targetDir
227 | 
228 | return
229 | 


--------------------------------------------------------------------------------
/tools/server.coffee:
--------------------------------------------------------------------------------
1 | connect = require 'connect'
2 | serveStatic = require 'serve-static'
3 | connect()
4 |   .use serveStatic process.argv[2]
5 |   .listen 8080
6 | 


--------------------------------------------------------------------------------
/tools/site-src/404.jade:
--------------------------------------------------------------------------------
1 | h1 Whoops
2 | h3 Looks like what you're looking for can't be found.
3 | 


--------------------------------------------------------------------------------
/tools/site-src/_templates/default.jade:
--------------------------------------------------------------------------------
 1 | <!--[if IE 8]><html id="ie8" lang="en"><![endif]-->
 2 | <!--[if IE 9]><html id="ie9" lang="en"><![endif]-->
 3 | <!--[if !IE]><!-->
 4 | html(lang="en")
 5 |   <!--<![endif]-->
 6 |   head
 7 |     include head.jade
 8 | 
 9 |   body.single
10 |     main.content
11 |       include header.jade
12 | 
13 |       .container(class='path-#{page.path[0]}')
14 |         if page.title
15 |           h1= page.title
16 |         != page.content
17 |       
18 |     include footer.jade
19 |     include scripts.jade
20 | 
21 | 


--------------------------------------------------------------------------------
/tools/site-src/_templates/footer.jade:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tools/site-src/_templates/footer.jade


--------------------------------------------------------------------------------
/tools/site-src/_templates/head.jade:
--------------------------------------------------------------------------------
 1 | meta(charset="UTF-8")
 2 | meta(name='viewport', content='width=device-width, initial-scale=1.0')
 3 | - var pageTitle = page.title
 4 | title!= '0xdata' + (pageTitle ? ' - ' + pageTitle : '')
 5 | meta(name='description', content='0xdata, makers of H2O - The Open Source In-Memory Prediction Engine for Big Data Science')
 6 | link(rel='publisher', href='https://plus.google.com/109486779212435464374/')
 7 | meta(property='og:title', content='0xdata, makers of H2O - The Open Source In-Memory Prediction Engine for Big Data Science')
 8 | meta(property='og:type', content='article')
 9 | meta(property='og:image', content='http://0xdata.com/assets/images/h2o.png')
10 | meta(property='og:url', content='http://0xdata.com')
11 | meta(property='og:description', content='H2O makes Hadoop do Math! H2O scales statistics, machine learning and math over Big Data.  H2O keeps familiar interfaces like R, Excel & JSON so that big data enthusiasts and experts can explore, munge, model and score data sets using a range of simple to advanced algorithms.')
12 | link(rel='shortcut icon', href='/img/favicon.ico')
13 | link(rel='stylesheet', type='text/css', href='/assets/vendor/bootstrap/css/bootstrap.min.css', media='screen')
14 | link(rel='stylesheet', type='text/css', href='/assets/styles/icomoon.css', media='screen')
15 | link(rel='stylesheet', type='text/css', href='/assets/styles/font-awesome.min.css', media='screen')
16 | link(rel='stylesheet', type='text/css', href='/assets/styles/screen.css', media='screen')
17 | link(rel='stylesheet', type='text/css', href='/assets/styles/github.css', media='screen')
18 | 
19 | 
20 | //- Remove this TypeKit is used 
21 | //link(rel='stylesheet', type='text/css', href='/assets/styles/futura-mock.css', media='screen')
22 | //- /end Remove this TypeKit is used 
23 | 
24 | <!-- TypeKit
25 | script(type='text/javascript' src='//use.typekit.net/wcb1ppg.js')
26 | script(type='text/javascript').
27 |   try{Typekit.load();}catch(e){}
28 | /end TypeKit -->
29 | 
30 | <!--[if lte IE 8]>
31 | script(type='text/javascript').
32 |   (function() {
33 |     var html5elements = "address|article|aside|audio|canvas|command|datalist|details|dialog|figure|figcaption|footer|header|hgroup|keygen|mark|main|meter|menu|nav|progress|ruby|section|time|video".split('|');
34 |     for(var i = 0; i < html5elements.length; i++) {
35 |       document.createElement(html5elements[i]);
36 |     }
37 |   })();
38 | script(type='text/javascript', src='/assets/vendor/respond.min.js')
39 | <![endif]-->
40 | 
41 | 


--------------------------------------------------------------------------------
/tools/site-src/_templates/header.jade:
--------------------------------------------------------------------------------
1 | header.content-header
2 |   .site-logo
3 |   .container
4 |   
5 | 


--------------------------------------------------------------------------------
/tools/site-src/_templates/scripts.jade:
--------------------------------------------------------------------------------
 1 | script(type='text/javascript', src='/assets/vendor/jquery-1.11.1.min.js')
 2 | script(type='text/javascript', src='/assets/vendor/modernizr.min.js')
 3 | 
 4 | <!--[if lte IE 8]>
 5 | script(type='text/javascript', src='/assets/vendor/selectivizr.min.js')
 6 | <![endif]-->
 7 | 
 8 | script(type='text/javascript', src='/assets/vendor/bootstrap/js/bootstrap.min.js')
 9 | 
10 | script(type='text/javascript').
11 |   Modernizr.load([
12 |     {
13 |       test: (Modernizr.touch),
14 |       yep: ['/assets/vendor/fastclick.min.js'],
15 |       complete: function() {
16 |         if (Modernizr.touch) {
17 |           $('body').append("<script type=\"text/javascript\">$(function() { FastClick.attach(document.body); });<\/script>");
18 |         }
19 |       }
20 |     }
21 |   ]);
22 | 
23 | <!--[if lte IE 9]>
24 | script(type='text/javascript', src='/assets/vendor/jquery.html5support.min.js')
25 | script(type='text/javascript').
26 |   $(document).ready(function() { $.placeholder(); });
27 | <![endif]-->
28 | 
29 | 
30 | script(type='text/javascript', src='/assets/vendor/imagesloaded.pkgd.js')
31 | script(type='text/javascript', src='/assets/vendor/jquery.touchwipe.min.js')
32 | 
33 | 


--------------------------------------------------------------------------------
/tools/site-src/assets/images/h2o-home.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tools/site-src/assets/images/h2o-home.png


--------------------------------------------------------------------------------
/tools/site-src/assets/styles/futura-mock.css:
--------------------------------------------------------------------------------
  1 | @font-face {
  2 |     font-family: 'futura_ltlight';
  3 |     src: url('../fonts/futuralt-light-webfont.eot');
  4 |     src: url('../fonts/futuralt-light-webfont.eot?#iefix') format('embedded-opentype'),
  5 |          url('../fonts/futuralt-light-webfont.woff') format('woff'),
  6 |          url('../fonts/futuralt-light-webfont.ttf') format('truetype'),
  7 |          url('../fonts/futuralt-light-webfont.svg#futura_ltlight') format('svg');
  8 |     font-weight: normal;
  9 |     font-style: normal;
 10 | }
 11 | 
 12 | @font-face {
 13 |     font-family: 'futura_ltregular';
 14 |     src: url('../fonts/futuralt-webfont.eot');
 15 |     src: url('../fonts/futuralt-webfont.eot?#iefix') format('embedded-opentype'),
 16 |          url('../fonts/futuralt-webfont.woff') format('woff'),
 17 |          url('../fonts/futuralt-webfont.ttf') format('truetype'),
 18 |          url('../fonts/futuralt-webfont.svg#futura_ltregular') format('svg');
 19 |     font-weight: normal;
 20 |     font-style: normal;
 21 | }
 22 | 
 23 | @font-face {
 24 |     font-family: 'futura_ltbold';
 25 |     src: url('../fonts/futuralt-bold-webfont.eot');
 26 |     src: url('../fonts/futuralt-bold-webfont.eot?#iefix') format('embedded-opentype'),
 27 |          url('../fonts/futuralt-bold-webfont.woff') format('woff'),
 28 |          url('../fonts/futuralt-bold-webfont.ttf') format('truetype'),
 29 |          url('../fonts/futuralt-bold-webfont.svg#futura_ltbold') format('svg');
 30 |     font-weight: normal;
 31 |     font-style: normal;
 32 | }
 33 | 
 34 | body,
 35 | h1, h2, h3, h4, h5, h6,
 36 | .h1, .h2, .h3, .h4, .h5, .h6
 37 | {
 38 |     font-family:  'futura_ltregular', sans-serif;
 39 | }
 40 | 
 41 | h2, h3, h4, h5, h6
 42 | {
 43 |     font-family:  'futura_ltlight', sans-serif;
 44 | }
 45 | 
 46 | section header h2 {
 47 |     font-family: 'futura_ltlight';
 48 | }
 49 | 
 50 | #Promo .jumbotron p {
 51 |     font-family: 'futura_ltlight';
 52 | }
 53 | 
 54 | .team-description h4 {
 55 |     font-family: 'futura_ltlight';
 56 | }
 57 | 
 58 | .team-description .close {
 59 |     font-family: 'futura_ltlight';
 60 | }
 61 | 
 62 | .advisors-item p {
 63 |     font-family: 'futura_ltlight';
 64 | }
 65 | 
 66 | #Contact .form .form-control {
 67 |     font-family: 'futura_ltlight';
 68 | }
 69 | 
 70 | #Promo .jumbotron h1 {
 71 |     font-family: 'futura_ltbold';
 72 | }
 73 | #Promo .jumbotron h2 {
 74 |     font-family: 'futura_ltlight';
 75 | }
 76 | 
 77 | .content-header h1 {
 78 |     font-family: 'futura_ltlight';
 79 | }
 80 | .customers-list-item h2 {
 81 |     font-family: 'futura_ltlight';
 82 | }
 83 | .customers-list-item .image .cover .label {
 84 |     font-family: 'futura_ltbold';
 85 | }
 86 | 
 87 | #News nav {
 88 |     font-family: 'futura_ltlight';
 89 | }
 90 | #News .tab-content a .image .cover .label {
 91 |     font-family: 'futura_ltbold';
 92 | }
 93 | 
 94 | #Why_H2O h1 {
 95 |     font-family: 'futura_ltlight';
 96 | }
 97 | #Why_H2O .modal-trigger .image .cover .label {
 98 |     font-family: 'futura_ltbold';
 99 | }
100 | 


--------------------------------------------------------------------------------
/tools/site-src/assets/styles/github.css:
--------------------------------------------------------------------------------
  1 | /*
  2 | 
  3 | github.com style (c) Vasily Polovnyov <vast@whiteants.net>
  4 | 
  5 | */
  6 | 
  7 | .hljs {
  8 |   display: block;
  9 |   overflow-x: auto;
 10 |   padding: 0.5em;
 11 |   color: #333;
 12 |   background: #f8f8f8;
 13 |   -webkit-text-size-adjust: none;
 14 | }
 15 | 
 16 | .hljs-comment,
 17 | .hljs-template_comment,
 18 | .diff .hljs-header,
 19 | .hljs-javadoc {
 20 |   color: #998;
 21 |   font-style: italic;
 22 | }
 23 | 
 24 | .hljs-keyword,
 25 | .css .rule .hljs-keyword,
 26 | .hljs-winutils,
 27 | .javascript .hljs-title,
 28 | .nginx .hljs-title,
 29 | .hljs-subst,
 30 | .hljs-request,
 31 | .hljs-status {
 32 |   color: #333;
 33 |   font-weight: bold;
 34 | }
 35 | 
 36 | .hljs-number,
 37 | .hljs-hexcolor,
 38 | .ruby .hljs-constant {
 39 |   color: #008080;
 40 | }
 41 | 
 42 | .hljs-string,
 43 | .hljs-tag .hljs-value,
 44 | .hljs-phpdoc,
 45 | .hljs-dartdoc,
 46 | .tex .hljs-formula {
 47 |   color: #d14;
 48 | }
 49 | 
 50 | .hljs-title,
 51 | .hljs-id,
 52 | .scss .hljs-preprocessor {
 53 |   color: #900;
 54 |   font-weight: bold;
 55 | }
 56 | 
 57 | .javascript .hljs-title,
 58 | .hljs-list .hljs-keyword,
 59 | .hljs-subst {
 60 |   font-weight: normal;
 61 | }
 62 | 
 63 | .hljs-class .hljs-title,
 64 | .hljs-type,
 65 | .vhdl .hljs-literal,
 66 | .tex .hljs-command {
 67 |   color: #458;
 68 |   font-weight: bold;
 69 | }
 70 | 
 71 | .hljs-tag,
 72 | .hljs-tag .hljs-title,
 73 | .hljs-rules .hljs-property,
 74 | .django .hljs-tag .hljs-keyword {
 75 |   color: #000080;
 76 |   font-weight: normal;
 77 | }
 78 | 
 79 | .hljs-attribute,
 80 | .hljs-variable,
 81 | .lisp .hljs-body {
 82 |   color: #008080;
 83 | }
 84 | 
 85 | .hljs-regexp {
 86 |   color: #009926;
 87 | }
 88 | 
 89 | .hljs-symbol,
 90 | .ruby .hljs-symbol .hljs-string,
 91 | .lisp .hljs-keyword,
 92 | .clojure .hljs-keyword,
 93 | .scheme .hljs-keyword,
 94 | .tex .hljs-special,
 95 | .hljs-prompt {
 96 |   color: #990073;
 97 | }
 98 | 
 99 | .hljs-built_in {
100 |   color: #0086b3;
101 | }
102 | 
103 | .hljs-preprocessor,
104 | .hljs-pragma,
105 | .hljs-pi,
106 | .hljs-doctype,
107 | .hljs-shebang,
108 | .hljs-cdata {
109 |   color: #999;
110 |   font-weight: bold;
111 | }
112 | 
113 | .hljs-deletion {
114 |   background: #fdd;
115 | }
116 | 
117 | .hljs-addition {
118 |   background: #dfd;
119 | }
120 | 
121 | .diff .hljs-change {
122 |   background: #0086b3;
123 | }
124 | 
125 | .hljs-chunk {
126 |   color: #aaa;
127 | }
128 | 


--------------------------------------------------------------------------------
/tools/site-src/assets/styles/icomoon.css:
--------------------------------------------------------------------------------
 1 | @font-face {
 2 | 	font-family: 'icomoon';
 3 | 	src:url('../fonts/icomoon.eot');
 4 | 	src:url('../fonts/icomoon.eot?#iefix') format('embedded-opentype'),
 5 | 		url('../fonts/icomoon.woff') format('woff'),
 6 | 		url('../fonts/icomoon.ttf') format('truetype'),
 7 | 		url('../fonts/icomoon.svg#icomoon') format('svg');
 8 | 	font-weight: normal;
 9 | 	font-style: normal;
10 | }
11 | 
12 | [class^="icon-"], [class*=" icon-"] {
13 | 	font-family: 'icomoon';
14 | 	speak: none;
15 | 	font-style: normal;
16 | 	font-weight: normal;
17 | 	font-variant: normal;
18 | 	text-transform: none;
19 | 	line-height: 1;
20 | 
21 | 	/* Better Font Rendering =========== */
22 | 	-webkit-font-smoothing: antialiased;
23 | 	-moz-osx-font-smoothing: grayscale;
24 | }
25 | 
26 | .icon-play:before {
27 | 	content: "\e60b";
28 | }
29 | .icon-meetup:before {
30 | 	content: "\e609";
31 | }
32 | .icon-facebook:before {
33 | 	content: "\e607";
34 | }
35 | .icon-youtube:before {
36 | 	content: "\e600";
37 | }
38 | .icon-twitter:before {
39 | 	content: "\e601";
40 | }
41 | .icon-arrow-down:before {
42 | 	content: "\e60a";
43 | }
44 | .icon-arrow-up:before {
45 | 	content: "\e602";
46 | }
47 | .icon-pinterest:before {
48 | 	content: "\e603";
49 | }
50 | .icon-google-plus:before {
51 | 	content: "\e604";
52 | }
53 | .icon-github:before {
54 | 	content: "\e605";
55 | }
56 | .icon-arrow-right:before {
57 | 	content: "\e608";
58 | }
59 | .icon-arrow-left:before {
60 | 	content: "\e606";
61 | }
62 | 


--------------------------------------------------------------------------------
/tools/site-src/assets/vendor/bootstrap/fonts/glyphicons-halflings-regular.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tools/site-src/assets/vendor/bootstrap/fonts/glyphicons-halflings-regular.eot


--------------------------------------------------------------------------------
/tools/site-src/assets/vendor/bootstrap/fonts/glyphicons-halflings-regular.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tools/site-src/assets/vendor/bootstrap/fonts/glyphicons-halflings-regular.ttf


--------------------------------------------------------------------------------
/tools/site-src/assets/vendor/bootstrap/fonts/glyphicons-halflings-regular.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tools/site-src/assets/vendor/bootstrap/fonts/glyphicons-halflings-regular.woff


--------------------------------------------------------------------------------
/tools/site-src/assets/vendor/fastclick.min.js:
--------------------------------------------------------------------------------
 1 | /*
 2 |  FastClick: polyfill to remove click delays on browsers with touch UIs.
 3 | 
 4 |  @version 0.6.11
 5 |  @codingstandard ftlabs-jsv2
 6 |  @copyright The Financial Times Limited [All Rights Reserved]
 7 |  @license MIT License (see LICENSE.txt)
 8 | */
 9 | function FastClick(a){var c,b=this;this.trackingClick=!1;this.trackingClickStart=0;this.targetElement=null;this.lastTouchIdentifier=this.touchStartY=this.touchStartX=0;this.touchBoundary=10;this.layer=a;if(!a||!a.nodeType)throw new TypeError("Layer must be a document node");this.onClick=function(){return FastClick.prototype.onClick.apply(b,arguments)};this.onMouse=function(){return FastClick.prototype.onMouse.apply(b,arguments)};this.onTouchStart=function(){return FastClick.prototype.onTouchStart.apply(b,
10 | arguments)};this.onTouchMove=function(){return FastClick.prototype.onTouchMove.apply(b,arguments)};this.onTouchEnd=function(){return FastClick.prototype.onTouchEnd.apply(b,arguments)};this.onTouchCancel=function(){return FastClick.prototype.onTouchCancel.apply(b,arguments)};FastClick.notNeeded(a)||(this.deviceIsAndroid&&(a.addEventListener("mouseover",this.onMouse,!0),a.addEventListener("mousedown",this.onMouse,!0),a.addEventListener("mouseup",this.onMouse,!0)),a.addEventListener("click",this.onClick,
11 | !0),a.addEventListener("touchstart",this.onTouchStart,!1),a.addEventListener("touchmove",this.onTouchMove,!1),a.addEventListener("touchend",this.onTouchEnd,!1),a.addEventListener("touchcancel",this.onTouchCancel,!1),Event.prototype.stopImmediatePropagation||(a.removeEventListener=function(b,c,e){var f=Node.prototype.removeEventListener;"click"===b?f.call(a,b,c.hijacked||c,e):f.call(a,b,c,e)},a.addEventListener=function(b,c,e){var f=Node.prototype.addEventListener;"click"===b?f.call(a,b,c.hijacked||
12 | (c.hijacked=function(a){a.propagationStopped||c(a)}),e):f.call(a,b,c,e)}),"function"===typeof a.onclick&&(c=a.onclick,a.addEventListener("click",function(a){c(a)},!1),a.onclick=null))}FastClick.prototype.deviceIsAndroid=0<navigator.userAgent.indexOf("Android");FastClick.prototype.deviceIsIOS=/iP(ad|hone|od)/.test(navigator.userAgent);FastClick.prototype.deviceIsIOS4=FastClick.prototype.deviceIsIOS&&/OS 4_\d(_\d)?/.test(navigator.userAgent);
13 | FastClick.prototype.deviceIsIOSWithBadTarget=FastClick.prototype.deviceIsIOS&&/OS ([6-9]|\d{2})_\d/.test(navigator.userAgent);FastClick.prototype.needsClick=function(a){switch(a.nodeName.toLowerCase()){case "button":case "select":case "textarea":if(a.disabled)return!0;break;case "input":if(this.deviceIsIOS&&"file"===a.type||a.disabled)return!0;break;case "label":case "video":return!0}return/\bneedsclick\b/.test(a.className)};
14 | FastClick.prototype.needsFocus=function(a){switch(a.nodeName.toLowerCase()){case "textarea":return!0;case "select":return!this.deviceIsAndroid;case "input":switch(a.type){case "button":case "checkbox":case "file":case "image":case "radio":case "submit":return!1}return!a.disabled&&!a.readOnly;default:return/\bneedsfocus\b/.test(a.className)}};
15 | FastClick.prototype.sendClick=function(a,c){var b,d;document.activeElement&&document.activeElement!==a&&document.activeElement.blur();d=c.changedTouches[0];b=document.createEvent("MouseEvents");b.initMouseEvent(this.determineEventType(a),!0,!0,window,1,d.screenX,d.screenY,d.clientX,d.clientY,!1,!1,!1,!1,0,null);b.forwardedTouchEvent=!0;a.dispatchEvent(b)};FastClick.prototype.determineEventType=function(a){return this.deviceIsAndroid&&"select"===a.tagName.toLowerCase()?"mousedown":"click"};
16 | FastClick.prototype.focus=function(a){var c;this.deviceIsIOS&&a.setSelectionRange&&0!==a.type.indexOf("date")&&"time"!==a.type?(c=a.value.length,a.setSelectionRange(c,c)):a.focus()};FastClick.prototype.updateScrollParent=function(a){var c,b;c=a.fastClickScrollParent;if(!c||!c.contains(a)){b=a;do{if(b.scrollHeight>b.offsetHeight){c=b;a.fastClickScrollParent=b;break}b=b.parentElement}while(b)}c&&(c.fastClickLastScrollTop=c.scrollTop)};
17 | FastClick.prototype.getTargetElementFromEventTarget=function(a){return a.nodeType===Node.TEXT_NODE?a.parentNode:a};
18 | FastClick.prototype.onTouchStart=function(a){var c,b,d;if(1<a.targetTouches.length)return!0;c=this.getTargetElementFromEventTarget(a.target);b=a.targetTouches[0];if(this.deviceIsIOS){d=window.getSelection();if(d.rangeCount&&!d.isCollapsed)return!0;if(!this.deviceIsIOS4){if(b.identifier===this.lastTouchIdentifier)return a.preventDefault(),!1;this.lastTouchIdentifier=b.identifier;this.updateScrollParent(c)}}this.trackingClick=!0;this.trackingClickStart=a.timeStamp;this.targetElement=c;this.touchStartX=
19 | b.pageX;this.touchStartY=b.pageY;200>a.timeStamp-this.lastClickTime&&a.preventDefault();return!0};FastClick.prototype.touchHasMoved=function(a){a=a.changedTouches[0];var c=this.touchBoundary;return Math.abs(a.pageX-this.touchStartX)>c||Math.abs(a.pageY-this.touchStartY)>c?!0:!1};FastClick.prototype.onTouchMove=function(a){if(!this.trackingClick)return!0;if(this.targetElement!==this.getTargetElementFromEventTarget(a.target)||this.touchHasMoved(a))this.trackingClick=!1,this.targetElement=null;return!0};
20 | FastClick.prototype.findControl=function(a){return void 0!==a.control?a.control:a.htmlFor?document.getElementById(a.htmlFor):a.querySelector("button, input:not([type=hidden]), keygen, meter, output, progress, select, textarea")};
21 | FastClick.prototype.onTouchEnd=function(a){var c,b,d=this.targetElement;if(!this.trackingClick)return!0;if(200>a.timeStamp-this.lastClickTime)return this.cancelNextClick=!0;this.cancelNextClick=!1;this.lastClickTime=a.timeStamp;c=this.trackingClickStart;this.trackingClick=!1;this.trackingClickStart=0;this.deviceIsIOSWithBadTarget&&(b=a.changedTouches[0],d=document.elementFromPoint(b.pageX-window.pageXOffset,b.pageY-window.pageYOffset)||d,d.fastClickScrollParent=this.targetElement.fastClickScrollParent);
22 | b=d.tagName.toLowerCase();if("label"===b){if(c=this.findControl(d)){this.focus(d);if(this.deviceIsAndroid)return!1;d=c}}else if(this.needsFocus(d)){if(100<a.timeStamp-c||this.deviceIsIOS&&window.top!==window&&"input"===b)return this.targetElement=null,!1;this.focus(d);this.deviceIsIOS4&&"select"===b||(this.targetElement=null,a.preventDefault());return!1}if(this.deviceIsIOS&&!this.deviceIsIOS4&&(c=d.fastClickScrollParent)&&c.fastClickLastScrollTop!==c.scrollTop)return!0;this.needsClick(d)||(a.preventDefault(),
23 | this.sendClick(d,a));return!1};FastClick.prototype.onTouchCancel=function(){this.trackingClick=!1;this.targetElement=null};FastClick.prototype.onMouse=function(a){return this.targetElement&&!a.forwardedTouchEvent&&a.cancelable?!this.needsClick(this.targetElement)||this.cancelNextClick?(a.stopImmediatePropagation?a.stopImmediatePropagation():a.propagationStopped=!0,a.stopPropagation(),a.preventDefault(),!1):!0:!0};
24 | FastClick.prototype.onClick=function(a){if(this.trackingClick)return this.targetElement=null,this.trackingClick=!1,!0;if("submit"===a.target.type&&0===a.detail)return!0;a=this.onMouse(a);a||(this.targetElement=null);return a};
25 | FastClick.prototype.destroy=function(){var a=this.layer;this.deviceIsAndroid&&(a.removeEventListener("mouseover",this.onMouse,!0),a.removeEventListener("mousedown",this.onMouse,!0),a.removeEventListener("mouseup",this.onMouse,!0));a.removeEventListener("click",this.onClick,!0);a.removeEventListener("touchstart",this.onTouchStart,!1);a.removeEventListener("touchmove",this.onTouchMove,!1);a.removeEventListener("touchend",this.onTouchEnd,!1);a.removeEventListener("touchcancel",this.onTouchCancel,!1)};
26 | FastClick.notNeeded=function(a){var c,b;if("undefined"===typeof window.ontouchstart)return!0;if(b=+(/Chrome\/([0-9]+)/.exec(navigator.userAgent)||[,0])[1])if(FastClick.prototype.deviceIsAndroid){if((c=document.querySelector("meta[name=viewport]"))&&(-1!==c.content.indexOf("user-scalable=no")||31<b&&window.innerWidth<=window.screen.width))return!0}else return!0;return"none"===a.style.msTouchAction?!0:!1};FastClick.attach=function(a){return new FastClick(a)};
27 | "undefined"!==typeof define&&define.amd?define(function(){return FastClick}):"undefined"!==typeof module&&module.exports?(module.exports=FastClick.attach,module.exports.FastClick=FastClick):window.FastClick=FastClick;
28 | 


--------------------------------------------------------------------------------
/tools/site-src/assets/vendor/jquery.actual.min.js:
--------------------------------------------------------------------------------
1 | /* Copyright 2012, Ben Lin (http://dreamerslab.com/)
2 |  * Licensed under the MIT License (LICENSE.txt).
3 |  *
4 |  * Version: 1.0.15
5 |  *
6 |  * Requires: jQuery >= 1.2.3
7 |  */
8 | (function(a){a.fn.addBack=a.fn.addBack||a.fn.andSelf;a.fn.extend({actual:function(b,l){if(!this[b]){throw'$.actual => The jQuery method "'+b+'" you called does not exist';}var f={absolute:false,clone:false,includeMargin:false};var i=a.extend(f,l);var e=this.eq(0);var h,j;if(i.clone===true){h=function(){var m="position: absolute !important; top: -1000 !important; ";e=e.clone().attr("style",m).appendTo("body");};j=function(){e.remove();};}else{var g=[];var d="";var c;h=function(){c=e.parents().addBack().filter(":hidden");d+="visibility: hidden !important; display: block !important; ";if(i.absolute===true){d+="position: absolute !important; ";}c.each(function(){var m=a(this);g.push(m.attr("style"));m.attr("style",d);});};j=function(){c.each(function(m){var o=a(this);var n=g[m];if(n===undefined){o.removeAttr("style");}else{o.attr("style",n);}});};}h();var k=/(outer)/.test(b)?e[b](i.includeMargin):e[b]();j();return k;}});})(jQuery);


--------------------------------------------------------------------------------
/tools/site-src/assets/vendor/jquery.html5support.min.js:
--------------------------------------------------------------------------------
1 | /*
2 |  * http://github.com/amiel/html5support
3 |  * Amiel Martin
4 |  * 2010-01-26
5 |  *
6 |  * Support certain HTML 5 attributes with javascript, but only if the browser doesn't already support them.
7 |  */
8 | 
9 | var HTML5Support=function(a){function e(){var d=a(this),e=d.attr(b)+"",f=function(){if(a.trim(d.val())==""||d.val()==e)d.val(e).addClass(c)},g=function(){if(d.val()==e)d.val("").removeClass(c)};d.focus(g).blur(f).blur()}function f(){var d=a(this),e=d.attr(b),f=a('<input type="text">').val(e).addClass(c).addClass(d.attr("class")).css("display","none");set_value=function(){if(a.trim(d.val())==""){f.show();d.hide()}},clear_value=function(){f.hide();d.show().focus()};d.after(f);f.focus(clear_value);d.blur(set_value).blur()}var b="placeholder",c=b,d={};a.extend(d,{supports_attribute:function(a,b){return a in document.createElement(b||"input")}});a.fn.placeholder=function(b){if(d.supports_attribute("placeholder"))return this;return this.each(function(){a(this).attr("type")=="password"?f.apply(this):e.apply(this)})};a.fn.autofocus=function(){if(d.supports_attribute("autofocus"))return this;return this.focus()};a.autofocus=function(){a("[autofocus]:visible").autofocus()};a.placeholder=function(){a("["+b+"]").placeholder()};a.html5support=function(){a.autofocus();a.placeholder()};return d}(jQuery)


--------------------------------------------------------------------------------
/tools/site-src/assets/vendor/jquery.slimscroll.min.js:
--------------------------------------------------------------------------------
 1 | /*! Copyright (c) 2011 Piotr Rochala (http://rocha.la)
 2 |  * Dual licensed under the MIT (http://www.opensource.org/licenses/mit-license.php)
 3 |  * and GPL (http://www.opensource.org/licenses/gpl-license.php) licenses.
 4 |  *
 5 |  * Version: 1.3.2
 6 |  *
 7 |  */
 8 | (function(f){jQuery.fn.extend({slimScroll:function(g){var a=f.extend({width:"auto",height:"250px",size:"7px",color:"#000",position:"right",distance:"1px",start:"top",opacity:0.4,alwaysVisible:!1,disableFadeOut:!1,railVisible:!1,railColor:"#333",railOpacity:0.2,railDraggable:!0,railClass:"slimScrollRail",barClass:"slimScrollBar",wrapperClass:"slimScrollDiv",allowPageScroll:!1,wheelStep:20,touchScrollStep:200,borderRadius:"7px",railBorderRadius:"7px"},g);this.each(function(){function u(d){if(r){d=d||
 9 | window.event;var c=0;d.wheelDelta&&(c=-d.wheelDelta/120);d.detail&&(c=d.detail/3);f(d.target||d.srcTarget||d.srcElement).closest("."+a.wrapperClass).is(b.parent())&&m(c,!0);d.preventDefault&&!k&&d.preventDefault();k||(d.returnValue=!1)}}function m(d,f,g){k=!1;var e=d,h=b.outerHeight()-c.outerHeight();f&&(e=parseInt(c.css("top"))+d*parseInt(a.wheelStep)/100*c.outerHeight(),e=Math.min(Math.max(e,0),h),e=0<d?Math.ceil(e):Math.floor(e),c.css({top:e+"px"}));l=parseInt(c.css("top"))/(b.outerHeight()-c.outerHeight());
10 | e=l*(b[0].scrollHeight-b.outerHeight());g&&(e=d,d=e/b[0].scrollHeight*b.outerHeight(),d=Math.min(Math.max(d,0),h),c.css({top:d+"px"}));b.scrollTop(e);b.trigger("slimscrolling",~~e);v();p()}function C(){window.addEventListener?(this.addEventListener("DOMMouseScroll",u,!1),this.addEventListener("mousewheel",u,!1)):document.attachEvent("onmousewheel",u)}function w(){s=Math.max(b.outerHeight()/b[0].scrollHeight*b.outerHeight(),D);c.css({height:s+"px"});var a=s==b.outerHeight()?"none":"block";c.css({display:a})}
11 | function v(){w();clearTimeout(A);l==~~l?(k=a.allowPageScroll,B!=l&&b.trigger("slimscroll",0==~~l?"top":"bottom")):k=!1;B=l;s>=b.outerHeight()?k=!0:(c.stop(!0,!0).fadeIn("fast"),a.railVisible&&h.stop(!0,!0).fadeIn("fast"))}function p(){a.alwaysVisible||(A=setTimeout(function(){a.disableFadeOut&&r||x||y||(c.fadeOut("slow"),h.fadeOut("slow"))},1E3))}var r,x,y,A,z,s,l,B,D=30,k=!1,b=f(this);if(b.parent().hasClass(a.wrapperClass)){var n=b.scrollTop(),c=b.parent().find("."+a.barClass),h=b.parent().find("."+
12 | a.railClass);w();if(f.isPlainObject(g)){if("height"in g&&"auto"==g.height){b.parent().css("height","auto");b.css("height","auto");var q=b.parent().parent().height();b.parent().css("height",q);b.css("height",q)}if("scrollTo"in g)n=parseInt(a.scrollTo);else if("scrollBy"in g)n+=parseInt(a.scrollBy);else if("destroy"in g){c.remove();h.remove();b.unwrap();return}m(n,!1,!0)}}else{a.height="auto"==g.height?b.parent().height():g.height;n=f("<div></div>").addClass(a.wrapperClass).css({position:"relative",
13 | overflow:"hidden",width:a.width,height:a.height});b.css({overflow:"hidden",width:a.width,height:a.height});var h=f("<div></div>").addClass(a.railClass).css({width:a.size,height:"100%",position:"absolute",top:0,display:a.alwaysVisible&&a.railVisible?"block":"none","border-radius":a.railBorderRadius,background:a.railColor,opacity:a.railOpacity,zIndex:90}),c=f("<div></div>").addClass(a.barClass).css({background:a.color,width:a.size,position:"absolute",top:0,opacity:a.opacity,display:a.alwaysVisible?
14 | "block":"none","border-radius":a.borderRadius,BorderRadius:a.borderRadius,MozBorderRadius:a.borderRadius,WebkitBorderRadius:a.borderRadius,zIndex:99}),q="right"==a.position?{right:a.distance}:{left:a.distance};h.css(q);c.css(q);b.wrap(n);b.parent().append(c);b.parent().append(h);a.railDraggable&&c.bind("mousedown",function(a){var b=f(document);y=!0;t=parseFloat(c.css("top"));pageY=a.pageY;b.bind("mousemove.slimscroll",function(a){currTop=t+a.pageY-pageY;c.css("top",currTop);m(0,c.position().top,!1)});
15 | b.bind("mouseup.slimscroll",function(a){y=!1;p();b.unbind(".slimscroll")});return!1}).bind("selectstart.slimscroll",function(a){a.stopPropagation();a.preventDefault();return!1});h.hover(function(){v()},function(){p()});c.hover(function(){x=!0},function(){x=!1});b.hover(function(){r=!0;v();p()},function(){r=!1;p()});b.bind("touchstart",function(a,b){a.originalEvent.touches.length&&(z=a.originalEvent.touches[0].pageY)});b.bind("touchmove",function(b){k||b.originalEvent.preventDefault();b.originalEvent.touches.length&&
16 | (m((z-b.originalEvent.touches[0].pageY)/a.touchScrollStep,!0),z=b.originalEvent.touches[0].pageY)});w();"bottom"===a.start?(c.css({top:b.outerHeight()-c.outerHeight()}),m(0,!0)):"top"!==a.start&&(m(f(a.start).position().top,null,!0),a.alwaysVisible||c.hide());C()}});return this}});jQuery.fn.extend({slimscroll:jQuery.fn.slimScroll})})(jQuery);


--------------------------------------------------------------------------------
/tools/site-src/assets/vendor/jquery.touchwipe.min.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * jQuery Plugin to obtain touch gestures from iPhone, iPod Touch and iPad, should also work with Android mobile phones (not tested yet!)
 3 |  * Common usage: wipe images (left and right to show the previous or next image)
 4 |  * 
 5 |  * @author Andreas Waltl, netCU Internetagentur (http://www.netcu.de)
 6 |  * @version 1.1.1 (9th December 2010) - fix bug (older IE's had problems)
 7 |  * @version 1.1 (1st September 2010) - support wipe up and wipe down
 8 |  * @version 1.0 (15th July 2010)
 9 |  */
10 | (function($){$.fn.touchwipe=function(settings){var config={min_move_x:20,min_move_y:20,wipeLeft:function(){},wipeRight:function(){},wipeUp:function(){},wipeDown:function(){},preventDefaultEvents:true};if(settings)$.extend(config,settings);this.each(function(){var startX;var startY;var isMoving=false;function cancelTouch(){this.removeEventListener('touchmove',onTouchMove);startX=null;isMoving=false}function onTouchMove(e){if(config.preventDefaultEvents){e.preventDefault()}if(isMoving){var x=e.touches[0].pageX;var y=e.touches[0].pageY;var dx=startX-x;var dy=startY-y;if(Math.abs(dx)>=config.min_move_x){cancelTouch();if(dx>0){config.wipeLeft()}else{config.wipeRight()}}else if(Math.abs(dy)>=config.min_move_y){cancelTouch();if(dy>0){config.wipeDown()}else{config.wipeUp()}}}}function onTouchStart(e){if(e.touches.length==1){startX=e.touches[0].pageX;startY=e.touches[0].pageY;isMoving=true;this.addEventListener('touchmove',onTouchMove,false)}}if('ontouchstart'in document.documentElement){this.addEventListener('touchstart',onTouchStart,false)}});return this}})(jQuery);


--------------------------------------------------------------------------------
/tools/site-src/assets/vendor/jquery.vticker.min.js:
--------------------------------------------------------------------------------
1 | /*! vTicker 1.14 http://richhollis.github.com/vticker/ | http://richhollis.github.com/vticker/license/ | based on Jubgits vTicker http://www.jugbit.com/jquery-vticker-vertical-news-ticker/ */
2 | /*! vTicker 1.13 http://richhollis.github.com/vticker/ | http://richhollis.github.com/vticker/license/ | based on Jubgits vTicker http://www.jugbit.com/jquery-vticker-vertical-news-ticker/ */
3 | (function(d){var m={speed:700,pause:4E3,showItems:1,mousePause:!0,height:0,animate:!0,margin:0,padding:0,startPaused:!1},c={moveUp:function(a,b){c.animate(a,b,"up")},moveDown:function(a,b){c.animate(a,b,"down")},animate:function(a,b,e){var c=a.itemHeight,f=a.options,g=a.element.children("ul"),k="up"===e?"li:first":"li:last",l=g.children(k).clone(!0);0<f.height&&(c=g.children("li:first").height());c+=f.margin+2*f.padding;"down"===e&&g.css("top","-"+c+"px").prepend(l);if(b&&b.animate){if(a.animating)return;
4 | a.animating=!0;g.animate("up"===e?{top:"-="+c+"px"}:{top:0},f.speed,function(){d(g).children(k).remove();d(g).css("top","0px");a.animating=!1})}else g.children(k).remove(),g.css("top","0px");"up"===e&&l.appendTo(g)},nextUsePause:function(){var a=d(this).data("state"),b=a.options;a.isPaused||2>a.itemCount||f.next.call(this,{animate:b.animate})},startInterval:function(){var a=d(this).data("state"),b=this;a.intervalId=setInterval(function(){c.nextUsePause.call(b)},a.options.pause)},stopInterval:function(){var a=
5 | d(this).data("state");a&&(a.intervalId&&clearInterval(a.intervalId),a.intervalId=void 0)},restartInterval:function(){c.stopInterval.call(this);c.startInterval.call(this)}},f={init:function(a){f.stop.call(this);var b=jQuery.extend({},m);a=d.extend(b,a);var b=d(this),e={itemCount:b.children("ul").children("li").length,itemHeight:0,itemMargin:0,element:b,animating:!1,options:a,isPaused:a.startPaused?!0:!1,pausedByCode:!1};d(this).data("state",e);b.css({overflow:"hidden",position:"relative"}).children("ul").css({position:"absolute",
6 | margin:0,padding:0}).children("li").css({margin:a.margin,padding:a.padding});isNaN(a.height)||0===a.height?(b.children("ul").children("li").each(function(){var a=d(this);a.height()>e.itemHeight&&(e.itemHeight=a.height())}),b.children("ul").children("li").each(function(){d(this).height(e.itemHeight)}),b.height((e.itemHeight+(a.margin+2*a.padding))*a.showItems+a.margin)):b.height(a.height);var h=this;a.startPaused||c.startInterval.call(h);a.mousePause&&b.bind("mouseenter",function(){!0!==e.isPaused&&
7 | (e.pausedByCode=!0,c.stopInterval.call(h),f.pause.call(h,!0))}).bind("mouseleave",function(){if(!0!==e.isPaused||e.pausedByCode)e.pausedByCode=!1,f.pause.call(h,!1),c.startInterval.call(h)})},pause:function(a){var b=d(this).data("state");if(b){if(2>b.itemCount)return!1;(b.isPaused=a)?d(this).addClass("paused"):d(this).removeClass("paused")}},next:function(a){var b=d(this).data("state");if(b){if(b.animating||2>b.itemCount)return!1;c.restartInterval.call(this);c.moveUp(b,a)}},prev:function(a){var b=
8 | d(this).data("state");if(b){if(b.animating||2>b.itemCount)return!1;c.restartInterval.call(this);c.moveDown(b,a)}},stop:function(){d(this).data("state")&&c.stopInterval.call(this)}};d.fn.vTicker=function(a){if(f[a])return f[a].apply(this,Array.prototype.slice.call(arguments,1));if("object"!==typeof a&&a)d.error("Method "+a+" does not exist on jQuery.vTicker");else return f.init.apply(this,arguments)}})(jQuery);
9 | 


--------------------------------------------------------------------------------
/tools/site-src/index.md:
--------------------------------------------------------------------------------
 1 | # H2O World Training
 2 | 
 3 | ## 1. H2O World Training Sandbox
 4 |  * [Step-by-step Guide](setup/install.html)
 5 |  * [Screencast]
 6 | 
 7 | ## 2. [SriSatish Ambati](http://0xdata.com/team/srisatish-ambati/): Introduction to H2O
 8 | 
 9 | ## 3. [Tom Kraljevic](http://h2o.ai/team/tom-kraljevic/): [H2O in Big Data environments](bigdataenv/H2OinBigDataEnvironments.pdf)
10 | 
11 | ## 4. Hands-On
12 | 
13 | ### 4.1 H2O with Web UI
14 |  * [Amy Wang](http://h2o.ai/team/amy-wang/): [Step-by-step Tutorial](web_ui/tutorial.html) ([Video Tutorial](https://www.youtube.com/watch?v=DL00ZSSTjOM))
15 | 
16 | ### 4.2 H2O in R
17 |  * [Patrick Aboyoun](http://h2o.ai/team/patrick-aboyoun/): [Basics and Exploratory Data Analysis (EDA)](basics/basics.R.html)
18 | 
19 | ### 4.3 Supervised Learning - Regression and Classification
20 |  * [Patrick Aboyoun](http://h2o.ai/team/patrick-aboyoun/): [Introduction to Generalized Linear Models in H2O](supervised/glm/glm.R.html)
21 |  * [Patrick Aboyoun](http://h2o.ai/team/patrick-aboyoun/): [Introduction to Gradient Boosting Machines in H2O](supervised/gbm/gbm.R.html)
22 |  * [Patrick Aboyoun](http://h2o.ai/team/patrick-aboyoun/): [Introduction to Random Forests in H2O](supervised/randomforest/randomforest.R.html)
23 |  * [Patrick Aboyoun](http://h2o.ai/team/patrick-aboyoun/): [Regression](supervised/regression/regression.R.html)
24 |  * [Patrick Aboyoun](http://h2o.ai/team/patrick-aboyoun/): [Classification](supervised/classification/classification.R.html)
25 |  * [Arno Candel](http://h2o.ai/team/arno-candel/): [Deep Learning](supervised/deeplearning/deeplearning.R.html)
26 | 
27 | ### 4.4 Unsupervised Learning
28 |  * [Arno Candel](http://h2o.ai/team/arno-candel/) & [Spencer Aiello](http://h2o.ai/team/spencer-aiello/): [K-Means Clustering](unsupervised/clustering/clustering.R.html)
29 |  * [Arno Candel](http://h2o.ai/team/arno-candel/): [Dimensionality Reduction on MNIST](unsupervised/dimreduction/dimreduction.R.html)
30 |  * [Arno Candel](http://h2o.ai/team/arno-candel/): [Anomaly Detection on MNIST with H2O Deep Learning](unsupervised/anomaly/anomaly.R.html)
31 | 
32 | ### 4.5 Advanced Topics
33 |  * [Arno Candel](http://h2o.ai/team/arno-candel/): [Multi-model Parameter Tuning for Higgs Dataset](advanced/higgs/higgs.R.html)
34 |  * [Arno Candel](http://h2o.ai/team/arno-candel/): [Categorical Feature Engineering for Adult dataset](advanced/features/features.R.html)
35 |  * [Arno Candel](http://h2o.ai/team/arno-candel/): [Other Useful Tools](advanced/tools/tools.R.html)
36 | 
37 | ### 4.6 Yan: Marketing/CRM Applications and Use-Cases
38 |   * [Yan Zou](http://h2o.ai/team/yan-zou/): [H2O for Marketing/CRM Applications - Presentation](marketing_usecases/h2o_training_yan_2014.pdf)
39 |   * [Yan Zou](http://h2o.ai/team/yan-zou/): [H2O for Marketing/CRM Applications - R Script](marketing_usecases/yanTrainingH2O.R.html)
40 |   * [Vinod Iyengar](http://0xdata.com/h2o-world/#vinod/): [Lead Scoring For Real Time Bidding - Presentation](marketing_usecases/h2o_world_Vinod.pdf)
41 | 
42 | ## 5. Demos
43 | 
44 | ### 5.1 Integration with Tableau and Excel
45 |  * [Amy Wang](http://h2o.ai/team/amy-wang/): [Beauty and Big Data: Tableau](extab/tableau.html)
46 |    * [Video Screencast](https://www.youtube.com/watch?v=Mn8S0cTls9A)
47 |  * [Amy Wang](http://h2o.ai/team/amy-wang/): [Beauty and Big Data: Excel](extab/excel.html)
48 | 
49 | ### 5.2 Streaming Data
50 |   * [Spencer Aiello](http://h2o.ai/team/spencer-aiello/) & [Tom Kraljevic](http://h2o.ai/team/tom-kraljevic/): [Real-time Prediction with H2O and Storm](streaming/storm/README.html)
51 | 
52 | ## 6. Hackers Station
53 | 
54 | ### 6.1 Start Developing with H2O
55 |   * [Cliff Click](http://h2o.ai/team/cliff-click/): [Hacking Algorithms into H2O: KMeans](devel/hacking/KMeans.html) 
56 |   * [Cliff Click](http://h2o.ai/team/cliff-click/): [Hacking Algorithms into H2O: Grep](devel/hacking/Grep.html)
57 |   * [Cliff Click](http://h2o.ai/team/cliff-click/): [Hacking Algorithms into H2O: Quantiles](devel/hacking/Quantiles.html)
58 |   
59 | ### 6.2 Build Application on Top of H2O
60 |   * [Michal Malohlava](http://h2o.ai/team/michal-malohlava/): [Start Developing Your Application with H2O Droplets](devel/droplets/tutorial.html)
61 | 
62 | ### 6.3 Start with Sparkling Water
63 |   * [Michal Malohlava](http://h2o.ai/team/michal-malohlava/): [Sparkling Water Tutorial](devel/sparkling_water/tutorial.html)
64 | 
65 | ## 7. Troubleshooting
66 |   * [H2O Troubleshooting](troubleshooting/index.html)
67 |  
68 | ## 8. More Information
69 |   * [H2O Documentation](http://docs.h2o.ai)
70 |   * [H2O YouTube Channel](https://www.youtube.com/user/0xdata)
71 |   * [H2O SlideShare](http://www.slideshare.net/0xdata/presentations)
72 |   * [H2O Blog](http://h2o.ai/blog)
73 |   * [H2O GitHub](http://github.com/0xdata)
74 | 
75 | 


--------------------------------------------------------------------------------
/tutorials/Training_img/AUC1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/Training_img/AUC1.png


--------------------------------------------------------------------------------
/tutorials/Training_img/AUC2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/Training_img/AUC2.png


--------------------------------------------------------------------------------
/tutorials/Training_img/ClusterStatus.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/Training_img/ClusterStatus.png


--------------------------------------------------------------------------------
/tutorials/Training_img/ConfMtx1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/Training_img/ConfMtx1.png


--------------------------------------------------------------------------------
/tutorials/Training_img/ConfMtx2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/Training_img/ConfMtx2.png


--------------------------------------------------------------------------------
/tutorials/Training_img/CreateFrame1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/Training_img/CreateFrame1.png


--------------------------------------------------------------------------------
/tutorials/Training_img/CreateFrame2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/Training_img/CreateFrame2.png


--------------------------------------------------------------------------------
/tutorials/Training_img/Export.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/Training_img/Export.png


--------------------------------------------------------------------------------
/tutorials/Training_img/GLMResults.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/Training_img/GLMResults.png


--------------------------------------------------------------------------------
/tutorials/Training_img/GainsLift.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/Training_img/GainsLift.png


--------------------------------------------------------------------------------
/tutorials/Training_img/GainsLift1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/Training_img/GainsLift1.png


--------------------------------------------------------------------------------
/tutorials/Training_img/GainsLift2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/Training_img/GainsLift2.png


--------------------------------------------------------------------------------
/tutorials/Training_img/HitRatio.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/Training_img/HitRatio.png


--------------------------------------------------------------------------------
/tutorials/Training_img/IOStatus.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/Training_img/IOStatus.png


--------------------------------------------------------------------------------
/tutorials/Training_img/Import.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/Training_img/Import.png


--------------------------------------------------------------------------------
/tutorials/Training_img/Import2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/Training_img/Import2.png


--------------------------------------------------------------------------------
/tutorials/Training_img/Impute1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/Training_img/Impute1.png


--------------------------------------------------------------------------------
/tutorials/Training_img/Impute2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/Training_img/Impute2.png


--------------------------------------------------------------------------------
/tutorials/Training_img/Inspect1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/Training_img/Inspect1.png


--------------------------------------------------------------------------------
/tutorials/Training_img/Inspect2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/Training_img/Inspect2.png


--------------------------------------------------------------------------------
/tutorials/Training_img/Interaction1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/Training_img/Interaction1.png


--------------------------------------------------------------------------------
/tutorials/Training_img/Interaction2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/Training_img/Interaction2.png


--------------------------------------------------------------------------------
/tutorials/Training_img/Jobs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/Training_img/Jobs.png


--------------------------------------------------------------------------------
/tutorials/Training_img/Logs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/Training_img/Logs.png


--------------------------------------------------------------------------------
/tutorials/Training_img/NetworkTest.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/Training_img/NetworkTest.png


--------------------------------------------------------------------------------
/tutorials/Training_img/PCAScore.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/Training_img/PCAScore.png


--------------------------------------------------------------------------------
/tutorials/Training_img/POJO.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/Training_img/POJO.png


--------------------------------------------------------------------------------
/tutorials/Training_img/Parse.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/Training_img/Parse.png


--------------------------------------------------------------------------------
/tutorials/Training_img/PerfBar.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/Training_img/PerfBar.png


--------------------------------------------------------------------------------
/tutorials/Training_img/Predict1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/Training_img/Predict1.png


--------------------------------------------------------------------------------
/tutorials/Training_img/PredictModel.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/Training_img/PredictModel.png


--------------------------------------------------------------------------------
/tutorials/Training_img/PredictResults.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/Training_img/PredictResults.png


--------------------------------------------------------------------------------
/tutorials/Training_img/Profiler.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/Training_img/Profiler.png


--------------------------------------------------------------------------------
/tutorials/Training_img/Profiler2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/Training_img/Profiler2.png


--------------------------------------------------------------------------------
/tutorials/Training_img/Quantiles1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/Training_img/Quantiles1.png


--------------------------------------------------------------------------------
/tutorials/Training_img/Quantiles2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/Training_img/Quantiles2.png


--------------------------------------------------------------------------------
/tutorials/Training_img/STEAMtabular.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/Training_img/STEAMtabular.png


--------------------------------------------------------------------------------
/tutorials/Training_img/SplitFrame.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/Training_img/SplitFrame.png


--------------------------------------------------------------------------------
/tutorials/Training_img/SplitFrame2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/Training_img/SplitFrame2.png


--------------------------------------------------------------------------------
/tutorials/Training_img/StackDump.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/Training_img/StackDump.png


--------------------------------------------------------------------------------
/tutorials/Training_img/Summary1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/Training_img/Summary1.png


--------------------------------------------------------------------------------
/tutorials/Training_img/Summary2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/Training_img/Summary2.png


--------------------------------------------------------------------------------
/tutorials/Training_img/Tasks.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/Training_img/Tasks.png


--------------------------------------------------------------------------------
/tutorials/Training_img/Timeline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/Training_img/Timeline.png


--------------------------------------------------------------------------------
/tutorials/Training_img/UDPDrop.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/Training_img/UDPDrop.png


--------------------------------------------------------------------------------
/tutorials/Training_img/Upload1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/Training_img/Upload1.png


--------------------------------------------------------------------------------
/tutorials/Training_img/Upload2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/Training_img/Upload2.png


--------------------------------------------------------------------------------
/tutorials/Training_img/View.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/Training_img/View.png


--------------------------------------------------------------------------------
/tutorials/advanced/binaryClassificationHelper.R.md:
--------------------------------------------------------------------------------
 1 |     TOP_FEATURES = 5
 2 |     
 3 |     h2o.get_auc <- function(model, data, response) {
 4 |       pred <- h2o.predict(model, data)[,3]
 5 |       perf <- h2o.performance(pred, data[,response])
 6 |       perf@model$auc
 7 |     }
 8 |     
 9 |     h2o.varimp <- function(algo, model) {
10 |       if (identical(algo, h2o.glm)) {
11 |         varimp <- paste(names(sort(abs(model@model$normalized_coefficients), TRUE))[1:TOP_FEATURES], collapse = ",", sep = ",")
12 |       } else if (identical(algo, h2o.randomForest) || identical(algo, h2o.deeplearning)) {
13 |         varimp <- paste(names(sort(model@model$varimp[1,], TRUE))[1:TOP_FEATURES], collapse = ",", sep = ",")
14 |       } else if (identical(algo, h2o.gbm)) {
15 |         varimp <- paste(rownames(model@model$varimp)[1:TOP_FEATURES], collapse = ",", sep = ",")
16 |       }
17 |       varimp
18 |     }
19 |     
20 |     h2o.validate <- function(t0, model, modeltype, validation, response, varimp) {
21 |       elapsed_seconds <- as.numeric(Sys.time()) - as.numeric(t0)
22 |       modelkey <- model@key
23 |       type <- modeltype
24 |       auc <- h2o.get_auc(model, validation, response)
25 |       result <- list(list(model, modeltype, response, elapsed_seconds, auc, varimp))
26 |       names(result) <- model@key
27 |       return(result)
28 |     }
29 |     
30 |     h2o.fit <- function(algo, data, args) {
31 |       t0 <- Sys.time()
32 |       predictors <- data$x
33 |       response <- data$y
34 |       train <- data$train
35 |       valid <- data$valid
36 |       nfolds <- data$nfolds
37 |       if (nfolds >= 0) {
38 |         model <- do.call(algo, modifyList(list(x=predictors, y=response, data=train, nfolds=nfolds), args))
39 |       } else {
40 |         model <- do.call(algo, modifyList(list(x=predictors, y=response, data=train), args))
41 |       }
42 |       if (.hasSlot(model,"sumtable")) {
43 |         model <- model@model[[1]]
44 |       }
45 |       return(h2o.validate(t0, model, as.character(substitute(algo)), valid, response, h2o.varimp(algo, model)))
46 |     }
47 |     
48 |     h2o.selectModel <- function(x) {
49 |       c(model_key = x[[1]]@key,
50 |         model_type = x[[2]],
51 |         train_auc = as.numeric(x[[1]]@model$auc),
52 |         validation_auc = as.numeric(x[[5]]),
53 |         important_feat = x[[6]],
54 |         tuning_time_s = as.numeric(as.character(x[[4]])))
55 |     }
56 |     
57 |     h2o.leaderBoard <- function(models, test_hex, response) {
58 |       model.list <- as.data.frame(t(as.data.frame(lapply(models, h2o.selectModel))))
59 |       model.list$train_auc <- as.numeric(as.character(model.list$train_auc))
60 |       model.list$validation_auc <- as.numeric(as.character(model.list$validation_auc))
61 |       
62 |       #### sort the models by AUC from worst to best
63 |       models.sort.by.auc <- model.list[with(model.list, order(validation_auc)),-1]
64 |       models.sort.by.auc <- models.sort.by.auc[rev(rownames(models.sort.by.auc)),]
65 |     
66 |       #### convert the `auc` and `tuning_time` columns into numerics
67 |       models.sort.by.auc$train_auc       <- as.numeric(as.character(models.sort.by.auc$train_auc))
68 |       models.sort.by.auc$validation_auc  <- as.numeric(as.character(models.sort.by.auc$validation_auc))
69 |       models.sort.by.auc$tuning_time_s   <- as.numeric(as.character(models.sort.by.auc$tuning_time_s))
70 |       
71 |       #### display the frame
72 |       print(models.sort.by.auc)
73 |       
74 |       #### score the best model on the test data
75 |       best_model <- h2o.getModel(h2oServer, rownames(models.sort.by.auc)[1])
76 |       preds <- h2o.predict(best_model, test_hex)
77 |       test_auc <- h2o.get_auc(best_model, test_hex, response)
78 |       
79 |       cat(paste(" -------------------------------\n",
80 |                 "Best Model Performance On Final Testing Data:", "\n",
81 |                 "AUC = ", round(test_auc,6), "\n",
82 |                 "--------------------------------\n"))
83 |       
84 |       cat(paste(" =---------Summary------------=\n",
85 |                 "Best model type: ", models.sort.by.auc[1,]$model_type, "\n",
86 |                 "Best model AUC on test: ", round(test_auc,6), "\n",
87 |                 "Top", TOP_FEATURES, "important features: ", models.sort.by.auc[1,]$important_feat, "\n",
88 |                 "Model training time (incl. tuning, grid search): ", round(models.sort.by.auc[1,]$tuning_time_s,6), "seconds \n",
89 |                 "=----------------------------=\n"))
90 |       best_model
91 |     }


--------------------------------------------------------------------------------
/tutorials/advanced/features/features.R.md:
--------------------------------------------------------------------------------
  1 | # Feature Engineering on the Adult dataset
  2 | 
  3 | ######This tutorial shows feature engineering on the [Adult dataset](https://archive.ics.uci.edu/ml/datasets/Adult). This file is both valid R and markdown code.
  4 | 
  5 | ### Start H2O and load the Adult data
  6 | 
  7 | ######Initialize the H2O server and import the Adult dataset.
  8 | 
  9 |     library(h2o)
 10 |     h2oServer <- h2o.init(nthreads=-1)
 11 |     homedir <- "/data/h2o-training/adult/"
 12 |     TRAIN = "adult.gz"
 13 |     data_hex <- h2o.importFile(h2oServer, path = paste0(homedir,TRAIN), header = F, sep = ' ', key = 'data_hex')
 14 | 
 15 | ######We manually assign column names since they are missing in the original file.
 16 |     
 17 |     colnames(data_hex) <- c("age","workclass","fnlwgt","education","education-num","marital-status","occupation","relationship","race","sex","capital-gain","capital-loss","hours-per-week","native-country","income")
 18 |     summary(data_hex)
 19 | 
 20 | ######We will try to predict whether `income` is `<=50K` or `>50K`.
 21 |     summary(data_hex$income)
 22 |     response = "income"
 23 | 
 24 | ######First, we source a few [helper functions](../binaryClassificationHelper.R.html) that allow us to quickly compare a multitude of binomial classification models, in particular the h2o.fit() and h2o.leaderBoard() functions.  Note that these specific functions require variable importances and N-fold cross-validation to be enabled.
 25 | 
 26 |     source("~/h2o-training/tutorials/advanced/binaryClassificationHelper.R.md")
 27 | 
 28 | ###### We then add this simple helper function to split a frame into train/valid/test pieces, train a GLM and a GBM model with 2-fold cross-validation and obtaining the best model after printing a leaderbaord. For more accurate
 29 | 
 30 |     N_FOLDS = 2
 31 | 
 32 |     h2o.trainModels <- function(frame) {
 33 |       # split the data into train/valid/test
 34 |       random <- h2o.runif(frame, seed = 123456789)
 35 |       train_hex <- h2o.assign(frame[random < .8,], "train_hex")
 36 |       valid_hex <- h2o.assign(frame[random >= .8 & random < .9,], "valid_hex")
 37 |       test_hex  <- h2o.assign(frame[random >= .9,], "test_hex")
 38 |      
 39 |       predictors <- colnames(frame)[-match(response,colnames(frame))]
 40 |       
 41 |       # multi-model comparison with N-fold cross-validation
 42 |       data = list(x=predictors, y=response, train=train_hex, valid=valid_hex, nfolds=N_FOLDS)
 43 |       models <- c(
 44 |         h2o.fit(h2o.glm, data, glmparams),
 45 |         h2o.fit(h2o.gbm, data, gbmparams)
 46 |       )
 47 |       best_model <- h2o.leaderBoard(models, test_hex, match(response,colnames(frame)))
 48 |   
 49 |       h2o.rm(h2oServer, grep(pattern = "Last.value", x = h2o.ls(h2oServer)$Key, value = TRUE))
 50 |       best_model
 51 |     }
 52 | 
 53 | ### Baseline performance on original dataset
 54 | ###### For simplicity, we use default parameters (no grid search parameter tuning) to establish baseline performance numbers for this dataset.
 55 | 
 56 |     glmparams <- list(family="binomial", variable_importances=T, use_all_factor_levels=T)
 57 |     gbmparams <- list(importance=TRUE)
 58 | 
 59 |     best_model <- h2o.trainModels(data_hex)
 60 | 
 61 | ###### Both GLM and GBM do a great job at this dataset, we get validation AUC values of above 90%: `GLM: 0.9028564 GBM: 0.9009924`
 62 | ###### According to GBM, the most important columns are `marital-status,relationship,capital-gain,education-num,age`
 63 | 
 64 | ### Feature engineering
 65 |  
 66 | ###### The following section shows ways to create new derived features. We'll need this simple append function, as we're going to add new columns the our dataset
 67 | 
 68 |     h2o.append <- function(frame, col) {
 69 |       appended_frame <- h2o.assign(cbind(frame, col), "appended_frame")
 70 |       appended_frame
 71 |     }
 72 | 
 73 | ####1. Turn age into a factor
 74 | ######The feature `age` is an integer value, but GLM for example will have a tough time predicing income from age with a linear relationship, while GBM should be able to carve out these non-linear dependencies by itself (but it might need more trees, deeper interaction depth than default values)
 75 |  
 76 |     data_hex <- h2o.append(data_hex, as.factor(data_hex$age))
 77 |     colnames(data_hex)
 78 |     summary(data_hex)
 79 |     best_model <- h2o.trainModels(data_hex)
 80 | 
 81 | ###### GLM clearly benefited from this. We see that ages 18,19 and 20 are among the most important predictors for income and we get the following validation AUC values: `GLM: 0.9066634 GBM: 0.9009924`
 82 | 
 83 | ###### For fun, let's look at the largest positively and negatively correlated coefficients:
 84 | 
 85 |     head(sort(best_model@model$normalized_coefficients,decreasing=T),5)
 86 |     head(sort(best_model@model$normalized_coefficients,decreasing=F),5)
 87 | 
 88 | ####2. Same for capital-gain/loss and work hours per week
 89 |   
 90 |     data_hex <- h2o.append(data_hex, as.factor(data_hex$'hours-per-week'))
 91 |     data_hex <- h2o.append(data_hex, as.factor(data_hex$'capital-gain'))
 92 |     data_hex <- h2o.append(data_hex, as.factor(data_hex$'capital-loss'))
 93 |     colnames(data_hex)
 94 |     summary(data_hex)
 95 |     best_model <- h2o.trainModels(data_hex)
 96 | 
 97 | ###### With all these new factor levels as predictors, GLM now got a nice boost: `GLM: 0.9285384 GBM: 0.9021225`
 98 | 
 99 | ###### Let's give GBM a shot at beating GLM by using better parameters:
100 | 
101 |     gbmparams <- list(importance=TRUE, n.tree=50, interaction.depth=10)
102 |     best_model <- h2o.trainModels(data_hex)
103 | 
104 | ###### Ok, now both algorithms reach similar validation AUC values: `GLM: 0.9285384 GBM: 0.9286973`
105 | 
106 | ### Replace money-related integer columns by their Log-Transform
107 |     
108 |     data_hex$'capital-gain'   <- log(1+data_hex$'capital-gain')
109 |     data_hex$'capital-loss'   <- log(1+data_hex$'capital-loss')
110 |     data_hex
111 |     best_model <- h2o.trainModels(data_hex)
112 |     
113 | ######We see that the training AUC for GLM improves slightly, from `0.9269056070` to `0.9269586507`. Intuition: Money is often distributed exponentially, and the log transform brings it back to a linear space. Note that the validation AUC drops, likely due to small data statistical noise. We clearly got close to the limit of this dataset. Note that GBM didn't benefit from this transform, it seems to be able to better split up the original integer space.
114 | 
115 |     frame <- data_hex
116 |     random <- h2o.runif(frame, seed = 123456789)
117 |     train_hex <- h2o.assign(frame[random < .8,], "train_hex")
118 |     valid_hex <- h2o.assign(frame[random >= .8 & random < .9,], "valid_hex")
119 |     test_hex  <- h2o.assign(frame[random >= .9,], "test_hex")
120 |     
121 |     predictors <- colnames(frame)[-match(response,colnames(frame))]
122 |     
123 |     # multi-model comparison with N-fold cross-validation
124 |     data = list(x=predictors, y=response, train=train_hex, valid=valid_hex, nfolds=N_FOLDS)
125 |     models <- c(
126 |       h2o.fit(h2o.deeplearning, data, list())
127 |     )
128 |     best_model <- h2o.leaderBoard(models, test_hex, match(response,colnames(frame)))
129 |   


--------------------------------------------------------------------------------
/tutorials/advanced/higgs/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/advanced/higgs/.DS_Store


--------------------------------------------------------------------------------
/tutorials/advanced/higgs/higgs.R.md:
--------------------------------------------------------------------------------
  1 | # Higgs Particle Discovery with H2O Deep Learning
  2 | 
  3 | ######This tutorial shows how H2O can be used to classify particle detector events into Higgs bosons vs background noise. This file is both valid R and markdown code.
  4 | #####![](images/higgs.png)
  5 | ### HIGGS dataset and Deep Learning
  6 | ######We use the [UCI HIGGS](https://archive.ics.uci.edu/ml/datasets/HIGGS/) dataset with 11 million events and 28 features (21 low-level features and 7 humanly created non-linear derived features). In this tutorial, we show that (only) Deep Learning can automatically generate these or similar high-level features on its own and reach highest accuracies from just the low-level features alone, outperforming traditional classifiers. This is in accordance with a recently published Nature paper on using [Deep Learning for Higgs particle detection](http://www.slideshare.net/0xdata/how-to-win-data-science-competitions-with-deep-learning/33). Remarkably, Deep Learning also won the [Higgs Kaggle challenge](https://www.kaggle.com/c/higgs-boson/forums/t/10425/code-release) on the full set of features.
  7 | 
  8 | 
  9 | 
 10 | ### Start H2O, import the HIGGS data, and prepare train/validation/test splits
 11 | 
 12 | ######Initialize the H2O server (enable all cores)
 13 | 
 14 |     library(h2o)
 15 |     h2oServer <- h2o.init(nthreads=-1)
 16 |  
 17 | 
 18 | ###### Import the data: For simplicity, we use a reduced dataset containing the first 100k rows.
 19 | 
 20 |     homedir <- "/data/h2o-training/higgs/"
 21 |     TRAIN = "higgs.100k.csv.gz"
 22 |     data_hex <- h2o.importFile(h2oServer, path = paste0(homedir,TRAIN), header = F, sep = ',', key = 'data_hex')
 23 |     
 24 | ###### For small datasets, it can help to rebalance the dataset into more chunks to keep all cores busy
 25 | 
 26 |     data_hex <- h2o.rebalance(data_hex, chunks=64, key='data_hex.rebalanced')
 27 | 
 28 | ###### Prepare train/validation/test splits: We split the dataset randomly into 3 pieces. Grid search for hyperparameter tuning and model selection will be done on the training and validation sets, and final model testing is done on the test set. We also assign the resulting frames to meaningful names in the H2O key-value store for later use, and clean up all temporaries at the end.
 29 | 
 30 |     random <- h2o.runif(data_hex, seed = 123456789)
 31 |     train_hex <- h2o.assign(data_hex[random < .8,], "train_hex")
 32 |     valid_hex <- h2o.assign(data_hex[random >= .8 & random < .9,], "valid_hex")
 33 |     test_hex  <- h2o.assign(data_hex[random >= .9,], "test_hex")
 34 |     h2o.rm(h2oServer, grep(pattern = "Last.value", x = h2o.ls(h2oServer)$Key, value = TRUE))
 35 |  
 36 | ######The first column is the response label (background:0 or higgs:1). Of the 28 numerical features, the first 21 are low-level detector features and the last 7 are high-level humanly derived features (physics formulae).
 37 |   
 38 |     response = 1
 39 |     low_level_predictors = c(2:22)
 40 |     low_and_high_level_predictors = c(2:29)
 41 | 
 42 | ### Establishing the baseline performance reference with several H2O classifiers
 43 | ######To get a feel for the performance of different classifiers on this dataset, we build a variety of different H2O models (Generalized Linear Model, Random Forest, Gradient Boosted Machines and DeepLearning). We would like to use grid-search for hyper-parameter tuning with N-fold cross-validation, and we want to do this twice: once using just the low-level features, and once using both low- and high-level features.
 44 | 
 45 | ######First, we source a few [helper functions](../binaryClassificationHelper.R.html) that allow us to quickly compare a multitude of binomial classification models, in particular the h2o.fit() and h2o.leaderBoard() functions.  Note that these specific functions require variable importances and N-fold cross-validation to be enabled.
 46 | 
 47 |     source("~/h2o-training/tutorials/advanced/binaryClassificationHelper.R.md")
 48 | 
 49 | ######The code below trains 60 models (2 loops, 5 classifiers with 2 grid search models each, each resulting in 1 full training and 2 cross-validation models). A leaderboard scoring the best models per h2o.fit() function is displayed.
 50 | 
 51 |     N_FOLDS=2
 52 |     
 53 |     for (preds in list(low_level_predictors, low_and_high_level_predictors)) {
 54 |       data = list(x=preds, y=response, train=train_hex, valid=valid_hex, nfolds=N_FOLDS)
 55 |       
 56 |       models <- c(
 57 |         h2o.fit(h2o.glm, data, 
 58 |                 list(family="binomial", variable_importances=T, lambda=c(1e-5,1e-4), use_all_factor_levels=T)),
 59 |         h2o.fit(h2o.randomForest, data, 
 60 |                 list(type="fast", importance=TRUE, ntree=c(5), depth=c(5,10))),
 61 |         h2o.fit(h2o.randomForest, data, 
 62 |                 list(type="BigData", importance=TRUE, ntree=c(5), depth=c(5,10))),
 63 |         h2o.fit(h2o.gbm, data, 
 64 |                 list(importance=TRUE, n.tree=c(10), interaction.depth=c(2,5))),
 65 |         h2o.fit(h2o.deeplearning, data, 
 66 |                 list(variable_importances=T, l1=c(1e-5), epochs=1, hidden=list(c(10,10,10), c(100,100))))
 67 |       )
 68 |       best_model <- h2o.leaderBoard(models, test_hex, response)
 69 |       h2o.rm(h2oServer, grep(pattern = "Last.value", x = h2o.ls(h2oServer)$Key, value = TRUE))
 70 |     }
 71 |     
 72 | ###### The output contains a leaderboard (based on validation AUC) for the models using low-level features only:
 73 |     
 74 |     #                                        model_type train_auc validation_auc    important_feat tuning_time_s
 75 |     #    DeepLearning_ba44837829dc8d1e h2o.deeplearning 0.6102486      0.6661170  C7,C3,C10,C11,C2      5.604354
 76 |     #    GBM_8aad39d45442ed2418646fac4 h2o.gbm          0.6393795      0.6483558  C7,C2,C5,C10,C11      3.379146
 77 |     #    DRF_b64f1aca48cfa9532f78408df h2o.randomForest 0.6358507      0.6439986  C7,C10,C2,C5,C11      4.423504
 78 |     #    SpeeDRF_a40eda3fe25b1271b6be8 h2o.randomForest 0.5977397      0.6000200 C11,C7,C5,C10,C14      3.381785
 79 |     #    GLMModel__9f5855ebfcb804a1664 h2o.glm          0.5907724      0.5893810  C5,C7,C14,C2,C10      1.422024
 80 | 
 81 | ###### Note that training AUCs are based on cross-validation.
 82 | 
 83 | ###### When using both low- and high-level features, the AUC values go up across the board:
 84 |   
 85 |     #                                       model_type train_auc validation_auc     important_feat tuning_time_s
 86 |     #    DeepLearning_a7cb0d0cc6f6fb8 h2o.deeplearning 0.7280887      0.7586982 C27,C29,C28,C24,C7      6.663959
 87 |     #    GBM_bc87169c331b22e37339c643 h2o.gbm          0.7552951      0.7545594 C27,C29,C28,C7,C24      4.437348
 88 |     #    DRF_b3572d5d088a043addcb3684 h2o.randomForest 0.7467827      0.7534905 C27,C29,C28,C24,C7      6.636395
 89 |     #    SpeeDRF_ba048cb60c3aad567b17 h2o.randomForest 0.7274115      0.7301433 C27,C28,C29,C7,C24      5.516673
 90 |     #    GLMModel__b59d95ace34a2979da h2o.glm          0.6827518      0.6764049 C29,C28,C27,C7,C24      1.523096
 91 | 
 92 | ###### Clearly, the high-level features add a lot of predictive power, but what if they are not easily available? On this sampled dataset and with simple models from low-level features alone, Deep Learning seems to have an edge over the other methods indicating that it is able to create useful high-level features on its own.
 93 | 
 94 | ###### *Note:* Every run of DeepLearning results in different results since we use [Hogwild!](http://www.eecs.berkeley.edu/~brecht/papers/hogwildTR.pdf) parallelization with intentional race conditions between threads.  To get reproducible results at the expense of speed for small datasets, set reproducible=T and specify a seed.
 95 | 
 96 | ### Build an improved Deep Learning model on the low-level features alone
 97 | ###### With slightly modified parameters, it is possible to build an even better Deep Learning model. We add dropout/L1/L2 regularization and increase the number of neurons and the number of hidden layers. Note that we don't use input layer dropout, as there are only 21 features, all of which are assumed to be present and important for particle detection. We also reduce the amount of dropout for derived features. Note that the importance of regularization typically goes down with increasing dataset sizes, as overfitting is less an issue when the model is small compared to the data.
 98 |     
 99 |     h2o.deeplearning(x=low_level_predictors, y=response, activation="RectifierWithDropout", data=train_hex, 
100 |                      validation=valid_hex, input_dropout_ratio=0, hidden_dropout_ratios=c(0.2,0.1,0.1,0),
101 |                      l1=1e-5, l2=1e-5, epochs=20, hidden=c(200,200,200,200))
102 |    
103 | ###### With this computationally slightly more expensive Deep Learning model, we achieve a nice boost over the simple models above: `AUC =  0.7245833 (on validation)`    
104 | 
105 | ###Voila!
106 | #####We applied multi-model grid search with N-fold cross-valiation on the real-world Higgs dataset, and demonstrated the power of H2O Deep Learning in automatically cerating non-linear derived features for highest predictive accuracy!
107 | 
108 | ####Extension to the full dataset
109 | #####Please note that this tutorial was on a small subsample (<1%) of the [UCI HIGGS dataset](https://archive.ics.uci.edu/ml/datasets/HIGGS/), and results do not trivially transfer from small samples to the full dataset. Previous results by H2O Deep Learning on the full dataset (training on 10M rows, validation on 500k rows, testing on 500k rows) agree with a recently published Nature paper on using [Deep Learning for Higgs particle detection](http://www.slideshare.net/0xdata/how-to-win-data-science-competitions-with-deep-learning/33), where 5-layer H2O Deep Learning models have achieved a test set AUC value of 0.869. We would love to hear about your best models or even ensembles!
110 | 
111 | #### More information can be found in the [H2O Deep Learning booklet](https://t.co/kWzyFMGJ2S) and in our [slides](http://www.slideshare.net/0xdata/presentations).


--------------------------------------------------------------------------------
/tutorials/advanced/higgs/images/higgs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/advanced/higgs/images/higgs.png


--------------------------------------------------------------------------------
/tutorials/advanced/tools/tools.R.md:
--------------------------------------------------------------------------------
  1 | ### Synthetic Data
  2 | ###### Use `h2o.createFrame` to synthetic random data in H2O. This method can also be used to quickly create very large datasets for scaling tests. Note that there is no intrinsic structure in the data (it's either constant or random), so results from many machine learning methods will not be very meaningful.
  3 | 
  4 |     library(h2o)
  5 |     h2oServer <- h2o.init(nthreads=-1)
  6 | 
  7 |     myframe = h2o.createFrame(h2oServer, 'framekey', rows = 20, cols = 5,
  8 |                               seed = -12301283, randomize = TRUE, value = 0,
  9 |                               categorical_fraction = 0.8, factors = 10, real_range = 1,
 10 |                               integer_fraction = 0.2, integer_range = 10, missing_fraction = 0.2,
 11 |                               response_factors = 1)
 12 |     
 13 | ###### We created a small random frame in H2O that contains missing values, categorical and numerical columns.
 14 | 
 15 |     head(myframe,20)
 16 |   
 17 |     #         response    C1    C2 C3    C4    C5
 18 |     #    1  -0.4227451 4a41c a1290  3 326a0 82dce
 19 |     #    2   0.4594655 0a79a a1290 -6 05f22 f4772
 20 |     #    3   0.2784008 70d75 4c3a7  7 320a8 6b17b
 21 |     #    4  -0.5674698       07067  6 320a8 a78d9
 22 |     #    5  -0.7041911 24800 4c3a7  8       82dce
 23 |     #    6   0.5853957 8e64f a1290 NA ea644      
 24 |     #    7   0.8540204              4 05f22 a6b1e
 25 |     #    8  -0.1466706 4a41c        1 cef5a 89717
 26 |     #    9          NA 0a79a c402d  3 070d5 a78d9
 27 |     #    10 -0.7357408 4a41c a1290  9 e055d 64439
 28 |     #    11 -0.2798403       4c3a7  3       f4772
 29 |     #    12  0.3454386 24800       NA            
 30 |     #    13         NA 0a79a ca1de  9 070d5 a78d9
 31 |     #    14 -0.6732674 8e64f 50b47  9 320a8 6b17b
 32 |     #    15         NA cc3d5 ca1de  4 e055d f4772
 33 |     #    16         NA cc3d5 07067  1 070d5 89717
 34 |     #    17  0.8564855 4f8b9 a1290 -1 326a0 6b17b
 35 |     #    18 -0.5555804 0a79a 50b47 NA       82dce
 36 |     #    19  0.5639324              3 e055d      
 37 |     #    20  0.2511743              7 30c85 cae4c
 38 | 
 39 | ###### We remove the response column and convert the integer column to a factor.
 40 | 
 41 |     myframe <- myframe[,-1]
 42 |     myframe[,3] <- as.factor(myframe$C3)
 43 |     summary(myframe)
 44 |     head(myframe, 20)
 45 | 
 46 | ###Interaction Features between Factors
 47 | ###### Create pairwise interactions for 2 groups of columns, keep only up to 10 (most common) factors per interaction.
 48 |     
 49 |     pairwise <- h2o.interaction(myframe, key = 'pairwise', factors = list(c(1,2),c(2,3,4)),
 50 |                                 pairwise=TRUE, max_factors = 10, min_occurrence = 1)
 51 |     head(pairwise, 20)
 52 |     levels(pairwise[,2])
 53 | 
 54 | ###### Create 5-th order interaction between the specified columns, and allow up to 10k resulting factors (per pair-wise interaction).
 55 |     
 56 |     higherorder <- h2o.interaction(myframe, key = 'higherorder', factors = c(1,2,3,4,5),
 57 |                                    pairwise=FALSE, max_factors = 10000, min_occurrence = 1)
 58 |     head(higherorder, 20)
 59 | 
 60 | ######Create a categorical variable out of the integer column via self-interaction, and keep at most 3 factors, and only if they occur at least twice
 61 |     
 62 |     summary(myframe$C3)
 63 |     head(myframe$C3, 20)
 64 |     trim_integer_levels <- h2o.interaction(myframe, key = 'trim_integers', factors = 3,
 65 |                                            pairwise = FALSE, max_factors = 3, min_occurrence = 2)
 66 |     head(trim_integer_levels, 20)
 67 | 
 68 | ###### Append all interactions to the original frame and clean up temporaries
 69 |     
 70 |     myframe <- cbind(myframe, pairwise, higherorder, trim_integer_levels)
 71 |     myframe <- h2o.assign(myframe, 'final.key')
 72 |     h2o.rm(h2oServer, grep(pattern = "Last.value", x = h2o.ls(h2oServer)$Key, value = TRUE))
 73 |     myframe
 74 |     head(myframe,20)
 75 |     summary(myframe)
 76 | 
 77 |     #    > head(myframe,20)
 78 |     #          C1    C2 C3    C4    C5       C1_C2    C2_C3       C2_C4    C3_C4             C1_C2_C3_C4_C5 C3_C3
 79 |     #    1  49ed9 d9ff0  3 c9523 00599 49ed9_d9ff0  d9ff0_3 d9ff0_c9523    other  49ed9_d9ff0_3_c9523_00599     3
 80 |     #    2  e2271 d9ff0 -6 fe2d9 cb67d e2271_d9ff0 d9ff0_-6 d9ff0_fe2d9 -6_fe2d9 e2271_d9ff0_-6_fe2d9_cb67d other
 81 |     #    3  408d2 6c5ce  7 28b4d 3e4cb 408d2_6c5ce    other       other    other  408d2_6c5ce_7_28b4d_3e4cb other
 82 |     #    4        ae93f  6 28b4d da0c6       other    other       other    other     NA_ae93f_6_28b4d_da0c6 other
 83 |     #    5  722ea 6c5ce  8       00599 722ea_6c5ce    other    6c5ce_NA     8_NA     722ea_6c5ce_8_NA_00599 other
 84 |     #    6  5e310 d9ff0 NA 9dbca       5e310_d9ff0 d9ff0_NA       other    other    5e310_d9ff0_NA_9dbca_NA      
 85 |     #    7               4 fe2d9 8d2b5       NA_NA    other    NA_fe2d9  4_fe2d9        NA_NA_4_fe2d9_8d2b5 other
 86 |     #    8  49ed9        1 87bef 92d9c    49ed9_NA     NA_1       other  1_87bef     49ed9_NA_1_87bef_92d9c     1
 87 |     #    9  e2271 14aa5  3 d77b0 da0c6       other  14aa5_3 14aa5_d77b0  3_d77b0  e2271_14aa5_3_d77b0_da0c6     3
 88 |     #    10 49ed9 d9ff0  9 79727 b7a40 49ed9_d9ff0    other       other    other  49ed9_d9ff0_9_79727_b7a40     9
 89 |     #    11       6c5ce  3       cb67d    NA_6c5ce  6c5ce_3    6c5ce_NA     3_NA        NA_6c5ce_3_NA_cb67d     3
 90 |     #    12 722ea       NA                722ea_NA    NA_NA       NA_NA    NA_NA          722ea_NA_NA_NA_NA      
 91 |     #    13 e2271 0036a  9 d77b0 da0c6       other    other 0036a_d77b0  9_d77b0  e2271_0036a_9_d77b0_da0c6     9
 92 |     #    14 5e310 0a9ed  9 28b4d 3e4cb       other    other       other    other  5e310_0a9ed_9_28b4d_3e4cb     9
 93 |     #    15 f76de 0036a  4 79727 cb67d       other    other       other    other  f76de_0036a_4_79727_cb67d other
 94 |     #    16 f76de ae93f  1 d77b0 92d9c       other  ae93f_1 ae93f_d77b0  1_d77b0  f76de_ae93f_1_d77b0_92d9c     1
 95 |     #    17 853d4 d9ff0 -1 c9523 3e4cb 853d4_d9ff0 d9ff0_-1 d9ff0_c9523    other 853d4_d9ff0_-1_c9523_3e4cb other
 96 |     #    18 e2271 0a9ed NA       00599       other 0a9ed_NA    0a9ed_NA    NA_NA    e2271_0a9ed_NA_NA_00599      
 97 |     #    19              3 79727             NA_NA    other       other    other           NA_NA_3_79727_NA     3
 98 |     #    20              7 8007d 1280c       NA_NA    other    NA_8007d  7_8007d        NA_NA_7_8007d_1280c other
 99 | 
100 | ### Imputation of Missing Values
101 | ###### First, we randomly replace 50 rows in each column of the iris dataset with missing values
102 | 
103 |     ds <- iris
104 |     ds[sample(nrow(ds), 50),1] <- NA
105 |     ds[sample(nrow(ds), 50),2] <- NA
106 |     ds[sample(nrow(ds), 50),3] <- NA
107 |     ds[sample(nrow(ds), 50),4] <- NA
108 |     ds[sample(nrow(ds), 50),5] <- NA
109 |     summary(ds)
110 | 
111 | ###### upload the NA'ed dataset to H2O
112 |     
113 |     hex <- as.h2o(h2oServer, ds)
114 |     head(hex,20)
115 | 
116 | ###### Impute the NAs in the first column in place with "median"
117 |     
118 |     h2o.impute(hex, "Sepal.Length", method = "median")
119 |     head(hex,20)
120 | 
121 | ###### Impute the NAs in the second column with the mean based on the groupBy columns Sepal.Length and Petal.Width and Species
122 |     
123 |     h2o.impute(hex, "Sepal.Width", method = "mean", groupBy = c("Sepal.Length", "Petal.Width", "Species"))
124 |     head(hex,20)
125 | 
126 | ###### Impute the Species column with the "mode" based on the columns 1 and 4
127 |     
128 |     h2o.impute(hex, 5, method = "mode", groupBy = c(1,4))
129 |     head(hex,20)
130 |     
131 |     
132 | ### Splitting H2O Frames into Consecutive Subsets
133 | ###### First, we create a large frame
134 | 
135 |     myframe = h2o.createFrame(h2oServer, 'large', rows = 1000000, cols = 10,
136 |                               seed = -12301283, randomize = TRUE, value = 0,
137 |                               categorical_fraction = 0.8, factors = 10, real_range = 1,
138 |                               integer_fraction = 0.2, integer_range = 10, missing_fraction = 0.2,
139 |                               response_factors = 1)
140 |     dim(myframe)
141 |     
142 | ###### Now, we split that dataset into 4 consecutive pieces, so we need to specify the sizes of the first 3 splits
143 | 
144 |     splits <- h2o.splitFrame(myframe, c(0.4,0.2,0.1))
145 |     dim(splits[[1]])
146 |     dim(splits[[2]])
147 |     dim(splits[[3]])
148 |     dim(splits[[4]])
149 | 
150 | ### Splitting H2O Frames into Random Subsets
151 | ###### We create a 1D vector with uniform values sampled from the interval 0...1 and use that to assign rows to the splits.
152 | 
153 |     random <- h2o.runif(myframe, seed = 123456789)
154 |     train <- myframe[random < .8,]
155 |     valid <- myframe[random >= .8 & random < 0.9,]
156 |     test  <- myframe[random >= .9,]
157 |     dim(train)
158 |     dim(valid)
159 |     dim(test)
160 | 


--------------------------------------------------------------------------------
/tutorials/basics/basics.R.md:
--------------------------------------------------------------------------------
  1 | # Basic H2O Operations in R
  2 | 
  3 | ###### This tutorial demonstrates basic data import, manipulations, and summarizations of data within an H2O cluster from within R. It requires an installation of the h2o R package and its dependencies.
  4 | 
  5 | ### Load the h2o R package and start an local H2O cluster
  6 | 
  7 | ###### Connection to an H2O cloud is established through the `h2o.init` function from the `h2o` package. For the purposes of this training exercise, we will use a local H2O cluster running on the default port of `54321`. We will also use the default cluster memory size and set `nthreads = -1` to make all the CPUs available to the H2O cluster.
  8 | 
  9 |     library(h2o)
 10 |     h2oServer <- h2o.init(nthreads = -1)
 11 | 
 12 | ### Load data into the key-value store in the H2O cluster
 13 | 
 14 | ###### This tutorial uses a 10% sample of the Person-Level 1% 2013 Public Use Microdata Sample (PUMS) from United States Census Bureau, making it a Person-Level 0.1% 2013 PUMS. We will use the `h2o.importFile` function to read the data into the H2O key-value store.
 15 | 
 16 |     datadir <- "/data"
 17 |     pumsdir <- file.path(datadir, "h2o-training", "pums2013")
 18 |     csvfile <- "adult_2013_full.csv.gz"
 19 |     adult_2013_full <- h2o.importFile(h2oServer,
 20 |                                       path = file.path(pumsdir, csvfile),
 21 |                                       key = "adult_2013_full", sep = ",")
 22 | 
 23 | ###### The `key` argument to the `h2o.importFile` function sets the name of the data set in the H2O key-value store. If the `key` argument is not supplied, the data will reside in the H2O key-value store under a machine generated name.
 24 | 
 25 | ###### The results of the `h2o.ls` function shows the size of the object held by the `adult_2013_full` key in the H2O key-value store.
 26 | 
 27 |     kvstore <- h2o.ls(h2oServer)
 28 |     kvstore
 29 |     kvstore$Bytesize[kvstore$Key == "adult_2013_full"] / 1024^2
 30 | 
 31 | ### Examine the proxy object for the H2O resident data
 32 | 
 33 | ###### The resulting `adult_2013_full` object is of class `H2OParsedData`, which implements methods commonly associated with native R `data.frame` objects.
 34 | 
 35 |     class(adult_2013_full)
 36 |     dim(adult_2013_full)
 37 |     head(colnames(adult_2013_full), 50)
 38 | 
 39 | ### Create an up-to-date UCI Adult Data Set
 40 | 
 41 | ###### In the interest of familiarity, we will create a data set similar to the [UCI Adult Data Set](https://archive.ics.uci.edu/ml/datasets/Adult) from the University of California Irvine (UCI) Machine Learning Repository. In particular, we want to extract the age of person (`AGEP`), class of worker (`COW`), educational attainment (`SCHL`), marital status (`MAR`), industry employed (`INDP`), relationship (`RELP`), race (`RAC1P`), sex (`SEX`), interest/dividends/net rental income over the past 12 months (`INTP`), usual hours worked per week over the past 12 months (`WKHP`), place of birth (`POBP`), and wages/salary income over the past 12 months.
 42 | 
 43 |     nms <- c("AGEP", "COW", "SCHL", "MAR", "INDP", "RELP", "RAC1P", "SEX",
 44 |              "INTP", "WKHP", "POBP", "WAGP")
 45 |     adult_2013 <- adult_2013_full[!is.na(adult_2013_full$WAGP) &
 46 |                                   adult_2013_full$WAGP > 0, nms]
 47 |     h2o.ls(h2oServer)
 48 | 
 49 | ###### Although we created an object in R called `adult_2013`, there is no value with that key in the H2O key-value store. To make it easier to track our data set, we will copy it's value to the `adult_2013` key using the `h2o.assign` function and delete all the machine generated keys with the prefix `Last.value` that served as intermediary objects using the `h2o.rm` function.
 50 | 
 51 |     adult_2013 <- h2o.assign(adult_2013, key = "adult_2013")
 52 |     h2o.ls(h2oServer)
 53 | 
 54 |     rmLastValues <- function(pattern = "Last.value.")
 55 |     {
 56 |       keys <- h2o.ls(h2oServer, pattern = pattern)$Key
 57 |       if (!is.null(keys))
 58 |         h2o.rm(h2oServer, keys)
 59 |       invisible(keys)
 60 |     }
 61 |     rmLastValues()
 62 | 
 63 |     kvstore <- h2o.ls(h2oServer)
 64 |     kvstore
 65 |     kvstore$Bytesize[kvstore$Key == "adult_2013"] / 1024^2
 66 | 
 67 | ### Summarize the 2013 update of the UCI Adult Data Set
 68 | 
 69 | ###### As mentioned above, an R proxy object to an H2O data set implements several methods commonly associated with R `data.frame` objects including the `summary` function to obtain column-level summaries and the `dim` function to get the row and column count
 70 | 
 71 |     summary(adult_2013)
 72 |     dim(adult_2013)
 73 | 
 74 | ###### As with R `data.frame` objects, individual columns within an H2O data set can be summarized using methods commonly associated with R `vector` objects. For example, the `quantile` function in R is used to find sample quantiles at probability values specified in the `prob` argument.
 75 | 
 76 |     centiles <- quantile(adult_2013$WAGP, probs = seq(0, 1, by = 0.01))
 77 |     centiles
 78 | 
 79 | ###### The use of the `$` operator to extract the column `WAGP` from the `adult_2013` data set generated new `Last.value` keys that we will clean up in the interest of maintaining a tidy key-value store.
 80 | 
 81 |     h2o.ls(h2oServer)
 82 |     rmLastValues()
 83 |     h2o.ls(h2oServer)
 84 | 
 85 | ### Derive columns: capital gain and capital loss columns
 86 | 
 87 | ###### The original UCI Adult Data Set contains columns for capital gain and capital loss, which can be extracted from the `INTP` column within the Person-Level PUMS data set. We will derive these two columns using the `ifelse` function where the test condition is whether the `INTP` column is positive or negative, and if that condition is met, the value is either `INT` (capital gain) / `- INT` (capital loss) or `0`. If we were just interested in measure the magnitude of either a loss or a gain, we could have used the `abs` function.
 88 | 
 89 |     capgain <- ifelse(adult_2013$INTP > 0, adult_2013$INTP, 0)
 90 |     caploss <- ifelse(adult_2013$INTP < 0, - adult_2013$INTP, 0)
 91 |     adult_2013$CAPGAIN <- capgain
 92 |     adult_2013$CAPLOSS <- caploss
 93 |     adult_2013 <- adult_2013[,- match("INTP", colnames(adult_2013))]
 94 | 
 95 | ###### Now that we have the capital gain and loss columns, we can assign our new data set to the `adult_2013` key and remove all the temporary keys from the H2O key-value store.
 96 | 
 97 |     adult_2013 <- h2o.assign(adult_2013, key = "adult_2013")
 98 | 
 99 |     h2o.ls(h2oServer)
100 |     rmLastValues()
101 |     h2o.ls(h2oServer)
102 | 
103 | ### Derive columns: log transformations for income variables
104 | 
105 | ###### The UCI Adult Data Set was originally created to predict whether a person's income in the early 1990s exceeds $50,000 per year. Given that incomes are right-skewed, transforming these measures to a log scale tends to make them more conducive to use in predictive modeling.
106 | 
107 |     adult_2013$LOG_CAPGAIN <- log(adult_2013$CAPGAIN + 1L)
108 |     adult_2013$LOG_CAPLOSS <- log(adult_2013$CAPLOSS + 1L)
109 |     adult_2013$LOG_WAGP    <- log(adult_2013$WAGP    + 1L)
110 | 
111 | ###### Now that we have the log transformed columns, we can assign our new data set to the `adult_2013` key and remove all the temporary keys from the H2O key-value store.
112 | 
113 |     h2o.ls(h2oServer)
114 |     rmLastValues()
115 |     h2o.ls(h2oServer)
116 | 
117 | ### Create cross-tabulations of original and derived categorical variables
118 | 
119 | ###### We will begin an analysis of wages by exploring the pairwise relationships between wage groups subdivided into percentiles and the variables we will use as predictors in our statistical models. In the code below the `h2o.cut` function create the wage groups and the `h2o.table` function to performs the cross-tabulations.
120 | 
121 |     cutpoints <- centiles
122 |     cutpoints[1L] <- 0
123 |     adult_2013$CENT_WAGP <- h2o.cut(adult_2013$WAGP, cutpoints)
124 |     adult_2013$TOP2_WAGP <- adult_2013$WAGP > centiles[99L]
125 | 
126 |     centcounts <- h2o.table(adult_2013["CENT_WAGP"], return.in.R = TRUE)
127 |     round(100 * centcounts/sum(centcounts), 2)
128 | 
129 |     top2counts <- h2o.table(adult_2013["TOP2_WAGP"], return.in.R = TRUE)
130 |     round(100 * top2counts/sum(top2counts), 2)
131 | 
132 |     relpxtabs <- h2o.table(adult_2013[c("RELP", "TOP2_WAGP")], return.in.R = TRUE)
133 |     relpxtabs
134 |     round(100 * relpxtabs/rowSums(relpxtabs), 2)
135 | 
136 |     schlxtabs <- h2o.table(adult_2013[c("SCHL", "TOP2_WAGP")], return.in.R = TRUE)
137 |     schlxtabs
138 | 
139 |     round(100 * schlxtabs/rowSums(schlxtabs), 2)
140 | 
141 | ###### Perform a key-value store clean up.
142 | 
143 |     h2o.ls(h2oServer)
144 |     rmLastValues()
145 |     h2o.ls(h2oServer)
146 | 
147 | ### Coerce integer columns to factor (categorical) columns
148 | 
149 | ###### As with standard R integer vectors, integer columns in H2O can be converted to a categorical type using an `as.factor` method. For our data set we have 8 columns that use integer codes to represent categorical levels.
150 | 
151 |     for (j in c("COW", "SCHL", "MAR", "INDP", "RELP", "RAC1P", "SEX", "POBP"))
152 |       adult_2013[[j]] <- as.factor(adult_2013[[j]])
153 | 
154 | ###### Perform a key-value store clean up.
155 | 
156 |     h2o.ls(h2oServer)
157 |     rmLastValues()
158 |     h2o.ls(h2oServer)
159 | 
160 | ### Create pairwise interaction terms for linear modeling
161 | 
162 | ###### While some modeling approaches, such as gradient boosting machines (GBM), random forests, and deep learning, are able to derive interactions between terms during the modeling training stage, other modeling approaches, such as generalized linear models (GLM), require interactions to be user defined inputs. We will use the `h2o.interaction` function to generate a new column in our data set that pairs relationship (`RELP`) with education attainment (`SCHL`) to form a new column labeled `RELP_SCHL` that we will "column bind" to our data set using a `cbind` method.
163 | 
164 |     inter_2013 <- h2o.interaction(adult_2013, factors = c("RELP", "SCHL"),
165 |                                   pairwise = TRUE, max_factors = 10000,
166 |                                   min_occurrence = 10)
167 |     adult_2013 <- cbind(adult_2013, inter_2013)
168 |     adult_2013 <- h2o.assign(adult_2013, key = "adult_2013")
169 |     colnames(adult_2013)
170 | 
171 | ###### Now that we have derived a few sets of variables, we can examine the H2O key-value store to ensure we have the expected objects.
172 | 
173 |     h2o.ls(h2oServer)
174 |     rmLastValues()
175 | 
176 |     kvstore <- h2o.ls(h2oServer)
177 |     kvstore
178 |     kvstore$Bytesize[kvstore$Key == "adult_2013"] / 1024^2
179 | 
180 | ### Generate group by aggregates
181 | 
182 | ###### In addition to cross-tabulations, we can create more detailed group by aggregates using the `h2o.ddply` function that was inspired by the `ddply` function from Hadley Wickham's [plyr](http://cran.r-project.org/web/packages/plyr/index.html) package, which is hosted on the Comprehensive R Archive Network (CRAN).
183 | 
184 |     wagpSummary <- function(frame) {
185 |       cbind(N      = length(frame$WAGP),
186 |             Min    = min(frame$WAGP),
187 |             Median = median(frame$WAGP),
188 |             Max    = max(frame$WAGP),
189 |             Mean   = mean(frame$WAGP),
190 |             StdDev = sd(frame$WAGP))
191 |     }
192 |     h2o.addFunction(h2oServer, wagpSummary)
193 |     statsByGroup <- as.data.frame(h2o.ddply(adult_2013, "RELP", wagpSummary))
194 |     colnames(statsByGroup)[-1L] <- c("N", "Min", "Median", "Mean", "Max", "StdDev")
195 |     statsByGroup <- statsByGroup[order(statsByGroup$Median, decreasing = TRUE), ]
196 |     rownames(statsByGroup) <- NULL
197 |     statsByGroup
198 | 
199 | ### Create training and test data sets to use during modeling
200 | 
201 | ###### As a final step in an exploration of H2O basics in R, we will create a 75% / 25% split, where the larger data set will be used for training a model and the smaller data set will be used for testing the usefulness of the model. We will achieve this by using the `h2o.runif` function to generate random uniforms over `[0, 1]` for each row and using those random values to determine the split designation for that row.
202 | 
203 |     rand <- h2o.runif(adult_2013, seed = 1185)
204 |     adult_2013_train <- adult_2013[rand <= 0.75, ]
205 |     adult_2013_train <- h2o.assign(adult_2013_train, key = "adult_2013_train")
206 |     adult_2013_test <- adult_2013[rand  > 0.75, ]
207 |     adult_2013_test <- h2o.assign(adult_2013_test, key = "adult_2013_test")
208 | 
209 | ###### Now check to make sure the size of the resulting data sets meet expectations.
210 | 
211 |     nrow(adult_2013)
212 |     nrow(adult_2013_train)
213 |     nrow(adult_2013_test)
214 | 
215 | ###### Perform a key-value store clean up.
216 | 
217 |     h2o.ls(h2oServer)
218 |     rmLastValues()
219 |     h2o.ls(h2oServer)
220 | 


--------------------------------------------------------------------------------
/tutorials/bigdataenv/H2OinBigDataEnvironments.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/bigdataenv/H2OinBigDataEnvironments.pdf


--------------------------------------------------------------------------------
/tutorials/devel/droplets/images/1-OpenProject.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/devel/droplets/images/1-OpenProject.png


--------------------------------------------------------------------------------
/tutorials/devel/droplets/images/2-JavaDroplet.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/devel/droplets/images/2-JavaDroplet.png


--------------------------------------------------------------------------------
/tutorials/devel/droplets/images/3-RebuildProject.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/devel/droplets/images/3-RebuildProject.png


--------------------------------------------------------------------------------
/tutorials/devel/droplets/images/4-RunTest.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/devel/droplets/images/4-RunTest.png


--------------------------------------------------------------------------------
/tutorials/devel/droplets/images/5-TestPassed.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/devel/droplets/images/5-TestPassed.png


--------------------------------------------------------------------------------
/tutorials/devel/droplets/tutorial.md:
--------------------------------------------------------------------------------
  1 | # Start Developing Your Application On Top of H<sub>2</sub>O-dev
  2 | 
  3 | >This tutorial targets developers who are trying to build application on top of the [h2o-dev](https://github.com/0xdata/h2o-dev) repository. 
  4 | >It will walk you through a quick introduction of _H<sub>2</sub>O droplets_ - projects templates which can be used to build a new application.
  5 | 
  6 | ## 1. Cloning the examples repository
  7 | 
  8 | The `h2o-droplets` repository on GitHub contains some very simple starter projects for different languages.  Let's get started by cloning the `h2o-droplets` repository, and changing to that directory.
  9 | 
 10 | `$ git clone https://github.com/0xdata/h2o-droplets.git`
 11 | 
 12 | ```
 13 | Cloning into 'h2o-droplets'...
 14 | remote: Counting objects: 53, done.
 15 | remote: Compressing objects: 100% (33/33), done.
 16 | remote: Total 53 (delta 10), reused 39 (delta 0)
 17 | Unpacking objects: 100% (53/53), done.
 18 | Checking connectivity... done.
 19 | ```
 20 | 
 21 | `$ cd h2o-droplets`
 22 | 
 23 | > Note: if you are using H<sub>2</sub>O training sandbox, the repository is already cloned for you in `~/devel/h2o-droplets`.
 24 | 
 25 | ## 2. A quick look at the repo contents
 26 | 
 27 | As of this writing, the repository contains a Java example and a Scala example.  Each of these is an independent starter project.
 28 | 
 29 | `$ ls -al`
 30 | 
 31 | ```
 32 | total 8
 33 | drwxr-xr-x   6 tomk  staff   204 Oct 28 08:40 .
 34 | drwxr-xr-x  35 tomk  staff  1190 Oct 28 08:40 ..
 35 | drwxr-xr-x  13 tomk  staff   442 Oct 28 08:40 .git
 36 | -rw-r--r--   1 tomk  staff   322 Oct 28 08:40 README.md
 37 | drwxr-xr-x  11 tomk  staff   374 Oct 28 08:40 h2o-java-droplet
 38 | drwxr-xr-x  11 tomk  staff   374 Oct 28 08:40 h2o-scala-droplet
 39 | ```
 40 | 
 41 | Let's take a closer look at the Java example:
 42 | 
 43 | `$ find h2o-java-droplet -type f`
 44 | 
 45 | ```
 46 | h2o-java-droplet/.gitignore
 47 | h2o-java-droplet/build.gradle
 48 | h2o-java-droplet/gradle/wrapper/gradle-wrapper.jar
 49 | h2o-java-droplet/gradle/wrapper/gradle-wrapper.properties
 50 | h2o-java-droplet/gradle.properties
 51 | h2o-java-droplet/gradlew
 52 | h2o-java-droplet/gradlew.bat
 53 | h2o-java-droplet/README.md
 54 | h2o-java-droplet/settings.gradle
 55 | h2o-java-droplet/src/main/java/water/droplets/H2OJavaDroplet.java
 56 | h2o-java-droplet/src/test/java/water/droplets/H2OJavaDropletTest.java
 57 | ```
 58 | 
 59 | As you can see, the Java example contains of a `build.gradle` file, a Java source file, and a Java test file.
 60 | 
 61 | Look at the `build.gradle` file and you will see the following sections, which link the Java droplet sample project to a version of `h2o-dev` published in MavenCentral:
 62 | 
 63 | ```groovy
 64 | repositories {
 65 |     mavenCentral()
 66 | }
 67 | ```
 68 | 
 69 | ```groovy
 70 | ext {
 71 |   h2oVersion = '0.1.8'
 72 | }
 73 | ```
 74 | 
 75 | ```groovy
 76 | dependencies {
 77 |     // Define dependency on core of H2O
 78 |     compile "ai.h2o:h2o-core:${h2oVersion}"
 79 |     // Define dependency on H2O algorithm 
 80 |     compile "ai.h2o:h2o-algos:${h2oVersion}"
 81 |     // Demands web support 
 82 |     compile "ai.h2o:h2o-web:${h2oVersion}"
 83 | 
 84 |     // H2O uses JUnit for testing 
 85 |     testCompile 'junit:junit:4.11'
 86 | }
 87 | ```
 88 | 
 89 | This is all very standard Gradle stuff.  In particular, note that this example depends on three different H<sub>2</sub>O artifacts, all of which are built in the h2o-dev repository.  
 90 | 
 91 | * **_h2o-core_** contains base platform capabilities like H<sub>2</sub>O's in-memory distributed key/value store and mapreduce frameworks (the "water" package).
 92 | * **_h2o-algos_** contains math algorithms like GLM and Random Forest (the "hex" package).
 93 | * **_h2o-web_** contains the browser web UI (lots of javascript).
 94 | 
 95 | 
 96 | ## 3. Preparing the example for use in your IDE
 97 | 
 98 | Let's walk through an example using IntelliJ IDEA.  The first step is to use Gradle to build your IntelliJ project file.
 99 | 
100 | `$ cd h2o-java-droplet`  
101 | `$ ./gradlew idea`
102 | 
103 | ```
104 | :ideaModule
105 | Download http://repo1.maven.org/maven2/ai/h2o/h2o-core/0.1.8/h2o-core-0.1.8.pom
106 | Download http://repo1.maven.org/maven2/ai/h2o/h2o-algos/0.1.8/h2o-algos-0.1.8.pom
107 | Download http://repo1.maven.org/maven2/ai/h2o/h2o-web/0.1.8/h2o-web-0.1.8.pom
108 | [... many more one-time downloads not shown ...]
109 | :ideaProject
110 | :ideaWorkspace
111 | :idea
112 | 
113 | BUILD SUCCESSFUL
114 | 
115 | Total time: 51.429 secs
116 | ```
117 | 
118 | You will see three new files created with IDEA extensions.  The .ipr file is the project file.
119 | 
120 | `$ ls -al`
121 | 
122 | ```
123 | total 168
124 | drwxr-xr-x  15 tomk  staff    510 Oct 28 10:03 .
125 | drwxr-xr-x   6 tomk  staff    204 Oct 28 08:40 ..
126 | -rw-r--r--   1 tomk  staff    273 Oct 28 08:40 .gitignore
127 | drwxr-xr-x   3 tomk  staff    102 Oct 28 10:03 .gradle
128 | -rw-r--r--   1 tomk  staff   1292 Oct 28 08:40 README.md
129 | -rw-r--r--   1 tomk  staff   1409 Oct 28 08:40 build.gradle
130 | drwxr-xr-x   3 tomk  staff    102 Oct 28 08:40 gradle
131 | -rw-r--r--   1 tomk  staff     23 Oct 28 08:40 gradle.properties
132 | -rwxr-xr-x   1 tomk  staff   5080 Oct 28 08:40 gradlew
133 | -rw-r--r--   1 tomk  staff   2404 Oct 28 08:40 gradlew.bat
134 | -rw-r--r--   1 tomk  staff  33316 Oct 28 10:03 h2o-java-droplet.iml
135 | -rw-r--r--   1 tomk  staff   3716 Oct 28 10:03 h2o-java-droplet.ipr
136 | -rw-r--r--   1 tomk  staff   9299 Oct 28 10:03 h2o-java-droplet.iws
137 | -rw-r--r--   1 tomk  staff     39 Oct 28 08:40 settings.gradle
138 | drwxr-xr-x   4 tomk  staff    136 Oct 28 08:40 src
139 | ```
140 | 
141 | 
142 | ## 4. Opening the project
143 | 
144 |   * Since we have already created the project file, start up IDEA and choose Open Project.
145 | 
146 |   <img alt="Open Project" src="images/1-OpenProject.png" width="640"></img>
147 | 
148 |   * Choose the **_h2o-java-droplet.ipr_** project file that we just created with gradle.
149 | 
150 |   <img alt="JavaDroplet" src="images/2-JavaDroplet.png" width="640"></img>
151 | 
152 | 
153 | ## 5. Running the test inside the project
154 | 
155 |   * Rebuild the project.
156 | 
157 |   <img alt="Rebuild Project" src="images/3-RebuildProject.png" width="640"></img>
158 | 
159 |   * Run the test by right-clicking on the test name.
160 | 
161 |   <img alt="Run test" src="images/4-RunTest.png" width="640"></img>
162 | 
163 |   * Watch the test pass!
164 | 
165 |   <img alt="Test passed" src="images/5-TestPassed.png" width="640"></img>
166 | 
167 | 


--------------------------------------------------------------------------------
/tutorials/devel/sparkling_water/images/h2o-ui.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/devel/sparkling_water/images/h2o-ui.png


--------------------------------------------------------------------------------
/tutorials/devel/sparkling_water/images/rstudio.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/devel/sparkling_water/images/rstudio.png


--------------------------------------------------------------------------------
/tutorials/devel/sparkling_water/images/spark-ui.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/devel/sparkling_water/images/spark-ui.png


--------------------------------------------------------------------------------
/tutorials/devel/sparkling_water/tutorial.md:
--------------------------------------------------------------------------------
  1 | # Start with Sparkling Water
  2 | 
  3 | > This short tutorial introduces _Sparkling Water_ project 
  4 | enabling H<sub>2</sub>O platform execution on the top of Spark.
  5 | 
  6 | > The tutorial guides through project building and demonstrates
  7 | > capabilities of _Sparkling Water_ on the example using Spark Shell
  8 | > to build a Deep Learning model with help of H<sub>2</sub>O algorithms and Spark data wrangling. 
  9 | 
 10 | 
 11 | ## Project Repository
 12 | The _Sparkling Water_ project is hosted on GitHub. 
 13 | You can clone it:
 14 | 
 15 | ```
 16 | $ git clone https://github.com/0xdata/sparkling-water.git
 17 | $ cd sparkling-water
 18 | ```
 19 | 
 20 | Or the repository is also already prepared on the provided H<sub>2</sub>O:
 21 | ```
 22 | cd ~/devel/sparkling-water
 23 | ```
 24 | 
 25 | ## Build the Project
 26 | The provided top-level `gradlew` command is used for building:
 27 | ```
 28 | $ ./gradlew build
 29 | ```
 30 | 
 31 | The command produces an assembly artifact which can be
 32 | executed directly on the Spark platform.
 33 | 
 34 | ## Run Sparkling Water
 35 | 
 36 | ### Export location of your Spark distribution 
 37 | 
 38 | ```
 39 | $ export SPARK_HOME=/opt/spark
 40 | ```
 41 | > Note: The provided sandbox image already contains exported shell variable `SPARK_HOME`
 42 | 
 43 | 
 44 | ### Run Sparkling Water
 45 | Start a Spark cluster with _Sparkling Water_:
 46 | ```
 47 | bin/run-sparkling.sh
 48 | ``` 
 49 | 
 50 | Go to [http://localhost:54321/steam/index.html](http://localhost:54321/steam/index.html) to access H<sub>2</sub>O web interface.
 51 | 
 52 | > Note: By default the command creates a Spark cluster specified by `local-cluster[3,2,1024]` Spark master address. Hence, the cluster contains 3 worker nodes, each has running H<sub>2</sub>O services.
 53 | 
 54 | ### Run Sparkling Shell
 55 | Start Spark shell with _Sparkling Water_:
 56 | ```
 57 | bin/sparkling-shell
 58 | ```
 59 | 
 60 | > Note: The command launches regular Spark shell with H<sub>2</sub>O services. To access Spark UI go to [http://localhost:4040](http://localhost:4040), to access H<sub>2</sub>O web UI go to [http://localhost:54321/steam/index.html](http://localhost:54321/steam/index.html).
 61 | 
 62 | ## Step-by-Step Example
 63 | 
 64 | 1. Run Sparkling shell with an embedded cluster consisting of 3 Spark worker nodes:
 65 |   ```
 66 |   export MASTER="local-cluster[3,2,1024]"
 67 |   bin/sparkling-shell
 68 |   ```
 69 | 
 70 | 2. You can go to [http://localhost:4040/](http://localhost:4040/) to see the Sparkling shell (i.e., Spark driver) status.
 71 | 
 72 |   ![H2O Web UI](images/spark-ui.png)
 73 | 
 74 | 3. Create H<sub>2</sub>O cloud using all 3 Spark workers:
 75 |   ```scala
 76 |   import org.apache.spark.h2o._
 77 |   import org.apache.spark.examples.h2o._
 78 |   val h2oContext = new H2OContext(sc).start()
 79 |   import h2oContext._
 80 |   ```
 81 | 
 82 | 4. Load weather data for Chicago international airport (ORD) with help of regular Spark RDD API:
 83 |   ```scala
 84 |   val weatherDataFile = "/data/h2o-training/sparkling-water/weather_ORD.csv"
 85 |   val wrawdata = sc.textFile(weatherDataFile,3).cache()
 86 |   // Parse and skip rows composed only of NAs
 87 |   val weatherTable = wrawdata.map(_.split(",")).map(row => WeatherParse(row)).filter(!_.isWrongRow())
 88 |   ```
 89 | 
 90 | 5. Load and parse flight data using H<sub>2</sub>O API:
 91 |   ```scala
 92 |   import java.io.File
 93 |   val dataFile = "/data/h2o-training/sparkling-water/allyears2k_headers.csv.gz"
 94 |   // Load and parse using H2O parser
 95 |   val airlinesData = new DataFrame(new File(dataFile))
 96 |   ```
 97 | 
 98 | 6. Go to [H<sub>2</sub>O web UI](http://localhost:54321/steam/index.html) and explore data:
 99 | 
100 |   ![H2O Web UI](images/h2o-ui.png)
101 | 
102 | 7. Select flights with destination in Chicago (ORD) with help of Spark API:
103 |   ```scala
104 |   val airlinesTable : RDD[Airlines] = toRDD[Airlines](airlinesData)
105 |   val flightsToORD = airlinesTable.filter(f => f.Dest==Some("ORD"))
106 |   ```
107 |   
108 | 8. Compute number of these flights:
109 |   ```scala
110 |   flightsToORD.count
111 |   ```
112 | 
113 | 9. Use Spark SQL to join flight data with weather data
114 |   ```scala
115 |   import org.apache.spark.sql.SQLContext
116 |   val sqlContext = new SQLContext(sc)
117 |   import sqlContext._
118 |   flightsToORD.registerTempTable("FlightsToORD")
119 |   weatherTable.registerTempTable("WeatherORD")
120 |   ```
121 | 
122 | 10. Perform SQL JOIN on both tables
123 |   ```scala
124 |   val bigTable = sql(
125 |           """SELECT
126 |             |f.Year,f.Month,f.DayofMonth,
127 |             |f.CRSDepTime,f.CRSArrTime,f.CRSElapsedTime,
128 |             |f.UniqueCarrier,f.FlightNum,f.TailNum,
129 |             |f.Origin,f.Distance,
130 |             |w.TmaxF,w.TminF,w.TmeanF,w.PrcpIn,w.SnowIn,w.CDD,w.HDD,w.GDD,
131 |             |f.ArrDelay
132 |             |FROM FlightsToORD f
133 |             |JOIN WeatherORD w
134 |             |ON f.Year=w.Year AND f.Month=w.Month AND f.DayofMonth=w.Day""".stripMargin)
135 |   ```
136 |   
137 | 11. Run deep learning to produce model estimating arrival delay:
138 |   ```scala
139 |   import hex.deeplearning.DeepLearning
140 |   import hex.deeplearning.DeepLearningModel.DeepLearningParameters
141 |   val dlParams = new DeepLearningParameters()
142 |   dlParams._train = bigTable
143 |   dlParams._response_column = 'ArrDelay
144 |   dlParams._epochs = 100
145 |   // Create a job  
146 |   val dl = new DeepLearning(dlParams)
147 |   val dlModel = dl.trainModel.get
148 |   ```
149 | 
150 | 12. Use model to estimate delay on training data
151 |   ```scala
152 |   val predictionH2OFrame = dlModel.score(bigTable)('predict)
153 |   val predictionsFromModel = toRDD[DoubleHolder](predictionH2OFrame).collect.map(_.result.getOrElse(Double.NaN))
154 |   ```
155 |   
156 | 13. Generate a R-code to show residuals graph:
157 |   ```scala
158 |   println(s"""
159 |   #
160 |   # R script for residual plot
161 |   #
162 |   # Import H2O library
163 |   library(h2o)
164 |   # Initialize H2O R-client
165 |   h = h2o.init()
166 |   # Fetch prediction and actual data, use remembered keys
167 |   pred = h2o.getFrame(h, "${predictionH2OFrame._key}")
168 |   act = h2o.getFrame (h, "${rame_rdd_14_b429e8b43d2d8c02899ccb61b72c4e57}")
169 |   # Select right columns
170 |   predDelay = pred$$predict
171 |   actDelay = act$$ArrDelay
172 |   # Make sure that number of rows is same  
173 |   nrow(actDelay) == nrow(predDelay)
174 |   # Compute residuals  
175 |   residuals = predDelay - actDelay
176 |   # Plot residuals   
177 |   compare = cbind (as.data.frame(actDelay$$ArrDelay), as.data.frame(residuals$$predict))
178 |   nrow(compare)
179 |   plot( compare[,1:2] )
180 |   """)
181 |   ```
182 | 
183 | 14. Open RStudio and execute generated code:
184 | 
185 |   ![RStudio](images/rstudio.png)
186 | 
187 |   > Note: RStudio has to contain the newest H2O-DEV client library.
188 | 
189 | 
190 | ## More information
191 |   * [_Sparkling Water_ GitHub](http://github.com/0xdata/sparkling-water)
192 |   * [H<sub>2</sub>O YouTube Channel](https://www.youtube.com/user/0xdata)
193 |   * [H<sub>2</sub>O SlideShare](http://www.slideshare.net/0xdata)
194 |   * [H<sub>2</sub>O Blog](http://h2o.ai/blog)
195 |   


--------------------------------------------------------------------------------
/tutorials/extab/excel.md:
--------------------------------------------------------------------------------
 1 | # Beauty and Big Data: Excel
 2 | 
 3 | ## Using Excel with H<sub>2</sub>O
 4 | 
 5 | When working with Excel the HTTP call to H2O will elicit a response in XML format that Microsoft Excel can parse in both 32-bit and 64-bit version. For Excel users that are already familiar with excel features and functions, this tutorial will work you through some basic H2O capabilities written in VBA. So the demonstration Excel file has to be macro enabled to access all the point and click features.
 6 | 
 7 | ### Initialization and Data Import
 8 | 
 9 | ![Setup Excel](images/excel01.png)
10 | 
11 |   * Start up a H2O session
12 |   * Input for *IP Address*: 'localhost'
13 |   * Input for *Port*: '54321'
14 |   * Input for *Heap Size*: '1g'
15 |   * Input for *Number of Nodes*: '1'
16 |   * Input for *File Path*: '/data/h2o-training/airlines/allyears2k.csv'
17 |   * Hit Submit to import the data with a progress bar in the lower left hand corner 
18 |   * The entry “H2O Data Hex Key” should automatically fill in with the destination key of the hex file now sitting in H2O
19 | 
20 | ### Data Summary
21 | 
22 |   * Hit *Generate Summary* to create a new worksheet with a summary of all the columns in the data set
23 |   * All columns except “NA%” are taken directly from H2O’s inspect page 
24 | 
25 | ![Summary](images/excel02.png)
26 | 
27 | ### Build GLM Model
28 | 
29 | ![Configure GLM Model](images/excel03.png)
30 | 
31 |   * Input for *Response Variable*: 'IsDepDelayed'
32 |   * Then hit “Populate Predictor Variables,” which will eliminate the response variable from the Predictor Variables as well as highlight the variables with high NA counts to ignore.
33 |   * Input for **family**: 'binomial'
34 |   * Fill in other information such as lambda, alpha, and the number of cross validation you want to run. 
35 |   * Hit submit to start the model build
36 | 
37 | ### Visualize Model Output
38 |   * On the *Output Models* section, select a model you'll like to visualize
39 |   * Hit *Show Output* to show the coefficients for the GLM model.
40 |   * Create a bar chart and sort the coefficients by magnitude.
41 | 
42 | ![Visualize Model Ouputs](images/excel04.png)
43 | 
44 | 
45 | 


--------------------------------------------------------------------------------
/tutorials/extab/images/excel01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/extab/images/excel01.png


--------------------------------------------------------------------------------
/tutorials/extab/images/excel02.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/extab/images/excel02.png


--------------------------------------------------------------------------------
/tutorials/extab/images/excel03.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/extab/images/excel03.png


--------------------------------------------------------------------------------
/tutorials/extab/images/excel04.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/extab/images/excel04.png


--------------------------------------------------------------------------------
/tutorials/extab/images/tableau_dashboard.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/extab/images/tableau_dashboard.png


--------------------------------------------------------------------------------
/tutorials/extab/images/tableau_data_connection1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/extab/images/tableau_data_connection1.png


--------------------------------------------------------------------------------
/tutorials/extab/images/tableau_data_connection2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/extab/images/tableau_data_connection2.png


--------------------------------------------------------------------------------
/tutorials/extab/images/tableau_execute.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/extab/images/tableau_execute.png


--------------------------------------------------------------------------------
/tutorials/extab/images/tableau_execute2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/extab/images/tableau_execute2.png


--------------------------------------------------------------------------------
/tutorials/extab/images/tableau_execute3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/extab/images/tableau_execute3.png


--------------------------------------------------------------------------------
/tutorials/extab/images/tableau_execute4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/extab/images/tableau_execute4.png


--------------------------------------------------------------------------------
/tutorials/extab/images/tableau_h2o_parameters.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/extab/images/tableau_h2o_parameters.png


--------------------------------------------------------------------------------
/tutorials/extab/images/tableau_r_connection1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/extab/images/tableau_r_connection1.png


--------------------------------------------------------------------------------
/tutorials/extab/images/tableau_r_connection2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/extab/images/tableau_r_connection2.png


--------------------------------------------------------------------------------
/tutorials/extab/images/workflow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/extab/images/workflow.png


--------------------------------------------------------------------------------
/tutorials/extab/tableau.md:
--------------------------------------------------------------------------------
  1 | # Beauty and Big Data: Tableau
  2 | 
  3 | ## Tableau
  4 | 
  5 | Download [Tableau](http://www.tableausoftware.com/) in order to use notebooks available on our [github](https://github.com/0xdata/h2o/tree/master/tableau).
  6 | 
  7 | ### How does Tableau play with H2O?
  8 | 
  9 | Tableau is the frontend visual organizer that utilizes all the available statistic tools from open source R and H2O. Tableau will connect to R via a socket server using a library package already built for R. The H2O client package available for installation allows R to connect and communicate to H2O via a REST API. So by connecting Tableau to R, Tableau essentially can launch or initiate H2O and run any of the features already available for R.
 10 | 
 11 | <img alt="Integration Workflow" src="images/workflow.png" width="640"></img>
 12 | 
 13 | ### R Component
 14 | 
 15 | First, make sure to [install H<sub>2</sub>O](http://docs.0xdata.com/Ruser/Rinstall.html#r-installation) in R:
 16 | 
 17 | ```
 18 | > install.packages("h2o")
 19 | ```
 20 | 
 21 | Then install the Rserve package in R that will allow the user to start up a R session on a local server that Tableau will communicate with:
 22 | 
 23 | ```
 24 | > install.packages("Rserve")
 25 | > library("Rserve")
 26 | > run.Rserve(port = 6311)
 27 | ```
 28 | 
 29 | ### Tableau Front End
 30 | 
 31 |   * **Step 1: Connection Setup**
 32 |   
 33 | 	Open Demo_Template_8.1.twb which should have all the calculated fields containing R script already in the sidebar.
 34 | 
 35 | 	Navigate to “Help > Settings and Performance > Manage R Connection” to establish a connection to the R serve.
 36 | 	
 37 | 	![Manage R Connection](images/tableau_r_connection1.png)
 38 | 	  
 39 | 	Input for IP: 'localhost' and for port: '54321'
 40 | 
 41 | 	![Manage R Connection](images/tableau_r_connection2.png)
 42 | 
 43 |   * **Step 2: Data Connection**
 44 |   	
 45 | 	Set the workbook’s connection to the airlines_meta.csv data by navigating to the data section on the left sidebar, right clicking on the airlines_meta and choosing to “Edit Connection.”
 46 | 
 47 | 	![Data Connection](images/tableau_data_connection1.png)
 48 | 
 49 | 	![Data Connection](images/tableau_data_connection2.png)
 50 | 	
 51 | 
 52 |   * **Step 3: H2O Initialization**
 53 | 
 54 | 	Configure the IP Address and Port that H2O will launch at as well as the path to the full airlines data file.
 55 | 
 56 | 	![H2O Parameters](images/tableau_h2o_parameters.png)
 57 |  
 58 |   * **Step 4: Data Import**
 59 | 
 60 | 	![Execute function](images/tableau_execute.png)
 61 | 	
 62 | 	Execute “00 Load H2O and Tableau functions” to run:
 63 | 
 64 | 	```
 65 | 	> library(h2o)
 66 | 	> tableauFunctions <- functions(x){
 67 | 	> ...
 68 | 	> }
 69 | 	> print('Finish loading necessary functions')
 70 | 	```
 71 | 
 72 | 	Execute “01 Init H2O & Parse Data” to:
 73 | 
 74 | 	```
 75 | 	> library(h2o)
 76 | 	> localH2O = h2o.init(ip = "localhost", port = 54321, nthreads = -1)
 77 | 	> data = h2o.importFile(localH2O, "/data/h2o-training/airlines/allyears2k.csv")
 78 | 	```
 79 | 
 80 | 	Execute “02 Compute Aggregation with H2O’s ddply” to groupby columns and do roll ups. First calculate the number of flights coming and going per month:
 81 | 	```
 82 | 	numFlights = ddply(data.hex, 'Month', nrow)
 83 | 	numFlights.R = as.data.frame(numFlights)
 84 | 	```
 85 | 	
 86 | 	Then compute the number of cancelled flights per month:
 87 | 
 88 | 	```
 89 | 	fun2 = function(df) {sum(df$Cancelled)}
 90 | 	h2o.addFunction(h2oLocal, fun2)
 91 | 	canFlights = ddply(data.hex, 'Month', fun2)
 92 | 	canFlights.R = as.data.frame(canFlights)
 93 | 	```
 94 | 	
 95 | 	Execute “03 Run GLM” to build a GLM model in H2O and grab back coefficient values that will be plotted in multiple worksheets:
 96 | 
 97 | 	```
 98 | 	data.glm = h2o.glm(x = c('Origin', 'Dest', 'Distance', 'Unique Carrier') , y = 'Cancelled', data = data.hex, family = 'binomial', nfolds = 0, standardize=TRUE)
 99 | 	```
100 | 
101 |   * After the calculated fields finishes running, scroll through the different dashboards to see data differently.
102 | 
103 | 	<img alt="Tableau Dashboard" src="images/tableau_dashboard.png" width="1024"></img>
104 | 


--------------------------------------------------------------------------------
/tutorials/hive_udf_template/GBM-example.R:
--------------------------------------------------------------------------------
 1 | rmLastValues <- function(pattern = "Last.value.")
 2 | {
 3 |   keys <- h2o.ls(h2oServer, pattern = pattern)$Key
 4 |   if (!is.null(keys))
 5 |     h2o.rm(h2oServer, keys)
 6 |   invisible(keys)
 7 | }
 8 | 
 9 | # "Safe" system.  Error checks process exit status code.  stop() if it failed.
10 | safeSystem <- function(x) {
11 |    print(sprintf("+ CMD: %s", x))
12 |    res <- system(x)
13 |    print(res)
14 |    if (res != 0) {
15 |        msg <- sprintf("SYSTEM COMMAND FAILED (exit status %d)", res)
16 |        stop(msg)
17 |    }
18 | }
19 | 
20 | myIP <- "localhost"
21 | myPort <- 54321
22 | 
23 | library(h2o)
24 | h2oServer <- h2o.init(ip = myIP, port = myPort, startH2O = TRUE)
25 | 
26 | 
27 | pumsdir <- file.path("/Users/myhomedir/data/pums2013")
28 | trainfile <- "adult_2013_train.csv.gz"
29 | testfile  <- "adult_2013_test.csv.gz"
30 | 
31 | adult_2013_train <- h2o.importFile(h2oServer,
32 |                                    path = file.path(pumsdir, trainfile),
33 |                                    key = "adult_2013_train", sep = ",")
34 | 
35 | adult_2013_test <- h2o.importFile(h2oServer,
36 |                                   path = file.path(pumsdir, testfile),
37 |                                   key = "adult_2013_test", sep = ",")
38 | 
39 | dim(adult_2013_train)
40 | dim(adult_2013_test)
41 | 
42 | actual_log_wagp <- h2o.assign(adult_2013_test[, "LOG_WAGP"],
43 |                               key = "actual_log_wagp")
44 | rmLastValues()
45 | 
46 | for (j in c("COW", "SCHL", "MAR", "INDP", "RELP", "RAC1P", "SEX", "POBP")) {
47 |   adult_2013_train[[j]] <- as.factor(adult_2013_train[[j]])
48 |   adult_2013_test[[j]]  <- as.factor(adult_2013_test[[j]])
49 | }
50 | rmLastValues()
51 | 
52 | predset <- c("RELP", "SCHL", "COW", "MAR", "INDP", "RAC1P", "SEX", "POBP", "AGEP",
53 |                 "WKHP", "LOG_CAPGAIN", "LOG_CAPLOSS")
54 | 
55 | log_wagp_gbm_grid <- h2o.gbm(x = predset,
56 |                              y = "LOG_WAGP",
57 |                              data = adult_2013_train,
58 |                              key  = "log_wagp_gbm_grid",
59 |                              distribution = "gaussian",
60 |                              interaction.depth = c(5, 7),
61 |                              n.trees = 110,
62 |                              shrinkage = c(0.25, 0.275),
63 |                              validation = adult_2013_test,
64 |                              importance = TRUE)
65 | log_wagp_gbm_grid
66 | 
67 | 
68 | log_wagp_gbm_best <- log_wagp_gbm_grid@model[[1L]]
69 | log_wagp_gbm_best
70 | 
71 | model_key <- log_wagp_gbm_best@key
72 | tmpdir_name <- "generated_model"
73 | cmd <- sprintf("rm -fr %s", tmpdir_name)
74 | safeSystem(cmd)
75 | cmd <- sprintf("mkdir %s", tmpdir_name)
76 | safeSystem(cmd)
77 | cmd <- sprintf("curl -o %s/GBMPojo.java http://%s:%d/2/GBMModelView.java?_modelKey=%s", tmpdir_name, myIP, myPort, model_key)
78 | safeSystem(cmd)
79 | cmd <- sprintf("curl -o %s/h2o-model.jar http://127.0.0.1:54321/h2o-model.jar", tmpdir_name)
80 | safeSystem(cmd)
81 | cmd <- sprintf("sed -i '' 's/class %s/class GBMPojo/' %s/GBMPojo.java", model_key, tmpdir_name)
82 | safeSystem(cmd)
83 | 
84 | h2o.predict(log_wagp_gbm_best, adult_2013_test)
85 | 
86 | h2o.mse(h2o.predict(log_wagp_gbm_best, adult_2013_test),
87 |         actual_log_wagp)


--------------------------------------------------------------------------------
/tutorials/hive_udf_template/README.md:
--------------------------------------------------------------------------------
1 | # Using H2O for Scoring in Hive
2 | 
3 | This directory holds the template code and an example model for using an H2O model to score data as a Hive UDF.
4 | 
5 | Instructions can be found in the related blog post on the H2O.ai website.
6 | 


--------------------------------------------------------------------------------
/tutorials/hive_udf_template/localjars/h2o-model.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/hive_udf_template/localjars/h2o-model.jar


--------------------------------------------------------------------------------
/tutorials/hive_udf_template/pom.xml:
--------------------------------------------------------------------------------
  1 | <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  2 | 	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
  3 | 	<modelVersion>4.0.0</modelVersion>
  4 | 	<groupId>com.h2o.hive.udf</groupId>
  5 | 	<artifactId>ScoreData</artifactId>
  6 | 	<packaging>jar</packaging>
  7 | 	<version>1.0-SNAPSHOT</version>
  8 | 	<name>ScoreData</name>
  9 | 	<url>http://maven.apache.org</url>
 10 | 	<properties>
 11 | 		<hive.version>0.14.0.2.2.0.0-2041</hive.version>
 12 | 		<hadoop.version>2.6.0.2.2.0.0-2041</hadoop.version>
 13 | 	</properties>
 14 | 	<build>
 15 | 		<pluginManagement>
 16 | 			<plugins>
 17 | 				<plugin>
 18 | 					<groupId>org.apache.maven.plugins</groupId>
 19 | 					<artifactId>maven-compiler-plugin</artifactId>
 20 | 					<version>2.3.2</version>
 21 | 					<configuration>
 22 | 						<source>1.6</source>
 23 | 						<target>1.6</target>
 24 | 					</configuration>
 25 | 				</plugin>
 26 | 			</plugins>
 27 | 		</pluginManagement>
 28 | 	</build>
 29 | 	<dependencies>
 30 | 		<dependency>
 31 | 			<groupId>water</groupId>
 32 | 			<artifactId>genmodel</artifactId>
 33 | 			<scope>system</scope>
 34 | 			<version>1.0</version>
 35 | 			<systemPath>${project.basedir}/localjars/h2o-model.jar</systemPath>
 36 | 		</dependency>
 37 | 		<dependency>
 38 | 			<groupId>junit</groupId>
 39 | 			<artifactId>junit</artifactId>
 40 | 			<version>4.8.2</version>
 41 | 			<scope>test</scope>
 42 | 		</dependency>
 43 | 		<dependency>
 44 | 			<groupId>org.apache.hive</groupId>
 45 | 			<artifactId>hive-exec</artifactId>
 46 | 			<version>${hive.version}</version>
 47 | 		</dependency>
 48 | 		<dependency>
 49 | 			<groupId>org.apache.hadoop</groupId>
 50 | 			<artifactId>hadoop-common</artifactId>
 51 | 			<version>${hadoop.version}</version>
 52 | 		</dependency>
 53 | 		<dependency>
 54 | 			<groupId>org.mortbay.jetty</groupId>
 55 | 			<artifactId>jetty-util</artifactId>
 56 | 			<version>6.1.26.hwx</version>
 57 | 		</dependency>
 58 | 		<dependency>
 59 | 			<groupId>org.mortbay.jetty</groupId>
 60 | 			<artifactId>jetty</artifactId>
 61 | 			<version>6.1.26.hwx</version>
 62 | 		</dependency>
 63 | 	</dependencies>
 64 | 	<repositories>
 65 | 		<repository>
 66 | 			<releases>
 67 | 				<enabled>true</enabled>
 68 | 				<updatePolicy>always</updatePolicy>
 69 | 				<checksumPolicy>warn</checksumPolicy>
 70 | 			</releases>
 71 | 			<snapshots>
 72 | 				<enabled>false</enabled>
 73 | 				<updatePolicy>never</updatePolicy>
 74 | 				<checksumPolicy>fail</checksumPolicy>
 75 | 			</snapshots>
 76 | 			<id>HDPReleases</id>
 77 | 			<name>HDP Releases</name>
 78 | 			<url>http://repo.hortonworks.com/content/repositories/releases/</url>
 79 | 			<layout>default</layout>
 80 | 		</repository>
 81 | 		<repository>
 82 | 			<releases>
 83 | 				<enabled>true</enabled>
 84 | 				<updatePolicy>always</updatePolicy>
 85 | 				<checksumPolicy>warn</checksumPolicy>
 86 | 			</releases>
 87 | 			<snapshots>
 88 | 				<enabled>false</enabled>
 89 | 				<updatePolicy>never</updatePolicy>
 90 | 				<checksumPolicy>fail</checksumPolicy>
 91 | 			</snapshots>
 92 | 			<id>HDPReleases-jetty-hadoop</id>
 93 | 			<name>HDP Releases-jetty-hadoop</name>
 94 | 			<url>http://repo.hortonworks.com/content/repositories/jetty-hadoop/</url>
 95 | 			<layout>default</layout>
 96 | 		</repository>
 97 | <!--
 98 | 			 <repository>
 99 | 				 <id>cloudera</id>
100 | 				 <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
101 | 			 </repository>
102 | -->
103 | 		 </repositories>
104 | 	 </project>
105 | 


--------------------------------------------------------------------------------
/tutorials/hive_udf_template/src/main/java/com/h2o/hive/udf/ScoreDataUDF.java:
--------------------------------------------------------------------------------
  1 | 
  2 | package com.h2o.hive.udf;
  3 | import java.util.Arrays;
  4 | import java.util.ArrayList;
  5 | import org.apache.hadoop.hive.ql.udf.UDFType;
  6 | import org.apache.hadoop.hive.ql.exec.Description;
  7 | import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
  8 | import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
  9 | import org.apache.hadoop.hive.ql.metadata.HiveException;
 10 | import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
 11 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 12 | import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
 13 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
 14 | 
 15 | @UDFType(deterministic = true, stateful = false)
 16 | @Description(name="scoredata", value="_FUNC_(*) - Returns a score for the given row",
 17 |         extended="Example:\n"+"> SELECT scoredata(*) FROM target_data;")
 18 | 
 19 | class ScoreDataUDF extends GenericUDF {
 20 |   private PrimitiveObjectInspector[] inFieldOI;
 21 |   GBMPojo p = new GBMPojo();
 22 | 
 23 |   @Override
 24 |   public String getDisplayString(String[] args) {
 25 |     return "scoredata("+Arrays.asList(args)+").";
 26 |   }
 27 |   @Override
 28 |   public ObjectInspector initialize(ObjectInspector[] args) throws UDFArgumentException {
 29 |     // Basic argument count check
 30 |     // Expects one less argument than model used; results column is dropped
 31 |     if (args.length != GBMPojo.NAMES.length-1) {
 32 |       throw new UDFArgumentLengthException("Incorrect number of arguments." +
 33 |               "  scoredata() requires: "+ Arrays.asList(GBMPojo.NAMES).subList(0,GBMPojo.NAMES.length-1)
 34 |               +", in the listed order.");
 35 |     }
 36 | 
 37 |     //Check input types
 38 |     inFieldOI = new PrimitiveObjectInspector[args.length];
 39 |     PrimitiveObjectInspector.PrimitiveCategory pCat;
 40 |     for (int i = 0; i < args.length; i++) {
 41 |       if (args[i].getCategory() != ObjectInspector.Category.PRIMITIVE)
 42 |         throw new UDFArgumentException("scoredata(...): Only takes primitive field types as parameters");
 43 |       pCat = ((PrimitiveObjectInspector) args[i]).getPrimitiveCategory();
 44 |       if (pCat != PrimitiveObjectInspector.PrimitiveCategory.STRING
 45 |               && pCat != PrimitiveObjectInspector.PrimitiveCategory.DOUBLE
 46 |               && pCat != PrimitiveObjectInspector.PrimitiveCategory.FLOAT
 47 |               && pCat != PrimitiveObjectInspector.PrimitiveCategory.LONG
 48 |               && pCat != PrimitiveObjectInspector.PrimitiveCategory.INT
 49 |               && pCat != PrimitiveObjectInspector.PrimitiveCategory.SHORT)
 50 |         throw new UDFArgumentException("scoredata(...): Cannot accept type: " + pCat.toString());
 51 |       inFieldOI[i] = (PrimitiveObjectInspector) args[i];
 52 |     }
 53 | 
 54 |     // the return type of our function is a float, so we provide the correct object inspector
 55 |     return PrimitiveObjectInspectorFactory.javaFloatObjectInspector;
 56 |   }
 57 | 
 58 |   @Override
 59 |   public Object evaluate(DeferredObject[] record) throws HiveException {
 60 |     // Expects one less argument than model used; results column is dropped
 61 |     if (record != null && record.length == GBMPojo.NAMES.length-1) {
 62 |       double[] data = new double[record.length];
 63 |       //Sadly, HIVE UDF doesn't currently make the field name available.
 64 |       //Thus this UDF must depend solely on the arguments maintaining the same
 65 |       // field order seen by the original H2O model creation.
 66 |       for (int i = 0; i < record.length; i++) {
 67 | 	try {
 68 |           Object o = inFieldOI[i].getPrimitiveJavaObject(record[i].get());
 69 |           if (o instanceof java.lang.String) {
 70 |             // Hive wraps strings in double quotes, remove
 71 |             data[i] = p.mapEnum(i,((String) o).replace("\"",""));
 72 |             if (data[i] == -1)
 73 |               throw new UDFArgumentException("scoredata(...): Negative enum val from : "
 74 |                       + o +" for argument # "+i+".");
 75 |           } else if (o instanceof Double) {
 76 |             data[i] = ((Double)o).doubleValue();
 77 |           } else if (o instanceof Float) {
 78 |             data[i] = ((Float)o).doubleValue();
 79 |           } else if (o instanceof Long) {
 80 |             data[i] = ((Long)o).doubleValue();
 81 |           } else if (o instanceof Integer) {
 82 |             data[i] = ((Integer)o).doubleValue();
 83 |           } else if (o instanceof Short) {
 84 |             data[i] = ((Short)o).doubleValue();
 85 |           } else if (o == null) {
 86 |             return null;
 87 |           } else {
 88 |             throw new UDFArgumentException("scoredata(...): Cannot accept type: "
 89 |                     + o.getClass().toString()+" for argument # "+i+".");
 90 |           }
 91 | 	} catch (Throwable e) { throw new UDFArgumentException("Unexpected exception on argument # " + i + ". " + e.toString()); }
 92 |       }
 93 |       // get the predictions
 94 |       try {
 95 |         float[] preds = new float[p.getPredsSize()];
 96 |         p.predict(data, preds);
 97 |         return preds[0];
 98 |       } catch ( Throwable e) { throw new UDFArgumentException("H2O predict function threw exception: " + e.toString()); }
 99 |     } else {
100 |       if (record == null) return null; //throw new UDFArgumentException("scoredata() received a NULL row.");
101 |       else throw new UDFArgumentException("Incorrect number of arguments." +
102 |               "  scoredata() requires: "+ Arrays.asList(GBMPojo.NAMES).subList(0,GBMPojo.NAMES.length-1)+", in order.");
103 |     }
104 |   }
105 | }
106 | 


--------------------------------------------------------------------------------
/tutorials/hive_udf_template/src/test/java/com/h2o/hive/udf/UDFExampleTest.java:
--------------------------------------------------------------------------------
 1 | package com.h2o.hive.udf;
 2 | 
 3 | import org.apache.hadoop.hive.ql.metadata.HiveException;
 4 | import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject;
 5 | import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject;
 6 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 7 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaFloatObjectInspector;
 8 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
 9 | 
10 | import org.junit.Test;
11 | import junit.framework.Assert;
12 | 
13 | public class UDFExampleTest {
14 |   @Test public void testUDFReturnsCorrectValues() throws HiveException {
15 |     // set up the models we need
16 |     ScoreDataUDF example = new ScoreDataUDF();
17 |     //From the test data set: "RELP", "SCHL", "COW", "MAR", "INDP", "RAC1P", "SEX", "POBP", "AGEP", "WKHP", "LOG_CAPGAIN", "LOG_CAPLOSS"
18 |     ObjectInspector RELP_OI = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
19 |     ObjectInspector SCHL_OI = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
20 |     ObjectInspector COW_OI = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
21 |     ObjectInspector MAR_OI = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
22 |     ObjectInspector INDP_OI = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
23 |     ObjectInspector RAC1P_OI = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
24 |     ObjectInspector SEX_OI = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
25 |     ObjectInspector POBP_OI = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
26 |     ObjectInspector AGEP_OI = PrimitiveObjectInspectorFactory.javaIntObjectInspector;
27 |     ObjectInspector WKHP_OI = PrimitiveObjectInspectorFactory.javaIntObjectInspector;
28 |     ObjectInspector LOG_CAPGAIN_OI = PrimitiveObjectInspectorFactory.javaDoubleObjectInspector;
29 |     ObjectInspector LOG_CAPLOSS_OI = PrimitiveObjectInspectorFactory.javaDoubleObjectInspector;
30 |     JavaFloatObjectInspector resultInspector = (JavaFloatObjectInspector) example.initialize(new ObjectInspector[]{RELP_OI,
31 |             SCHL_OI, COW_OI, MAR_OI, INDP_OI, RAC1P_OI, SEX_OI, POBP_OI, AGEP_OI, WKHP_OI, LOG_CAPGAIN_OI, LOG_CAPLOSS_OI });
32 |     // test our results
33 |     // Data from first line of test file: "0" "21" "1" "1" "7590" "1" "2" "1" 48 40 0.0 0.0
34 |     Object result1 = example.evaluate(new DeferredObject[]{new DeferredJavaObject("0"), new DeferredJavaObject("21"), // RELP, SCHL
35 |             new DeferredJavaObject("1"), new DeferredJavaObject("1"), new DeferredJavaObject("7590"), // COW, MAR, INDP
36 |             new DeferredJavaObject("1"), new DeferredJavaObject("2"),new DeferredJavaObject("1"), // RAC1P, SEX, POBP
37 |             new DeferredJavaObject(48), new DeferredJavaObject(40), new DeferredJavaObject(0.0), // AGEP, WKHP, LOG_CAPGAIN
38 |             new DeferredJavaObject(0.0)}); // LOG_CAPLOSS
39 |     Assert.assertEquals(10.476669311523438, resultInspector.get(result1), Math.ulp(resultInspector.get(result1)));
40 |     // Wrong number of arguments
41 |     Object result2 = example.evaluate(new DeferredObject[]{new DeferredJavaObject("0"), new DeferredJavaObject("21")});
42 |     Assert.assertNull(result2);
43 |     // Arguments are null
44 |     Object result3 = example.evaluate(new DeferredObject[]{new DeferredJavaObject(null), new DeferredJavaObject(null), // RELP, SCHL
45 |             new DeferredJavaObject(null), new DeferredJavaObject(null),new DeferredJavaObject(null), // RAC1P, SEX, POBP
46 |             new DeferredJavaObject(null), new DeferredJavaObject(null), new DeferredJavaObject(null), // AGEP, WKHP, LOG_CAPGAIN
47 |             new DeferredJavaObject(null)}); // LOG_CAPLOSS
48 |     Assert.assertNull(result3);
49 |   }
50 | }
51 | 


--------------------------------------------------------------------------------
/tutorials/marketing_usecases/h2o_training_yan_2014.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/marketing_usecases/h2o_training_yan_2014.pdf


--------------------------------------------------------------------------------
/tutorials/marketing_usecases/h2o_world_Vinod.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/marketing_usecases/h2o_world_Vinod.pdf


--------------------------------------------------------------------------------
/tutorials/marketing_usecases/marketing_usecases.R.md:
--------------------------------------------------------------------------------
 1 | # Marketing Usecases - KDDCup98
 2 | ####Use KDDCup98 dataset to design an intelligent direct mail campaign to maximize the overall profit (R tryout vs H2O solution)
 3 | 
 4 | ## - Use the dataset with selected features to try to run random forests on R
 5 | 
 6 |         rm(list=ls())
 7 |         setwd("/data/h2o-training/")
 8 |         Kdd98 <- read.csv("cup98LRN_z.csv")
 9 |         
10 | 
11 |         featureSet <- c("ODATEDW", "OSOURCE", "STATE", "ZIP", "PVASTATE", "DOB", "RECINHSE", "MDMAUD", "DOMAIN", "CLUSTER", "AGE", 
12 |         "HOMEOWNR", "CHILD03", "CHILD07",  "CHILD12", "CHILD18", "NUMCHLD", "INCOME", "GENDER", "WEALTH1", "HIT", "COLLECT1", 
13 |         "VETERANS", "BIBLE", "CATLG", "HOMEE", "PETS", "CDPLAY", "STEREO", "PCOWNERS", "PHOTO", "CRAFTS", "FISHER", "GARDENIN", 
14 |         "BOATS",  "WALKER", "KIDSTUFF", "CARDS", "PLATES", "PEPSTRFL", "CARDPROM", "MAXADATE", "NUMPROM", "CARDPM12", "NUMPRM12", 
15 |         "RAMNTALL", "NGIFTALL", "CARDGIFT", "MINRAMNT", "MAXRAMNT", "LASTGIFT", "LASTDATE", "FISTDATE", "TIMELAG", "AVGGIFT", 
16 |         "HPHONE_D", "RFA_2F", "RFA_2A", "MDMAUD_R", "MDMAUD_F", "MDMAUD_A", "CLUSTER2", "GEOCODE2", "TARGET_D")
17 | 
18 | 
19 | 
20 | 
21 |         kdd98 <- Kdd98[, setdiff(featureSet, c("CONTROLN", "TARGET_B"))]
22 |         ls()
23 |         library(randomForest)
24 |         rf <- randomForest(TARGET_D ~ ., data=kdd98)
25 | 
26 | ######You'll quickly see an error message - "Error in na.fail.default(list(TARGET_D = c(0, 0, 0, 0, 0, 0, 0, 0, 0,  :   missing values in object"
27 | 
28 | 		library(party)
29 | 		cf <- cforest(TARGET_D ~ ., data= kdd98, control = cforest_unbiased(mtry=2, ntree=50))
30 | 		
31 | 		
32 | ######After a while, a window will pop up showing "Unable to establish connection with R session". In order to continue to use Rstudio, you may go to the task manager to kill rsession. This demonstrates cforest runs out of memory for this trimmed dataset and cannot return any results.
33 | 
34 | ## - Use the complete dataset to run big data random forest with H2O
35 | 
36 | #####Let's use H2O to build a big data random forest model and predict who to mail and how much profit the fund-raising campaign may generate. 
37 | #####To run H2O on your VM, double click the H2O icon on your VM desktop. 
38 | #####Then open a browser, type localhost:54321 to access H2O web UI. 
39 | #####Let's start uploading the datasets, build the model, and do prediction.
40 | #####Name the prediction key "drf_prediction" and download to csv after predicting. 
41 | 
42 | ##-- Evaluate the total profit from the prediction 
43 | #####After getting the prediction, then run this with the H2O prediction.
44 |         kdd98v <- read.csv("cup98VAL_z.csv")		# read test data
45 |         kdd_pred = read.csv("drf_prediction.csv")		# read prediction value
46 |         kdd_pred_val <- apply(kdd_pred,1,function(row) if (row[1] > 0.68) 1 else 0 )
47 |         kdd98_withpred <- cbind(kdd98v, kdd_pred_val)
48 |         dim(kdd98v)
49 |         dim(kdd98_withpred)
50 |         kdd98_withpred$yield <- apply(kdd98_withpred,1,function(row) (as.numeric(row['TARGET_D']) - 0.68) * as.numeric(row[483]))
51 | 
52 | 
53 | 
54 |         sum(kdd98_withpred$yield)			# profit
55 |         max(kdd98_withpred$yield)			# max donation
56 |         sum(kdd_pred_val)				# mails sent
57 | 
58 | 
59 | 
60 | 


--------------------------------------------------------------------------------
/tutorials/setup/images/01_virtualbox.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/setup/images/01_virtualbox.png


--------------------------------------------------------------------------------
/tutorials/setup/images/02_vb_menu.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/setup/images/02_vb_menu.png


--------------------------------------------------------------------------------
/tutorials/setup/images/03_select_file.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/setup/images/03_select_file.png


--------------------------------------------------------------------------------
/tutorials/setup/images/04_import_wizard.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/setup/images/04_import_wizard.png


--------------------------------------------------------------------------------
/tutorials/setup/install.md:
--------------------------------------------------------------------------------
 1 | # Setup H<sub>2</sub>O World Training Sandbox
 2 | 
 3 | ## 1. Prerequisites
 4 |   * VirtualBox
 5 |     * Provided on H<sub>2</sub>O World USB sticks, or
 6 |     * Available for [download](https://www.virtualbox.org/wiki/Downloads)
 7 |     
 8 |   * H<sub>2</sub>O World Training Sandbox Image (*.ova* format)
 9 |     * Provided on H<sub>2</sub>O World USB sticks, or
10 |     * Available for [download](https://s3.amazonaws.com/h2o-release/h2o/sandbox/h2oworld-training.ova) (7GB)
11 | 
12 | > Note: You can use also your favorite virtualization tool like VMWare Fusion, VMWare Player, or Microsoft Hyper-V.
13 | 
14 | ## 2. Setup Sandbox
15 |   * Open VirtualBox
16 |   ![VirtualBox Application](images/01_virtualbox.png)
17 | 
18 |   * Select the menu item _File > Import Appliance_ 
19 |   ![VirtualBox Menu](images/02_vb_menu.png)
20 | 
21 |   * Import wizard is shown
22 |   ![Select file](images/03_select_file.png)
23 | 
24 |   * Open provided _h2oworld-training.ova_ file
25 |   ![Select file](images/04_import_wizard.png)
26 | 
27 |   * Review imported image
28 |  
29 | ## 3. Launch Sandbox
30 |   * Select h2oworld-training item and click on _Start_ icon
31 |   
32 |   * Login screen
33 | 
34 | ### 3.1 Sandbox Credentials
35 |  * user: `h2o`, password: `h2o`
36 | 
37 | > Note: To access _root_ user account, use `sudo su -` or password `h2o`.
38 |  
39 | 
40 | ## 4. Sandbox Content
41 |   * [H<sub>2</sub>O v2.8.2.8 - "Maxwell"](http://h2o-release.s3.amazonaws.com/h2o/rel-maxwell/8/index.html)
42 |   * [Sparkling Water v0.2.1-31](http://h2o-release.s3.amazonaws.com/sparkling-water/master/31/index.html)
43 |   * [RStudio Desktop v0.98.1091](http://www.rstudio.com/products/rstudio/download/)
44 |   * [R version 3.1.1 (2014-07-10) - "Sock it to Me"](http://www.r-project.org)
45 |     * [H<sub>2</sub>O R package v2.8.2.8](http://h2o-release.s3.amazonaws.com/h2o/rel-maxwell/8/index.html#R)
46 |   * [Hortonworks HDP2.1 Sandbox](http://hortonworks.com/products/hortonworks-sandbox/#install)
47 |   * [Spark v1.1.0 for Hadoop 2.4](https://spark.apache.org/downloads.html)
48 |   * Git v1.7.1
49 |   * Oracle JDK v1.7.0_45
50 |   * [IntelliJ Idea 14 Community Edition](https://www.jetbrains.com/idea/download/)
51 |   
52 | 


--------------------------------------------------------------------------------
/tutorials/streaming/storm/H2OStormStarter.java:
--------------------------------------------------------------------------------
  1 | package storm.starter;
  2 | 
  3 | import backtype.storm.Config;
  4 | import backtype.storm.LocalCluster;
  5 | import backtype.storm.StormSubmitter;
  6 | import backtype.storm.task.OutputCollector;
  7 | import backtype.storm.task.TopologyContext;
  8 | import backtype.storm.topology.OutputFieldsDeclarer;
  9 | import backtype.storm.topology.TopologyBuilder;
 10 | import backtype.storm.topology.base.BaseRichBolt;
 11 | import backtype.storm.tuple.Fields;
 12 | import backtype.storm.tuple.Tuple;
 13 | import backtype.storm.tuple.Values;
 14 | import backtype.storm.utils.Utils;
 15 | import org.testng.annotations.Test;
 16 | 
 17 | import java.io.BufferedWriter;
 18 | import java.io.File;
 19 | import java.io.FileWriter;
 20 | import java.io.IOException;
 21 | import java.util.ArrayList;
 22 | import java.util.Map;
 23 | 
 24 | /**
 25 |  * This is a basic example of embedding an H2O scoring POJO into a Storm topology.
 26 |  */
 27 | public class H2OStormStarter {
 28 | 
 29 | 
 30 |   /**
 31 |    * The ScoreBolt is responsible for obtaining class probabilities from the score pojo.
 32 |    * It emits these probabilities to a ClassifierBolt, which classifies the observation as "cat" or "dog".
 33 |    */
 34 |   public static class PredictionBolt extends BaseRichBolt {
 35 |     OutputCollector _collector;
 36 | 
 37 |     @Override
 38 |     public void prepare(Map conf, TopologyContext context, OutputCollector collector) {
 39 |       _collector = collector;
 40 |     }
 41 | 
 42 |     @Override public void execute(Tuple tuple) {
 43 | 
 44 |       GBMPojo p = new GBMPojo();
 45 | 
 46 |       // get the input tuple as a String[]
 47 |       ArrayList<String> vals_string = new ArrayList<String>();
 48 |       for (Object v : tuple.getValues()) vals_string.add((String)v);
 49 |       String[] raw_data = vals_string.toArray(new String[vals_string.size()]);
 50 | 
 51 |       // the score pojo requires a single double[] of input.
 52 |       // We handle all of the categorical mapping ourselves
 53 |       double data[] = new double[raw_data.length-1]; //drop the Label
 54 | 
 55 |       String[] colnames = tuple.getFields().toList().toArray(new String[tuple.size()]);
 56 | 
 57 |       // if the column is a factor column, then look up the value, otherwise put the double
 58 |       for (int i = 1; i < raw_data.length; ++i) {
 59 |         data[i-1] = p.getDomainValues(colnames[i]) == null
 60 |                 ? Double.valueOf(raw_data[i])
 61 |                 : p.mapEnum(p.getColIdx(colnames[i]), raw_data[i]);
 62 |       }
 63 | 
 64 |       // get the predictions
 65 |       double[] preds = new double [GBMPojo.NCLASSES+1];
 66 |       //p.predict(data, preds);
 67 |       p.score0(data, preds);
 68 | 
 69 |       // emit the results
 70 |       _collector.emit(tuple, new Values(raw_data[0], preds[1]));
 71 |       _collector.ack(tuple);
 72 |     }
 73 | 
 74 |     @Override
 75 |     public void declareOutputFields(OutputFieldsDeclarer declarer) {
 76 |       declarer.declare(new Fields("expected_class", "dogProbability"));
 77 |     }
 78 |   }
 79 | 
 80 |   /**
 81 |    * The ClassifierBolt receives the input probabilities and then makes a classification.
 82 |    * It uses a threshold value to determine how to classify the observation, which is computed based on the validation
 83 |    * done during model fitting.
 84 |    */
 85 |   public static class ClassifierBolt extends BaseRichBolt {
 86 |     OutputCollector _collector;
 87 |     final double _thresh = 0.54;
 88 | 
 89 |     @Override
 90 |     public void prepare(Map conf, TopologyContext context, OutputCollector collector) {
 91 |       _collector = collector;
 92 |     }
 93 | 
 94 |     @Override
 95 |     public void execute(Tuple tuple) {
 96 |       String expected=tuple.getString(0);
 97 |       double dogProb = tuple.getDouble(1);
 98 |       String content = expected + "," + (dogProb <= _thresh ? "dog" : "cat");
 99 |       try {
100 |         File file = new File("/Users/ludirehak/apache/h2o-training/tutorials/streaming/storm/web/out"); // EDIT ME TO YOUR PATH!
101 |         if (!file.exists())  file.createNewFile();
102 |         FileWriter fw = new FileWriter(file.getAbsoluteFile());
103 |         BufferedWriter bw = new BufferedWriter(fw);
104 |         bw.write(content);
105 |         bw.close();
106 |       } catch (IOException e) {
107 |         e.printStackTrace();
108 |       }
109 |       _collector.emit(tuple, new Values(expected, dogProb <= _thresh ? "dog" : "cat"));
110 |       _collector.ack(tuple);
111 |     }
112 | 
113 |     @Override
114 |     public void declareOutputFields(OutputFieldsDeclarer declarer) {
115 |       declarer.declare(new Fields("expected_class", "class"));
116 |     }
117 |   }
118 | 
119 |   @Test
120 |   public static void h2o_storm() throws Exception {
121 |     TopologyBuilder builder = new TopologyBuilder();
122 | 
123 |     builder.setSpout("input_row", new TestH2ODataSpout(), 10);
124 |     builder.setBolt("score_probabilities", new PredictionBolt(), 3).shuffleGrouping("input_row");
125 |     builder.setBolt("classify", new ClassifierBolt(), 3).shuffleGrouping("score_probabilities");
126 | 
127 |     Config conf = new Config();
128 |     conf.setDebug(true);
129 | 
130 |     String[] args = null;
131 |     if (args != null && args.length > 0) {
132 |       conf.setNumWorkers(3);
133 | 
134 |       StormSubmitter.submitTopologyWithProgressBar(args[0], conf, builder.createTopology());
135 |     }
136 |     else {
137 | 
138 |       LocalCluster cluster = new LocalCluster();
139 |       cluster.submitTopology("test", conf, builder.createTopology());
140 |       Utils.sleep(1000 * 60 * 60); // run for 1 hour
141 |       cluster.killTopology("test");
142 |       cluster.shutdown();
143 |     }
144 |   }
145 | }
146 | 


--------------------------------------------------------------------------------
/tutorials/streaming/storm/TestH2ODataSpout.java:
--------------------------------------------------------------------------------
 1 | package storm.starter;
 2 | 
 3 | import backtype.storm.Config;
 4 | import backtype.storm.spout.SpoutOutputCollector;
 5 | import backtype.storm.task.TopologyContext;
 6 | import backtype.storm.topology.OutputFieldsDeclarer;
 7 | import backtype.storm.topology.base.BaseRichSpout;
 8 | import backtype.storm.tuple.Fields;
 9 | import backtype.storm.tuple.Values;
10 | import backtype.storm.utils.Utils;
11 | import org.slf4j.Logger;
12 | import org.slf4j.LoggerFactory;
13 | 
14 | import java.io.BufferedReader;
15 | import java.io.File;
16 | import java.io.FileReader;
17 | import java.util.*;
18 | import java.util.concurrent.atomic.AtomicInteger;
19 | 
20 | 
21 | public class TestH2ODataSpout extends BaseRichSpout {
22 |   public static Logger LOG = LoggerFactory.getLogger(TestH2ODataSpout.class);
23 |   boolean _isDistributed;
24 |   SpoutOutputCollector _collector;
25 |   AtomicInteger _cnt = new AtomicInteger(0);
26 | 
27 | 
28 |   public TestH2ODataSpout() {
29 |     this(true);
30 |   }
31 | 
32 |   public TestH2ODataSpout(boolean isDistributed) {
33 |     _isDistributed = isDistributed;
34 |   }
35 | 
36 |   public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
37 |     _collector = collector;
38 |   }
39 | 
40 |   public void close() {
41 | 
42 |   }
43 | 
44 |   public void nextTuple() {
45 |     Utils.sleep(1000);
46 |     File file = new File("/Users/ludirehak/apache/h2o-training/tutorials/streaming/storm/live_data.csv");  // EDIT ME TO YOUR PATH!
47 |     String[] observation=null;
48 |     int i = 0;
49 |     try {
50 |       String line="";
51 |       BufferedReader br = new BufferedReader(new FileReader(file));
52 |       while (i++<=_cnt.get()) line = br.readLine(); // stream thru to next line
53 |       observation = line.split(",");
54 |     } catch (Exception e) {
55 |       e.printStackTrace();
56 |       _cnt.set(0);
57 |     }
58 |     _cnt.getAndIncrement();
59 |     if (_cnt.get() == 1000) _cnt.set(0); // force reset, for demo only!!!
60 |     _collector.emit(new Values(observation));
61 |   }
62 | 
63 |   public void ack(Object msgId) {
64 |     //empty
65 |   }
66 | 
67 |   public void fail(Object msgId) {
68 |     //empty
69 |   }
70 | 
71 |   public void declareOutputFields(OutputFieldsDeclarer declarer) {
72 |     LinkedList<String> fields_list = new LinkedList<String>(Arrays.asList(GBMPojo.NAMES));
73 |     fields_list.add(0,"Label");                            // put label, shift right
74 | 
75 |     String[] fields = fields_list.toArray(new String[fields_list.size()]); // emit these fields
76 |     declarer.declare(new Fields(fields));
77 |   }
78 | 
79 |   @Override
80 |   public Map<String, Object> getComponentConfiguration() {
81 |     if(!_isDistributed) {
82 |       Map<String, Object> ret = new HashMap<String, Object>();
83 |       ret.put(Config.TOPOLOGY_MAX_TASK_PARALLELISM, 1);
84 |       return ret;
85 |     } else {
86 |       return null;
87 |     }
88 |   }
89 | }


--------------------------------------------------------------------------------
/tutorials/streaming/storm/example.R:
--------------------------------------------------------------------------------
 1 | #
 2 | # Example R code for generating an H2O Scoring POJO.
 3 | #
 4 | 
 5 | # "Safe" system.  Error checks process exit status code.  stop() if it failed.
 6 | safeSystem <- function(x) {
 7 |   print(sprintf("+ CMD: %s", x))
 8 |   res <- system(x)
 9 |   print(res)
10 |   if (res != 0) {
11 |     msg <- sprintf("SYSTEM COMMAND FAILED (exit status %d)", res)
12 |     stop(msg)
13 |   }
14 | }
15 | 
16 | library(h2o)
17 | 
18 | cat("Starting H2O\n")
19 | myIP <- "localhost"
20 | myPort <- 54321
21 | h <- h2o.init(ip = myIP, port = myPort, startH2O = TRUE)
22 | 
23 | cat("Building GBM model\n")
24 | df <- h2o.importFile(path = normalizePath("./training_data.csv"));
25 | y <- "Label"
26 | x <- c("Has4Legs","CoatColor","HairLength","TailLength","EnjoysPlay","StairsOutWindow","HoursSpentNapping","RespondsToCommands","EasilyFrightened","Age", "Noise1", "Noise2", "Noise3", "Noise4", "Noise5")
27 | gbm.h2o.fit <- h2o.gbm(training_frame = df, y = y, x = x, model_id = "GBMPojo", ntrees = 10)
28 | 
29 | cat("Downloading Java prediction model code from H2O\n")
30 | model_id <- gbm.h2o.fit@model_id
31 | 
32 | tmpdir_name <- "generated_model"
33 | cmd <- sprintf("rm -fr %s", tmpdir_name)
34 | safeSystem(cmd)
35 | cmd <- sprintf("mkdir %s", tmpdir_name)
36 | safeSystem(cmd)
37 | 
38 | h2o.download_pojo(gbm.h2o.fit, "./generated_model/")
39 | 
40 | cat("Note: H2O will shut down automatically if it was started by this R script and the script exits\n")
41 | 


--------------------------------------------------------------------------------
/tutorials/streaming/storm/images/cats_n_dogs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/streaming/storm/images/cats_n_dogs.png


--------------------------------------------------------------------------------
/tutorials/streaming/storm/images/h2o_storm.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/streaming/storm/images/h2o_storm.png


--------------------------------------------------------------------------------
/tutorials/streaming/storm/images/ij_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/streaming/storm/images/ij_1.png


--------------------------------------------------------------------------------
/tutorials/streaming/storm/images/ij_10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/streaming/storm/images/ij_10.png


--------------------------------------------------------------------------------
/tutorials/streaming/storm/images/ij_11.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/streaming/storm/images/ij_11.png


--------------------------------------------------------------------------------
/tutorials/streaming/storm/images/ij_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/streaming/storm/images/ij_2.png


--------------------------------------------------------------------------------
/tutorials/streaming/storm/images/ij_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/streaming/storm/images/ij_3.png


--------------------------------------------------------------------------------
/tutorials/streaming/storm/images/ij_4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/streaming/storm/images/ij_4.png


--------------------------------------------------------------------------------
/tutorials/streaming/storm/images/ij_6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/streaming/storm/images/ij_6.png


--------------------------------------------------------------------------------
/tutorials/streaming/storm/images/ij_7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/streaming/storm/images/ij_7.png


--------------------------------------------------------------------------------
/tutorials/streaming/storm/images/ij_8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/streaming/storm/images/ij_8.png


--------------------------------------------------------------------------------
/tutorials/streaming/storm/images/ij_9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/streaming/storm/images/ij_9.png


--------------------------------------------------------------------------------
/tutorials/streaming/storm/premade_generated_model/h2o-genmodel.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/streaming/storm/premade_generated_model/h2o-genmodel.jar


--------------------------------------------------------------------------------
/tutorials/streaming/storm/web/cat.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/streaming/storm/web/cat.png


--------------------------------------------------------------------------------
/tutorials/streaming/storm/web/cloud.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/streaming/storm/web/cloud.png


--------------------------------------------------------------------------------
/tutorials/streaming/storm/web/dog.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/streaming/storm/web/dog.png


--------------------------------------------------------------------------------
/tutorials/streaming/storm/web/index.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <html>
  3 | <head lang="en">
  4 |     <meta charset="UTF-8">
  5 |     <title>Storm Demo Live</title>
  6 |     <script src="http://ajax.googleapis.com/ajax/libs/jquery/1.10.1/jquery.min.js" type="text/javascript"></script>
  7 |     <script src="http://d3js.org/d3.v3.min.js" charset="utf-8"></script>
  8 | 
  9 |     <script>
 10 | 
 11 |         // use to randomly place rain drops along the cloud
 12 |         function getRandomInt(min, max) {
 13 |             return Math.floor(Math.random() * (max - min + 1)) + min;
 14 |         }
 15 |     </script>
 16 | 
 17 |     <style>
 18 |         #demo {
 19 |             width: 100%;
 20 |             height: 100%;
 21 |             position: relative;
 22 |         }
 23 | 
 24 |         #cloud,
 25 |         #woof,
 26 |         #predict,
 27 |         #meow {
 28 |             width: 100%;
 29 |             height: 100%;
 30 |             position: absolute;
 31 |             top: 0;
 32 |             left: 0;
 33 |         }
 34 | 
 35 |         #cloud {
 36 |             z-index: 10;
 37 |         }
 38 |         #predict { z-index: 5; }
 39 | 
 40 |         #cur {
 41 |             position: relative;
 42 |             top:45px;
 43 |             left:10%;
 44 |             width: 400px;
 45 |             height:200px;
 46 |             z-index:900;
 47 |         }
 48 | 
 49 |         #cat_cnt {
 50 |             position: absolute;
 51 |             top: 15px;
 52 |             left: 50%;
 53 |             z-index:900;
 54 |         }
 55 | 
 56 |         #ccnt {
 57 |             position: absolute;
 58 |             top: 15px;
 59 |             left: 100%;
 60 |             z-index:900;
 61 |             font-size: 90px;
 62 |         }
 63 | 
 64 |         #dcnt {
 65 |             position: absolute;
 66 |             top: 15px;
 67 |             left: 200%;
 68 |             z-index:900;
 69 |             font-size: 90px;
 70 |         }
 71 | 
 72 |         #dog_cnt {
 73 |             position: absolute;
 74 |             top: 10px;
 75 |             left: 160%;
 76 |             z-index:900;
 77 |         }
 78 | 
 79 |     </style>
 80 | </head>
 81 | <body>
 82 | 
 83 | 
 84 |   <div id = "demo">
 85 |       <div id="cloud"><img src="./cloud.png" style="width:100%;height:228px"></div>
 86 |       <div id="meow"></div>
 87 |       <div id="woof"></div>
 88 |       <div id="predict"></div>
 89 |       <div id="cur_val" style="display:none;">cat</div>
 90 |       <div id="old" style="display:none;"></div>
 91 |   </div>
 92 |   <div id="cur">
 93 |       <img id="cur_pic" src="./cat.png" style="display:none;">
 94 |       <img id="cat_cnt" src="./cat.png">
 95 |       <div id="ccnt" style="color: #ffffff;">0</div>
 96 |       <img id="dog_cnt" src="./dog.png">
 97 |       <div id="dcnt" style="color: #ffffff;">0</div>
 98 |   </div>
 99 |   <script>
100 |       var w = innerWidth;
101 |       var h = innerHeight;
102 | 
103 |       // simple method to GET the latest predictions from Storm
104 |       function gogogo() {
105 |           var xmlhttp = new XMLHttpRequest();
106 |           xmlhttp.onreadystatechange = function () {
107 |               if (xmlhttp.readyState==4 && xmlhttp.status==200) {
108 |                   var txt = xmlhttp.responseText;
109 |                   txt = txt.split(',');
110 |                   var predict  = txt[1];
111 |                   if (predict === undefined) return;
112 |                   var isCat = predict == "cat";
113 |                   var dogObs = isCat ? 0 : 1;
114 |                   var catObs = isCat ? 1 : 0;
115 |                   $('#dcnt').text(function(i, txt) { return parseInt($("#dcnt").text(), 10) + dogObs});
116 |                   $('#ccnt').text(function(i, txt) { return parseInt($("#ccnt").text(), 10) + catObs});
117 |                   $('#cur_val').text(predict);
118 |                   document.getElementById("cur_pic").src="./".concat(predict).concat(".png");
119 |               }
120 |           };
121 |           xmlhttp.open("GET", "./out", true);
122 |           xmlhttp.send();
123 |       }
124 | 
125 |       function rain() {
126 | 
127 |           // data for each vis
128 |           var data = Array.apply(null, new Array(50)).map(Number.prototype.valueOf,0).map(function() { return getRandomInt(30, window.innerWidth - 100)} );
129 |           var data2= Array.apply(null, new Array(50)).map(Number.prototype.valueOf,0).map(function() { return getRandomInt(30, window.innerWidth - 100)} );
130 |           var data3= [60,innerWidth/5,innerWidth*2/5,3*innerWidth/5, 4*innerWidth/5, innerWidth];
131 | 
132 |           // visualization references
133 |           var vis = d3.select("#meow").append("svg").attr("width", w+100).attr("height",innerHeight+100).style("display", "inline");
134 |           var vis2 =d3.select("#woof").append("svg").attr("width", w+100).attr("height",innerHeight+100).style("display", "inline");
135 |           var vis3 = d3.select("#predict").append("svg").attr("width", w+100).attr("height",innerHeight+100).style("display", "inline").attr("id", "pred");
136 | 
137 |           // set of objects in each visualization
138 |           var ones = vis.selectAll("rect").data(data);
139 |           var zeros=vis2.selectAll("circle").data(data2);
140 |           var imgs = vis3.selectAll("image").data(data3);
141 | 
142 |           // loop!
143 |           setInterval(function() {
144 |               // get the most recent prediction...
145 |               gogogo();
146 | 
147 |               // drop the 'cat.png' or 'dog.png' based on the $("#cur_val").text() value
148 |               imgs.enter()
149 |                       .append("svg:image")
150 |                       .attr("xlink:href",  function() { return "./".concat($("#cur_val").text()).concat(".png") })
151 |                       .attr("x", function(d) { return d; })
152 |                       .attr("y", "0")
153 |                       .attr("width", "300")
154 |                       .attr("height", "150")
155 |                       .transition()
156 |                       .delay(function () {
157 |                           return getRandomInt(0, 1000);
158 |                       })
159 |                       .each("end", function () {
160 |                           d3.select(this).transition().duration(1500).attr("y", innerHeight+200);
161 |                           d3.select(this).transition().delay(2000).duration(1).style("opacity", 0).ease("exp").remove();
162 |                       });
163 | 
164 |               // build the straight line drops of rain
165 |               ones.enter().append("rect")
166 |                       .attr("x", function (d) {
167 |                           return d;
168 |                       })
169 |                       .attr("y", 50)
170 |                       .attr("width", 2)
171 |                       .attr("height", 10)
172 |                       .style("border", "1px solid black")
173 |                       .transition()
174 |                       .delay(function () {
175 |                           return getRandomInt(0, 1000);
176 |                       })
177 |                       .each("end", function () {
178 |                           d3.select(this).transition().duration(1500).attr("y", innerHeight+200);
179 |                           d3.select(this).transition().delay(2000).duration(1).style("opacity", 0).ease("exp").remove();
180 |                       });
181 | 
182 |               // build the circle drops of rain
183 |               zeros.enter().append("circle")
184 |                       .attr("cx", function(d) { return d; })
185 |                       .attr("cy", 200)
186 |                       .attr("r", 5)
187 |                       .attr("width", 2)
188 |                       .attr("height", 25)
189 |                       .attr("fill", "white")
190 |                       .attr("stroke", "black")
191 |                       .style("border", "1px solid black")
192 |                       .transition()
193 |                       .delay(function () {
194 |                           return getRandomInt(500, 2000);
195 |                       })
196 |                       .each("end", function () {
197 |                           d3.select(this).transition().duration(2000).attr("cy", innerHeight+200);
198 |                           d3.select(this).transition().delay(2000).duration(1).style("opacity", 0).ease("exp").remove();
199 |                       });
200 | 
201 |           }, 1000);
202 |       }
203 | 
204 |       // launch the whole thing...
205 |       $(document).ready(function() {rain();});
206 |   </script>
207 | 
208 | </body>
209 | </html>
210 | 


--------------------------------------------------------------------------------
/tutorials/streaming/storm/web/out:
--------------------------------------------------------------------------------
1 | cat,cat


--------------------------------------------------------------------------------
/tutorials/supervised/classification/images/glm_f1_cutoff_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/supervised/classification/images/glm_f1_cutoff_0.png


--------------------------------------------------------------------------------
/tutorials/supervised/classification/images/glm_f1_cutoff_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/supervised/classification/images/glm_f1_cutoff_1.png


--------------------------------------------------------------------------------
/tutorials/supervised/classification/images/glm_roc_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/supervised/classification/images/glm_roc_0.png


--------------------------------------------------------------------------------
/tutorials/supervised/classification/images/glm_roc_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/supervised/classification/images/glm_roc_1.png


--------------------------------------------------------------------------------
/tutorials/supervised/classification/images/metrics.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/supervised/classification/images/metrics.png


--------------------------------------------------------------------------------
/tutorials/supervised/gbm/gbm.R.md:
--------------------------------------------------------------------------------
 1 | # Introduction to Gradient Boosting Machines in H2O
 2 | 
 3 | ###### This tutorial introduces H2O's Gradient (Tree) Boosting Machines framework in R.
 4 | 
 5 | #### Gradient Boosting Machines (GBM)
 6 | 
 7 | ##### Intuition: Average an ensemble of weakly predicting (small) trees where each tree "adjusts" to the "mistakes" of the preceding trees.
 8 | 
 9 | ##### Important components:
10 | ###### 1. Number of trees
11 | ###### 2. Maximum depth of tree
12 | ###### 3. Learning rate ( *shrinkage* parameter)
13 | 
14 | ###### where smaller learning rates tend to require larger number of tree and vice versa.
15 | 
16 | ### R Documentation
17 | 
18 | ###### The `h2o.gbm` function fits H2O's Gradient Boosting Machines from within R.
19 | 
20 |     library(h2o)
21 |     args(h2o.gbm)
22 | 
23 | ###### The R documentation (man page) for H2O's Gradient Boosting Machines can be opened from within R using the `help` or `?` functions:
24 | 
25 |     help(h2o.gbm)
26 | 
27 | ###### We can run the example from the man page using the `example` function:
28 | 
29 |     example(h2o.gbm)
30 | 
31 | ###### And run a longer demonstration from the `h2o` package using the `demo` function:
32 | 
33 |     demo(h2o.gbm)
34 | 


--------------------------------------------------------------------------------
/tutorials/supervised/glm/glm.R.md:
--------------------------------------------------------------------------------
 1 | # Introduction to Generalized Linear Models in H2O
 2 | 
 3 | ###### This tutorial introduces H2O's Generalized Linear Models (GLM) framework in R.
 4 | 
 5 | ### Generalized Linear Models (GLM)
 6 | 
 7 | #### Intuition: A linear combination of predictors is sufficient for determining an outcome.
 8 | 
 9 | ##### Important components:
10 | ###### 1. Exponential family for error distribution (Gaussian/Normal, Binomial, Poisson, Gamma, Tweedie, etc.)
11 | ###### 2. Link function, whose inverse is used to generate predictions
12 | ###### 3. (Elastic Net) Mixing parameter between the L1 and L2 penalties on the coefficient estimates.
13 | ###### 4. (Elastic Net) Shrinkage parameter for the mixed penalty in 3.
14 | 
15 | ### R Documentation
16 | 
17 | ###### The `h2o.glm` function fits H2O's Generalized Linear Models from within R.
18 | 
19 |     library(h2o)
20 |     args(h2o.glm)
21 | 
22 | ###### The R documentation (man page) for H2O's Generalized Linear Models can be opened from within R using the `help` or `?` functions:
23 | 
24 |     help(h2o.glm)
25 | 
26 | ###### We can run the example from the man page using the `example` function:
27 | 
28 |     example(h2o.glm)
29 | 
30 | ###### And run a longer demonstration from the `h2o` package using the `demo` function:
31 | 
32 |     demo(h2o.glm)
33 | 


--------------------------------------------------------------------------------
/tutorials/supervised/randomforest/randomforest.R.md:
--------------------------------------------------------------------------------
 1 | # Introduction to Random Forests in H2O
 2 | 
 3 | ###### This tutorial introduces H2O's Random Forest framework in R.
 4 | 
 5 | #### Random Forests
 6 | 
 7 | ##### Intuition: Average an ensemble of weakly predicting (larger) trees where each tree is *de-correlated* from all other trees.
 8 | 
 9 | ##### Important components:
10 | ###### 1. Number of trees
11 | ###### 2. Maximum depth of tree
12 | ###### 3. Number of variables randomly sampled as candidates for splits
13 | ###### 4. Sampling rate for constructing data set to use on each tree
14 | 
15 | ### R Documentation
16 | 
17 | ###### The `h2o.randomForest` function fits H2O's Random Forest from within R.
18 | 
19 |     library(h2o)
20 |     args(h2o.randomForest)
21 | 
22 | ###### The R documentation (man page) for H2O's Random Forest can be opened from within R using the `help` or `?` functions:
23 | 
24 |     help(h2o.randomForest)
25 | 
26 | ###### We can run the example from the man page using the `example` function:
27 | 
28 |     example(h2o.randomForest)
29 | 
30 | ###### And run a longer demonstration from the `h2o` package using the `demo` function:
31 | 
32 |     demo(h2o.randomForest)
33 | 


--------------------------------------------------------------------------------
/tutorials/supervised/regression/images/rand_glm_coef.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/supervised/regression/images/rand_glm_coef.png


--------------------------------------------------------------------------------
/tutorials/troubleshooting/images/Clusterstattunnel.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/troubleshooting/images/Clusterstattunnel.png


--------------------------------------------------------------------------------
/tutorials/troubleshooting/images/TroubleshootingHadoopAmbariNodeMgr.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/troubleshooting/images/TroubleshootingHadoopAmbariNodeMgr.png


--------------------------------------------------------------------------------
/tutorials/troubleshooting/images/TroubleshootingHadoopAmbariyarnscheduler.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/troubleshooting/images/TroubleshootingHadoopAmbariyarnscheduler.png


--------------------------------------------------------------------------------
/tutorials/troubleshooting/images/TroubleshootingHadoopClouderayarnnodemgr.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/troubleshooting/images/TroubleshootingHadoopClouderayarnnodemgr.png


--------------------------------------------------------------------------------
/tutorials/troubleshooting/images/TroubleshootingHadoopClouderayarnscheduler.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/troubleshooting/images/TroubleshootingHadoopClouderayarnscheduler.png


--------------------------------------------------------------------------------
/tutorials/troubleshooting/images/UpdateR.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/troubleshooting/images/UpdateR.png


--------------------------------------------------------------------------------
/tutorials/unsupervised/anomaly/anomaly.R.md:
--------------------------------------------------------------------------------
  1 | # Anomaly Detection on MNIST with H2O Deep Learning
  2 | 
  3 | ######This tutorial shows how a Deep Learning [Auto-Encoder](http://en.wikipedia.org/wiki/Autoencoder) model can be used to find outliers in a dataset. This file is both valid R and markdown code.
  4 | 
  5 | ######Consider the following three-layer neural network with one hidden layer and the same number of input neurons (features) as output neurons. The loss function is the MSE between the input and the output. Hence, the network is forced to learn the identity via a nonlinear, reduced representation of the original data. Such an algorithm is called a deep autoencoder; these models have been used extensively for unsupervised, layer-wise pretraining of supervised deep learning tasks, but here we consider the autoencoder's application for discovering anomalies in data.
  6 | 
  7 | #####![](images/autoencoder.png)
  8 | 
  9 | ######We use the well-known [MNIST](http://yann.lecun.com/exdb/mnist/) dataset of hand-written digits, where each row contains the 28^2=784 raw gray-scale pixel values from 0 to 255 of the digitized digits (0 to 9). 
 10 | 
 11 | ### Start H2O and load the MNIST data
 12 | 
 13 | ######Initialize the H2O server and import the MNIST training/testing datasets.
 14 | 
 15 |     library(h2o)
 16 |     h2oServer <- h2o.init(nthreads=-1)
 17 |     homedir <- "/data/h2o-training/mnist/"
 18 |     TRAIN = "train.csv.gz"
 19 |     TEST = "test.csv.gz"
 20 |     train_hex <- h2o.importFile(h2oServer, path = paste0(homedir,TRAIN), header = F, sep = ',', key = 'train.hex')
 21 |     test_hex <- h2o.importFile(h2oServer, path = paste0(homedir,TEST), header = F, sep = ',', key = 'test.hex')
 22 |  
 23 | ######The data consists of 784 (=28^2) pixel values per row, with (gray-scale) values from 0 to 255. The last column is the response (a label in 0,1,2,...,9).
 24 |  
 25 |     predictors = c(1:784)
 26 |     resp = 785
 27 | 
 28 | ######We do unsupervised training, so we can drop the response column.
 29 | 
 30 |     train_hex <- train_hex[,-resp]
 31 |     test_hex <- test_hex[,-resp]
 32 | 
 33 | ### Finding outliers - ugly hand-written digits
 34 | ######We train a Deep Learning Auto-Encoder to learn a compressed (low-dimensional) non-linear representation of the dataset, hence learning the intrinsic structure of the training dataset. The auto-encoder model is then used to transform all test set images to their reconstructed images, by passing through the lower-dimensional neural network. We then find outliers in a test dataset by comparing the reconstruction of each scanned digit with its original pixel values. The idea is that a high reconstruction error of a digit indicates that the test set point doesn't conform to the structure of the training data and can hence be called an outlier.
 35 | 
 36 | ####1. Learn what's *normal* from the training data
 37 | 
 38 | ######Train unsupervised Deep Learning autoencoder model on the training dataset. For simplicity, we train a model with 1 hidden layer of 50 Tanh neurons to create 50 non-linear features with which to reconstruct the original dataset.  We learned from the Dimensionality Reduction tutorial that 50 is a reasonable choice. For simplicity, we train the auto-encoder for only 1 epoch (one pass over the data). We explicitly include constant columns (all white background) for the visualization to be easier.
 39 | 
 40 |     ae_model <- h2o.deeplearning(x=predictors,
 41 |                                y=42, #response (ignored - pick any non-constant column)
 42 |                                data=train_hex,
 43 |                                activation="Tanh",
 44 |                                autoencoder=T,
 45 |                                hidden=c(50),
 46 |                                ignore_const_cols=F,
 47 |                                epochs=1)
 48 |                                
 49 | ######Note that the response column is ignored (it is only required because of a shared DeepLearning code framework).
 50 |   
 51 | ####2. Find outliers in the test data
 52 | ######The Anomaly app computes the per-row reconstruction error for the test data set. It passes it through the autoencoder model (built on the training data) and computes mean square error (MSE) for each row in the test set.
 53 |  
 54 |     test_rec_error <- as.data.frame(h2o.anomaly(test_hex, ae_model))
 55 |       
 56 | 
 57 | ######In case you wanted to see the lower-dimensional features created by the auto-encoder deep learning model, here's a way to extract them for a given dataset. This a non-linear dimensionality reduction, similar to PCA, but the values are capped by the activation function (in this case, they range from -1...1)
 58 | 
 59 |     test_features_deep <- h2o.deepfeatures(test_hex, ae_model, layer=1)
 60 |     summary(test_features_deep)
 61 | 
 62 | ####3. Visualize the *good*, the *bad* and the *ugly*
 63 | ######We will need a helper function for plotting handwritten digits (adapted from http://www.r-bloggers.com/the-essence-of-a-handwritten-digit/). Don't worry if you don't follow this code...
 64 | 
 65 |     plotDigit <- function(mydata, rec_error) {
 66 |       len<-nrow(mydata)
 67 |       N<-ceiling(sqrt(len))
 68 |       op <- par(mfrow=c(N,N),pty='s',mar=c(1,1,1,1),xaxt='n',yaxt='n')
 69 |       for (i in 1:nrow(mydata)) {
 70 |         colors<-c('white','black')
 71 |         cus_col<-colorRampPalette(colors=colors)
 72 |         z<-array(mydata[i,],dim=c(28,28))
 73 |         z<-z[,28:1]
 74 |         image(1:28,1:28,z,main=paste0("rec_error: ", round(rec_error[i],4)),col=cus_col(256))
 75 |       }
 76 |       on.exit(par(op))
 77 |     }
 78 |     
 79 |     plotDigits <- function(data, rec_error, rows) {
 80 |       row_idx <- order(rec_error[,1],decreasing=F)[rows]
 81 |       my_rec_error <- rec_error[row_idx,]
 82 |       my_data <- as.matrix(as.data.frame(data[row_idx,]))
 83 |       plotDigit(my_data, my_rec_error)
 84 |     }
 85 |       
 86 | ######Let's look at the test set points with low/median/high reconstruction errors. We will now visualize the original test set points and their reconstructions obtained by propagating them through the narrow neural net.
 87 |   
 88 |     test_recon <- h2o.predict(ae_model, test_hex)
 89 |     summary(test_recon)
 90 | 
 91 | ####The good
 92 | ######Let's plot the 25 digits with lowest reconstruction error. First we plot the reconstruction, then the original scanned images.
 93 |     
 94 |     plotDigits(test_recon, test_rec_error, c(1:25))
 95 |     plotDigits(test_hex,   test_rec_error, c(1:25))
 96 | 
 97 | #####![](images/good_both.png)
 98 | ######Clearly, a well-written digit 1 appears in both the training and testing set, and is easy to reconstruct by the autoencoder with minimal reconstruction error. Nothing is as easy as a straight line.
 99 | 
100 | ####The bad
101 | ######Now let's look at the 25 digits with median reconstruction error.
102 |    
103 |     plotDigits(test_recon, test_rec_error, c(4988:5012))
104 |     plotDigits(test_hex,   test_rec_error, c(4988:5012))
105 | 
106 | #####![](images/bad_both.png)
107 | ######These test set digits look "normal" - it is plausible that they resemble digits from the training data to a large extent, but they do have some particularities that cause some reconstruction error.
108 | 
109 | ####The ugly
110 | ######And here are the biggest outliers - The 25 digits with highest reconstruction error!
111 | 
112 |     plotDigits(test_recon, test_rec_error, c(9976:10000))
113 |     plotDigits(test_hex,   test_rec_error, c(9976:10000))
114 | 
115 | #####![](images/ugly_both.png)
116 | ######Now here are some pretty ugly digits that are plausibly not commonly found in the training data - some are even hard to classify by humans.
117 | 
118 | ###Voila!
119 | #####We were able to find outliers with H2O Deep Learning Auto-Encoder models. We would love to hear your usecase for Anomaly detection.
120 | 
121 | ######*Note:* Every run of DeepLearning results in different results since we use [Hogwild!](http://www.eecs.berkeley.edu/~brecht/papers/hogwildTR.pdf) parallelization with intentional race conditions between threads.  To get reproducible results at the expense of speed for small datasets, set reproducible=T and specify a seed.
122 | 
123 | #### More information can be found in the [H2O Deep Learning booklet](https://t.co/kWzyFMGJ2S) and in our [slides](http://www.slideshare.net/0xdata/presentations).
124 | 


--------------------------------------------------------------------------------
/tutorials/unsupervised/anomaly/images/autoencoder.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/unsupervised/anomaly/images/autoencoder.png


--------------------------------------------------------------------------------
/tutorials/unsupervised/anomaly/images/bad_both.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/unsupervised/anomaly/images/bad_both.png


--------------------------------------------------------------------------------
/tutorials/unsupervised/anomaly/images/good_both.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/unsupervised/anomaly/images/good_both.png


--------------------------------------------------------------------------------
/tutorials/unsupervised/anomaly/images/ugly_both.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/unsupervised/anomaly/images/ugly_both.png


--------------------------------------------------------------------------------
/tutorials/unsupervised/clustering/clustering.R.md:
--------------------------------------------------------------------------------
 1 | # Unsupervised Learning and Clustering With H2O KMeans
 2 | 
 3 | ###### This tutorial shows how a [KMeans](http://en.wikipedia.org/wiki/K-means_clustering) model is trained. This file is both valid R and markdown code. We will use a variety of well-known datasets that are used in published papers, which evaluate various KMeans implementations.
 4 | 
 5 | ### Start H2O and build a KMeans model on iris
 6 | 
 7 | ###### Initialize the H2O server and import the datasets we need for this session.
 8 | 
 9 |     library(h2o)
10 |     h2oServer  <- h2o.init(nthreads=-1)
11 |     datadir <- "/data"
12 |     homedir <- file.path(datadir, "h2o-training", "clustering")
13 |     iris.h2o <- as.h2o(h2oServer, iris)
14 |     
15 | 
16 | ### Our first KMeans model
17 | 
18 | ###### It's easy to run KMeans, it's just like the `kmeans` method available in the stats package. We'll leave out the `Species` column and cluster on the iris flower attributes.
19 | 
20 |     km.model <- h2o.kmeans(data = iris.h2o, centers = 5, cols = 1:4, init="furthest")
21 | 
22 | ###### Let's look at the model summary:
23 | 
24 |     km.model
25 |     km.model@model$centers       # The centers for each cluster
26 |     km.model@model$tot.withinss  # total within cluster sum of squares
27 |     km.model@model$cluster       # cluster assignments per observation
28 | 
29 | ###### To see the model parameters that were used, access the model@model$params field:
30 |     
31 |     km.model@model$params
32 |     
33 | ###### You can get the R documentation help here:
34 | 
35 |     ?h2o.kmeans
36 |     
37 | ### Use the [Gap Statistic (Beta)](http://web.stanford.edu/~hastie/Papers/gap.pdf) To Find the Optimal Number of Clusters
38 | ###### This is essentially a grid search over KMeans.
39 | 
40 | ###### You can get the R documentation help here:
41 | 
42 |     ?h2o.gapStatistic
43 |     
44 | ###### The idea is that, for each 'k', generate WCSS from a reference distribution and examine the gap between the expected WCSS and the observed WCSS. To obtain the W_k from the reference distribution, 'B' Monte Carlo replicates are drawn from the reference distribution. For each replicate, a KMeans is constructed and the WCSS reported back.
45 | 
46 |     gap_stat <- h2o.gapStatistic(data = iris.h2o, K = 10, B = 100, boot_frac = .1, cols=1:4)
47 |                                 
48 | ###### Let's take a look at the output. The default output display will show the number of KMeans models that were run and the optimal value of k:
49 | 
50 |     gap_stat
51 |     
52 | ###### We can also run summary on the gap_stat model:
53 | 
54 |     summary(gap_stat)
55 |     
56 | ###### We can also plot our gap_stat model:
57 | 
58 |     plot(gap_stat)
59 |     
60 | ### Comparison against other KMeans implementations.
61 | ###### Let's use the [Census 1990 dataset](https://archive.ics.uci.edu/ml/datasets/US+Census+Data+%281990%29), which has 2.5 million data points with 68 integer features.
62 | 
63 |     # census1990 <- "Census1990.csv.gz"
64 |     # census.1990 <- h2o.importFile(h2oServer, path = file.path(homedir,census1990), header = F, sep = ',', key = 'census.1990.hex')
65 | 
66 |     # dim(census.1990)                                                            
67 |     # km.census <- h2o.kmeans(data = census.1990, centers = 12, init="furthest")  # NOT RUN: Too long on VM
68 |     # km.census@model$tot.withinss                                                
69 | 
70 | ###### We can compare the result with the published result from [Fast and Accurate KMeans on Large Datasets](http://papers.nips.cc/paper/4362-fast-and-accurate-k-means-for-large-datasets.pdf) where the cost for k = 12 and ~2GB of RAM was approximately 3.50E+18. This paper implements a streaming KMeans, so of course accuracy in the streaming case will not be as good as a batch job, but results are comparable within a few orders of magnitude. H2O gives the ability to work on datasets that don't fit in a single box's RAM without having to stream the data from cold storage: simply use distributed H2O.                                                                                                                                                                                                                                                                 
71 | 
72 | ###### We can also compare with [StreamKM++: A Clustering Algorithm for Data Streams](http://www.cs.uni-paderborn.de/uploads/tx_sibibtex/2012_AckermannMRSLS_StreamKMpp.pdf). For various k, we can compare our implementation, but we only do k = 30 here.
73 | 
74 |     # km.census <- h2o.kmeans(data = census.1990, centers = 30, init="furthest")  # NOT RUN: Too long on VM
75 |     # km.census@model$tot.withinss                                                # NOT RUN: Too long on VM
76 | 
77 | ##### We can also compare with the kmeans package:
78 | 
79 |     # census.1990.r <- read.csv(file.path(homedir,census1990)) 
80 |     # km.census.r <- kmeans(census.1990.r, centers = 24)         # NOT RUN: Quick-TRANSfer stage steps exceeded maximum (= 122914250) 
81 |     # km.census.r$tot.withinss
82 | 
83 | ###### Let's compare now on the big dataset BigCross [Big Cross](http://www.cs.uni-paderborn.de/en/fachgebiete/ag-bloemer/research/clustering/streamkmpp/), which has 11.6 million data points with 57 integer features.
84 | 
85 |     # bigcross   <- "BigCross.data.gz"
86 |     # big.cross   <- h2o.importFile(h2oServer, path = file.path(homedir,bigcross), header = F, sep = ',', key = 'big.cross.hex')
87 |     
88 |     # dim(big.cross)                                                                # NOT RUN: Too long on VM
89 |     # km.bigcross <- h2o.kmeans(data = big.cross, centers = 24, init="furthest")    # NOT RUN: Too long on VM
90 |     # km.bigcross@model$tot.withinss                                                # NOT RUN: Too long on VM
91 | 
92 | ###### We can compare the result with the published result from [Fast and Accurate KMeans on Large Datasets](http://papers.nips.cc/paper/4362-fast-and-accurate-k-means-for-large-datasets.pdf), where the cost for k = 24 and ~2GB of RAM was approximately 1.50E+14.
93 | 
94 | ###### We can also compare with [StreamKM++: A Clustering Algorithm for Data Streams](http://www.cs.uni-paderborn.de/uploads/tx_sibibtex/2012_AckermannMRSLS_StreamKMpp.pdf). For various k, we can compare our implementation, but we only do k = 30 here.
95 | 
96 |     # km.bigcross <- h2o.kmeans(data = big.cross, centers = 30, init="furthest")    # NOT RUN: Too long on VM
97 |     # km.bigcross@model$tot.withinss                                                # NOT RUN: Too long on VM
98 | 
99 | 


--------------------------------------------------------------------------------
/tutorials/unsupervised/dimreduction/dimreduction.R.md:
--------------------------------------------------------------------------------
 1 | # Dimensionality Reduction of MNIST
 2 | 
 3 | ######This tutorial shows how to reduce the dimensionality of a dataset with H2O. We will use both PCA and Deep Learning. This file is both valid R and markdown code. We use the well-known [MNIST](http://yann.lecun.com/exdb/mnist/) dataset of hand-written digits, where each row contains the 28^2=784 raw gray-scale pixel values from 0 to 255 of the digitized digits (0 to 9). 
 4 | 
 5 | ### Start H2O and load the MNIST data
 6 | 
 7 | ######Initialize the H2O server and import the MNIST training/testing datasets.
 8 | 
 9 |     library(h2o)
10 |     h2oServer <- h2o.init(nthreads=-1)
11 |     homedir <- "/data/h2o-training/mnist/"
12 |     DATA = "train.csv.gz"
13 |     data_hex <- h2o.importFile(h2oServer, path = paste0(homedir,DATA), header = F, sep = ',', key = 'train.hex')
14 |     
15 | ######The data consists of 784 (=28^2) pixel values per row, with (gray-scale) values from 0 to 255. The last column is the response (a label in 0,1,2,...,9).
16 |  
17 |     predictors = c(1:784)
18 |     resp = 785
19 | 
20 | ######We do unsupervised training, so we can drop the response column.
21 | 
22 |     data_hex <- data_hex[,-resp]
23 | 
24 | ### PCA - Principle Components Analysis
25 | 
26 | ###### Let's use [PCA](http://en.wikipedia.org/wiki/Principal_component_analysis) to compute the principal components of the MNIST data, and plot the standard deviations of the principal components (i.e., the square roots of the eigenvalues of the covariance/correlation matrix).
27 | 
28 |     pca_model <- h2o.prcomp(data_hex)
29 |     plot(pca_model@model$sdev)
30 | 
31 | #####![](images/mnist_pca_sdev.png)
32 | 
33 | ###### We see that the first 50 or 100 principal components cover the majority of the variance of this dataset.
34 |     
35 | ###### To reduce the dimensionality of MNIST to its 50 principal components, we use the h2o.predict() function with an extra argument `num_pc`:
36 | 
37 |     features_pca <- h2o.predict(pca_model, data_hex, num_pc=50)
38 |     summary(features_pca)
39 |    
40 | ### Deep Learning Autoencoder
41 | 
42 |     ae_model <- h2o.deeplearning(x=predictors,
43 |                                 y=42, #ignored (pick any non-constant predictor)
44 |                                 data_hex,
45 |                                 activation="Tanh",
46 |                                 autoencoder=T,
47 |                                 hidden=c(100,50,100),
48 |                                 epochs=1,
49 |                                 ignore_const_cols = F)
50 |     
51 | ###### We can now convert the data with the autoencoder model to 50-dimensional space (second hidden layer)
52 | 
53 |     features_ae <- h2o.deepfeatures(data_hex, ae_model, layer=2)
54 |     summary(features_ae)
55 | 
56 | ###### To get the full reconstruction from the output layer of the autoencoder, use h2o.predict() as follows
57 | 
58 |     data_reconstr <- h2o.predict(ae_model, data_hex)
59 |     summary(data_reconstr)
60 | 


--------------------------------------------------------------------------------
/tutorials/unsupervised/dimreduction/images/mnist_pca_sdev.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/h2oai/h2o-world-2014-training/63a7ddebceafd32130e5852108a0e00f035824f0/tutorials/unsupervised/dimreduction/images/mnist_pca_sdev.png


--------------------------------------------------------------------------------
/tutorials/web_ui/tutorial.md:
--------------------------------------------------------------------------------
  1 | # Data Science Flow from H<sub>2</sub>O`s Web Interface
  2 | 
  3 | You can follow along with our video tutorial:
  4 | 
  5 | <iframe width="420" height="315" src="//www.youtube.com/embed/DL00ZSSTjOM" frameborder="0" allowfullscreen></iframe>
  6 | 
  7 | ## Step 1: Import Data
  8 | 
  9 | The airlines data set we are importing is a subset of the data made available by [RITA](http://www.transtats.bts.gov/OT_Delay/OT_DelayCause1.asp) with a mix of numeric and factor columns. In the following tutorial we will build multiple classification models predicting for flight delays, run model comparison and score on a specific chosen model.
 10 | 
 11 |   * Navigate to [*Data* > *Import File*](http://localhost:54321/2/ImportFiles2.html)
 12 |   * Input into path `/data/h2o-training/airlines/allyears2k.csv` and hit Submit
 13 |   * Hit on the nfs link [*`C:\data\h2o-training\airlines\allyears2k.csv`*](http://localhost:54321/2/Parse2.query?source_key=nfs:\C:\data\h2o-training\airlines\allyears2k.csv)
 14 |   * Scroll down the page to get a preview of your data before hitting Submit again.
 15 | 
 16 | ## Step 2: Data Summary
 17 |   * On the [data inspect page](http://localhost:54321/2/Inspect2.html?src_key=allyears2k.hex) navigate to the [*Summary*](http://localhost:54321/2/SummaryPage2.query?source=allyears2k.hex) which you can also access by [*Data* > *Summary*](http://localhost:54321/2/SummaryPage2.html)
 18 |   * Hit Submit to get a summary of all the columns in the data:
 19 | 	  * Numeric Columns: Min, Max, and Quantiles
 20 | 	  * Factor Columns: Counts of each factor, Cardinality, NAs
 21 |  
 22 | ## Step 3: Split Data into Test and Training Sets
 23 |   * Navigate back to data inspect page [*Data* > *View All* > *allyears2k.hex* > *Split Frame*](http://localhost:54321/2/FrameSplitPage.query?source=allyears2k.hex)
 24 |   * Select *shuffle* and hit Submit
 25 |   * Select [*allyears2k_suffled_part0.hex*](http://localhost:54321/2/Inspect2.html?src_key=allyears2k_shuffled_part0.hex) for the training frame
 26 | 
 27 | ## Step 4: Build a GLM model
 28 |  
 29 | 
 30 |   * Go to [*Model* > *Generalized Linear Model*](http://localhost:54321/2/GLM2.html)
 31 |   * Input for *source*: `allyears2k_shuffled_part0.hex`
 32 |   * Select for *response*: `IsDepDelayed`
 33 |   * Select to ignore all columns (Ctrl+A) except for `Year`, `Month`, `DayofMonth`, `DayOfWeek`, `UniqueCarrier`, `Origin`, `Dest`,  and `Distance` (Ctrl)
 34 |   * Select for *family*: `binomial` 
 35 |   * Check *use all factor levels* and *variable importances*
 36 |   * Hit submit to start the job
 37 |   
 38 | 
 39 | ## Step 5: Build a 50 Tree GBM model
 40 | 
 41 | 
 42 |   * Go to [*Model* > *Gradient Boosting Machine*](http://localhost:54321/2/GBM.html)
 43 |   * Input for *source*: `allyears2k_shuffled_part0.hex`
 44 |   * Select for *response*: `IsDepDelayed`
 45 |   * Select to ignore all columns (Ctrl+A) except for `Year`, `Month`, `DayofMonth`, `DayOfWeek`, `UniqueCarrier`, `Origin`, `Dest`,  and `Distance` (Ctrl)
 46 |   * Hit Submit to start the job 
 47 | 
 48 | 
 49 | ## Step 6: Build a simplier 5 Tree GBM model
 50 | 
 51 | 
 52 |   * Go to [*Model* > *Gradient Boosting Machine*](http://localhost:54321/2/GBM.html)
 53 |   * Input for *source*: `allyears2k_shuffled_part0.hex`
 54 |   * Select for *response*: `IsDepDelayed`
 55 |   * Select to ignore all columns (Ctrl+A) except for `Year`, `Month`, `DayofMonth`, `DayOfWeek`, `UniqueCarrier`, `Origin`, `Dest`,  and `Distance` (Ctrl)
 56 |   * Input for *ntrees*: `5`
 57 |   * Hit Submit to start the job 
 58 |   
 59 | > On the model output page, hit the **JSON** tab.
 60 | > 
 61 | > On the model output page, hit the **JAVA** tab.
 62 | 
 63 | 
 64 | ## Step 7: Deep Learning with Model Grid Search
 65 | 
 66 | 
 67 |   * Go to [*Model* > *Gradient Boosting Machine*](http://localhost:54321/2/DeepLearning.html)
 68 |   * Input for *source*: `allyears2k_shuffled_part0.hex`
 69 |   * Select for *response*: `IsDepDelayed`
 70 |   * Select to ignore all columns (Ctrl+A) except for `Year`, `Month`, `DayofMonth`, `DayOfWeek`, `UniqueCarrier`, `Origin`, `Dest`,  and `Distance` (Ctrl)
 71 |   * Input for *hidden*: `(10,10), (20,20,20)`
 72 |   * Hit Submit to start the job 
 73 | 
 74 | > The models are sorted by error rates. Scroll to all the way to the right to select the first model on the list.
 75 |   
 76 | ## Step 8: Multimodel Scoring Engine
 77 | 
 78 |   * Navigate to [*Score* > *Multi model Scoring (beta)*](http://localhost:54321/steam/index.html)
 79 |   * Select data set `allyears2k.hex` and scroll to the compatible models and select `VIEW THESE MODELS...`
 80 |   * Select all the models on the left hand task bar.
 81 |   * Hit *SCORE...* and select `allyears2k_shuffled_part1.hex` and hit *OK*
 82 | 
 83 | > The tabular viewing of the models allows the user to have a side by side comparison of all the models.
 84 |   
 85 | ### Creating Visualizations
 86 | 
 87 |   * Navigate to *ADVANCED* Tab to see overlaying ROC curves
 88 |   * Hit *ADD VISUALIZATION...*
 89 |   * *For the X-Axis Field* choose `Training Time (ms)`
 90 |   * *For the Y-Axis Field* choose `AUC`
 91 | 
 92 | > Examine the new graph you created. Weigh the value of extra gain in accuracy for time taken to train the models. Before selecting a model and copying the key of the model.
 93 | 
 94 | 
 95 | ## Step 9: Create Frame with Predicted Values
 96 | 
 97 |   
 98 |   * Navigate back to [*Home Page* > *Score* > *Predict*](http://local:host:54321/2/Predict.html)
 99 |   * Input for *model*: paste the model key you got from Step 8
100 |   * Input for *data*: `allyears2k_shuffled_part1.hex`
101 |   * Input for *prediction*: `pred`
102 | 
103 | 
104 | ## Step 10: Export Predicted Values as CSV
105 | 
106 |   * Inspect the [prediction frame](http://localhost:54321/2/Inspect2.html?src_key=pred)
107 |   * Select *Download as CSV*
108 | 
109 | or export any frame:
110 | 
111 |   * Navigate to [*Data* > *Export Files*](http://localhost:54321/2/ExportFiles.html)
112 |   * Input for *src key*: `pred`
113 |   * Input for *path*: `/data/h2o-training/airlines/pred.csv`
114 | 
115 | ## Step 11: Save a model for use later
116 | 
117 |   * Navigate to [*Data* > *View All*](http://localhost:54321/StoreView.html)
118 |   * Choose to filter by the model key
119 |   * Hit [*Save Model*](http://localhost:54321/2/SaveModel)
120 |   * Input for *path*: `/data/h2o-training/airlines/50TreesGBMmodel`
121 |   * Hit Submit
122 | 
123 | ## Errors?! Download and send us the log files!
124 | 
125 |   * Navigate to [*Admin* > *Inspect Log*](http://localhost:54321/LogView.html)
126 |   * Hit *Download all logs*
127 | 
128 | ## Step 12: Shutdown your H2O instance
129 | 
130 |   * Go to [*Admin* > *Shutdown*]
131 | 
132 | ## Extra Bonus: Reload that saved model
133 |   
134 |   * In a active H<sub>2</sub>O session
135 |   * Navigate to the [Load Model](http://localhost:54321/2/LoadModel.html)
136 |   * Input for *path*: `/data/h2o-training/airlines/50TreesGBMmodel`
137 |   * Hit Submit 


--------------------------------------------------------------------------------