├── .gitignore
├── ArmTemplates
│   ├── configure_arm.json
│   ├── dsvm_arm.json
│   ├── fraud-detection_arm.json
│   └── fraud-detection_hdi_arm.json
├── Data
│   ├── Account_Info.csv
│   ├── Fraud_Transactions.csv
│   ├── Untagged_Transactions.csv
│   └── readme.md
├── LICENSE
├── R
│   ├── Fraud.rproj
│   ├── Fraud.rxproj
│   ├── Fraud.sln
│   ├── Fraud_Detection_Notebook.ipynb
│   ├── modeling_main.R
│   ├── readme.md
│   ├── step1_tagging.R
│   ├── step2_splitting_preprocessing.R
│   ├── step3_feature_engineering.R
│   └── step4_training_evaluation.R
├── README.md
├── RSparkCluster
│   ├── copy_dev_to_prod.R
│   ├── development_main.R
│   ├── in_memory_scoring.R
│   ├── production_main.R
│   ├── readme.md
│   ├── step0_directories_creation.R
│   ├── step1_merge_account_info.R
│   ├── step2_tagging.R
│   ├── step3_splitting.R
│   ├── step4_preprocessing.R
│   ├── step5_create_risk_tables.R
│   ├── step6_feature_engineering.R
│   ├── step7_training.R
│   ├── step8_prediction.R
│   ├── step9_evaluation.R
│   └── web_scoring_main.R
├── Resources
│   ├── ActionScripts
│   │   ├── ConfigureSQL.ps1
│   │   ├── CreateDatabase.sql
│   │   ├── CreateSQLObjectsR.sql
│   │   ├── FraudSetup.ps1
│   │   ├── SetupVM.ps1
│   │   ├── createShortcuts.ps1
│   │   ├── frauddetection_Help.url
│   │   └── hdisetup.sh
│   ├── exampleuser.sql
│   ├── images
│   │   └── fraud.jpg
│   └── readme.md
├── SECURITY.md
├── SQLR
│   ├── CreateRiskTable.sql
│   ├── OnlineFraudDetection.ps1
│   ├── ParseString.sql
│   ├── ScoreOneTrans.sql
│   ├── SortAcctTable.sql
│   ├── Step0_CreateTables.sql
│   ├── Step10A_Evaluation.sql
│   ├── Step10B_Evaluation_AUC.sql
│   ├── Step1_MergeAcctInfo.sql
│   ├── Step2_Tagging.sql
│   ├── Step3_SplitData.sql
│   ├── Step4_Preprocess.sql
│   ├── Step5_Save2History.sql
│   ├── Step6_CreateRiskTables.sql
│   ├── Step7_FeatureEngineer.sql
│   ├── Step8_Training.sql
│   ├── Step9_Prediction.sql
│   ├── UtilityFunctions.sql
│   ├── createuser.sql
│   └── readme.md
├── Website
│   ├── package.json
│   ├── public
│   │   ├── css
│   │   │   ├── bootstrap.min.css
│   │   │   ├── bootswatch.css
│   │   │   └── myCSS.css
│   │   ├── fonts
│   │   │   ├── glyphicons-halflings-regular.eot
│   │   │   ├── glyphicons-halflings-regular.svg
│   │   │   ├── glyphicons-halflings-regular.ttf
│   │   │   └── glyphicons-halflings-regular.woff
│   │   ├── img
│   │   │   ├── bracelet.jpg
│   │   │   ├── earrings.jpg
│   │   │   ├── heart.jpg
│   │   │   ├── logo.gif
│   │   │   ├── progress.gif
│   │   │   └── ring.jpg
│   │   └── js
│   │       ├── boostrap.min.js
│   │       ├── customize.js
│   │       ├── jquery-3.2.1.min.js
│   │       ├── jquery-3.2.1.min.map
│   │       ├── scoreClaim.js
│   │       └── startUp.js
│   ├── readme.md
│   ├── server.js
│   ├── startMe.vbs
│   └── views
│       ├── home.handlebars
│       └── layouts
│           └── main.handlebars
├── onlinefraud.pbix
└── onlinefraudHDI.pbix

/.gitignore:
--------------------------------------------------------------------------------
1 | ## Ignore Visual Studio temporary files, build results, and
2 | ## files generated by popular Visual Studio add-ons.
3 | ## 4 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore 5 | 6 | # User-specific files 7 | *.suo 8 | *.user 9 | *.userosscache 10 | *.sln.docstates 11 | 12 | # User-specific files (MonoDevelop/Xamarin Studio) 13 | *.userprefs 14 | 15 | # Build results 16 | [Dd]ebug/ 17 | [Dd]ebugPublic/ 18 | [Rr]elease/ 19 | [Rr]eleases/ 20 | x64/ 21 | x86/ 22 | bld/ 23 | [Bb]in/ 24 | [Oo]bj/ 25 | [Ll]og/ 26 | 27 | # Visual Studio 2015 cache/options directory 28 | .vs/ 29 | # Uncomment if you have tasks that create the project's static files in wwwroot 30 | #wwwroot/ 31 | 32 | # MSTest test Results 33 | [Tt]est[Rr]esult*/ 34 | [Bb]uild[Ll]og.* 35 | 36 | # NUNIT 37 | *.VisualState.xml 38 | TestResult.xml 39 | 40 | # Build Results of an ATL Project 41 | [Dd]ebugPS/ 42 | [Rr]eleasePS/ 43 | dlldata.c 44 | 45 | # .NET Core 46 | project.lock.json 47 | project.fragment.lock.json 48 | artifacts/ 49 | **/Properties/launchSettings.json 50 | 51 | *_i.c 52 | *_p.c 53 | *_i.h 54 | *.ilk 55 | *.meta 56 | *.obj 57 | *.pch 58 | *.pdb 59 | *.pgc 60 | *.pgd 61 | *.rsp 62 | *.sbr 63 | *.tlb 64 | *.tli 65 | *.tlh 66 | *.tmp 67 | *.tmp_proj 68 | *.log 69 | *.vspscc 70 | *.vssscc 71 | .builds 72 | *.pidb 73 | *.svclog 74 | *.scc 75 | 76 | # Chutzpah Test files 77 | _Chutzpah* 78 | 79 | # Visual C++ cache files 80 | ipch/ 81 | *.aps 82 | *.ncb 83 | *.opendb 84 | *.opensdf 85 | *.sdf 86 | *.cachefile 87 | *.VC.db 88 | *.VC.VC.opendb 89 | 90 | # Visual Studio profiler 91 | *.psess 92 | *.vsp 93 | *.vspx 94 | *.sap 95 | 96 | # TFS 2012 Local Workspace 97 | $tf/ 98 | 99 | # Guidance Automation Toolkit 100 | *.gpState 101 | 102 | # ReSharper is a .NET coding add-in 103 | _ReSharper*/ 104 | *.[Rr]e[Ss]harper 105 | *.DotSettings.user 106 | 107 | # JustCode is a .NET coding add-in 108 | .JustCode 109 | 110 | # TeamCity is a build add-in 111 | _TeamCity* 112 | 113 | # DotCover is a Code Coverage Tool 114 | *.dotCover 115 | 116 | # Visual Studio code coverage results 117 | *.coverage 118 | *.coveragexml 119 | 120 | # NCrunch 121 | _NCrunch_* 122 | .*crunch*.local.xml 123 | nCrunchTemp_* 124 | 125 | # MightyMoose 126 | *.mm.* 127 | AutoTest.Net/ 128 | 129 | # Web workbench (sass) 130 | .sass-cache/ 131 | 132 | # Installshield output folder 133 | [Ee]xpress/ 134 | 135 | # DocProject is a documentation generator add-in 136 | DocProject/buildhelp/ 137 | DocProject/Help/*.HxT 138 | DocProject/Help/*.HxC 139 | DocProject/Help/*.hhc 140 | DocProject/Help/*.hhk 141 | DocProject/Help/*.hhp 142 | DocProject/Help/Html2 143 | DocProject/Help/html 144 | 145 | # Click-Once directory 146 | publish/ 147 | 148 | # Publish Web Output 149 | *.[Pp]ublish.xml 150 | *.azurePubxml 151 | # TODO: Comment the next line if you want to checkin your web deploy settings 152 | # but database connection strings (with potential passwords) will be unencrypted 153 | *.pubxml 154 | *.publishproj 155 | 156 | # Microsoft Azure Web App publish settings. Comment the next line if you want to 157 | # checkin your Azure Web App publish settings, but sensitive information contained 158 | # in these scripts will be unencrypted 159 | PublishScripts/ 160 | 161 | # NuGet Packages 162 | *.nupkg 163 | # The packages folder can be ignored because of Package Restore 164 | **/packages/* 165 | # except build/, which is used as an MSBuild target. 
166 | !**/packages/build/ 167 | # Uncomment if necessary however generally it will be regenerated when needed 168 | #!**/packages/repositories.config 169 | # NuGet v3's project.json files produces more ignorable files 170 | *.nuget.props 171 | *.nuget.targets 172 | 173 | # Microsoft Azure Build Output 174 | csx/ 175 | *.build.csdef 176 | 177 | # Microsoft Azure Emulator 178 | ecf/ 179 | rcf/ 180 | 181 | # Windows Store app package directories and files 182 | AppPackages/ 183 | BundleArtifacts/ 184 | Package.StoreAssociation.xml 185 | _pkginfo.txt 186 | 187 | # Visual Studio cache files 188 | # files ending in .cache can be ignored 189 | *.[Cc]ache 190 | # but keep track of directories ending in .cache 191 | !*.[Cc]ache/ 192 | 193 | # Others 194 | ClientBin/ 195 | ~$* 196 | *~ 197 | *.dbmdl 198 | *.dbproj.schemaview 199 | *.jfm 200 | *.pfx 201 | *.publishsettings 202 | orleans.codegen.cs 203 | 204 | # Since there are multiple workflows, uncomment next line to ignore bower_components 205 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 206 | #bower_components/ 207 | 208 | # RIA/Silverlight projects 209 | Generated_Code/ 210 | 211 | # Backup & report files from converting an old project file 212 | # to a newer Visual Studio version. Backup files are not needed, 213 | # because we have git ;-) 214 | _UpgradeReport_Files/ 215 | Backup*/ 216 | UpgradeLog*.XML 217 | UpgradeLog*.htm 218 | 219 | # SQL Server files 220 | *.mdf 221 | *.ldf 222 | *.ndf 223 | 224 | # Business Intelligence projects 225 | *.rdl.data 226 | *.bim.layout 227 | *.bim_*.settings 228 | 229 | # Microsoft Fakes 230 | FakesAssemblies/ 231 | 232 | # GhostDoc plugin setting file 233 | *.GhostDoc.xml 234 | 235 | # Node.js Tools for Visual Studio 236 | .ntvs_analysis.dat 237 | node_modules/ 238 | 239 | # Typescript v1 declaration files 240 | typings/ 241 | 242 | # Visual Studio 6 build log 243 | *.plg 244 | 245 | # Visual Studio 6 workspace options file 246 | *.opt 247 | 248 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 249 | *.vbw 250 | 251 | # Visual Studio LightSwitch build output 252 | **/*.HTMLClient/GeneratedArtifacts 253 | **/*.DesktopClient/GeneratedArtifacts 254 | **/*.DesktopClient/ModelManifest.xml 255 | **/*.Server/GeneratedArtifacts 256 | **/*.Server/ModelManifest.xml 257 | _Pvt_Extensions 258 | 259 | # Paket dependency manager 260 | .paket/paket.exe 261 | paket-files/ 262 | 263 | # FAKE - F# Make 264 | .fake/ 265 | 266 | # JetBrains Rider 267 | .idea/ 268 | *.sln.iml 269 | 270 | # CodeRush 271 | .cr/ 272 | 273 | # Python Tools for Visual Studio (PTVS) 274 | __pycache__/ 275 | *.pyc 276 | 277 | # Cake - Uncomment if you are using it 278 | # tools/** 279 | # !tools/packages.config 280 | 281 | # Telerik's JustMock configuration file 282 | *.jmconfig 283 | 284 | # BizTalk build output 285 | *.btp.cs 286 | *.btm.cs 287 | *.odx.cs 288 | *.xsd.cs 289 | -------------------------------------------------------------------------------- /ArmTemplates/configure_arm.json: -------------------------------------------------------------------------------- 1 | { 2 | "contentVersion": "1.0.0.0", 3 | "$schema": "http://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#", 4 | "parameters": { 5 | "adminUsername": { 6 | "type": "string", 7 | "metadata": { 8 | "description": "Username for the Virtual Machine." 9 | } 10 | }, 11 | "adminPassword": { 12 | "type": "securestring", 13 | "metadata": { 14 | "description": "Password for the Virtual Machine." 
15 | } 16 | }, 17 | "vmName": { 18 | "type": "string", 19 | "metadata": { 20 | "description": "Name for the Virtual Machine." 21 | } 22 | } 23 | }, 24 | "variables": { 25 | "location": "[resourceGroup().location]" 26 | }, 27 | "resources": [ 28 | { 29 | "type": "Microsoft.Compute/virtualMachines/extensions", 30 | "name": "[concat(parameters('vmName'),'/FraudDetectionSetup')]", 31 | "apiVersion": "2015-05-01-preview", 32 | "location": "[variables('location')]", 33 | "properties": { 34 | "publisher": "Microsoft.Compute", 35 | "type": "CustomScriptExtension", 36 | "typeHandlerVersion": "1.7", 37 | "autoUpgradeMinorVersion": false, 38 | "settings": { 39 | "fileUris": [ 40 | "https://raw.githubusercontent.com/Microsoft/r-server-fraud-detection/master/Resources/ActionScripts/FraudSetup.ps1" 41 | ], 42 | "commandToExecute": "[concat('powershell.exe -ExecutionPolicy Unrestricted -File FraudSetup.ps1 -serverName ', parameters('vmName') ,' -username ',parameters('adminUsername'),' -password ',parameters('adminPassword'))]" 43 | } 44 | } 45 | } 46 | ], 47 | "outputs": { 48 | } 49 | } -------------------------------------------------------------------------------- /ArmTemplates/dsvm_arm.json: -------------------------------------------------------------------------------- 1 | { 2 | "contentVersion": "1.0.0.0", 3 | "$schema": "http://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#", 4 | "parameters": { 5 | "adminUsername": { 6 | "type": "string", 7 | "metadata": { 8 | "description": "Username for the Virtual Machine" 9 | } 10 | }, 11 | "adminPassword": { 12 | "type": "securestring", 13 | "metadata": { 14 | "description": "Password for the Virtual Machine" 15 | } 16 | }, 17 | "vmName": { 18 | "type": "string", 19 | "metadata": { 20 | "description": "Name for the Virtual Machine" 21 | } 22 | }, 23 | "vmSize": { 24 | "type": "string", 25 | "metadata": { 26 | "description": "Size for the Virtual Machine" 27 | } 28 | } 29 | }, 30 | "variables": { 31 | "apiVersion": "2015-10-01", 32 | "location": "[resourceGroup().location]", 33 | "imagePublisher": "microsoft-ads", 34 | "imageOffer": "windows-data-science-vm", 35 | "sku": "windows2016", 36 | "version": "03.25.19", 37 | "OSDiskName": "osdiskforwindowssimple", 38 | "nicName": "[parameters('vmName')]", 39 | "addressPrefix": "10.0.0.0/16", 40 | "subnetName": "Subnet", 41 | "subnetPrefix": "10.0.0.0/24", 42 | "storageAccountType": "Standard_LRS", 43 | "storageAccountName": "[concat(uniquestring(resourceGroup().id), 'windsvm')]", 44 | "publicIPAddressType": "Dynamic", 45 | "publicIPAddressName": "[tolower(concat('co',parameters('vmName'),uniquestring(resourceGroup().id)))]", 46 | "vmStorageAccountContainerName": "vhds", 47 | "vmName": "[parameters('vmName')]", 48 | "vmSize": "[parameters('vmSize')]", 49 | "virtualNetworkName": "[parameters('vmName')]", 50 | "vnetID": "[resourceId('Microsoft.Network/virtualNetworks',variables('virtualNetworkName'))]", 51 | "subnetRef": "[concat(variables('vnetID'),'/subnets/',variables('subnetName'))]" 52 | }, 53 | "resources": [ 54 | { 55 | "type": "Microsoft.Storage/storageAccounts", 56 | "name": "[variables('storageAccountName')]", 57 | "apiVersion": "2015-05-01-preview", 58 | "location": "[variables('location')]", 59 | "properties": { 60 | "accountType": "[variables('storageAccountType')]" 61 | } 62 | }, 63 | { 64 | "apiVersion": "2015-05-01-preview", 65 | "type": "Microsoft.Network/publicIPAddresses", 66 | "name": "[variables('publicIPAddressName')]", 67 | "location": "[variables('location')]", 68 | 
"properties": { 69 | "publicIPAllocationMethod": "[variables('publicIPAddressType')]", 70 | "dnsSettings": { 71 | "domainNameLabel": "[variables('publicIPAddressName')]" 72 | } 73 | } 74 | }, 75 | { 76 | "apiVersion": "2015-05-01-preview", 77 | "type": "Microsoft.Network/virtualNetworks", 78 | "name": "[variables('virtualNetworkName')]", 79 | "location": "[variables('location')]", 80 | "properties": { 81 | "addressSpace": { 82 | "addressPrefixes": [ 83 | "[variables('addressPrefix')]" 84 | ] 85 | }, 86 | "subnets": [ 87 | { 88 | "name": "[variables('subnetName')]", 89 | "properties": { 90 | "addressPrefix": "[variables('subnetPrefix')]" 91 | } 92 | } 93 | ] 94 | } 95 | }, 96 | { 97 | "apiVersion": "2015-05-01-preview", 98 | "type": "Microsoft.Network/networkInterfaces", 99 | "name": "[variables('nicName')]", 100 | "location": "[variables('location')]", 101 | "dependsOn": [ 102 | "[concat('Microsoft.Network/publicIPAddresses/', variables('publicIPAddressName'))]", 103 | "[concat('Microsoft.Network/virtualNetworks/', variables('virtualNetworkName'))]" 104 | ], 105 | "properties": { 106 | "ipConfigurations": [ 107 | { 108 | "name": "ipconfig1", 109 | "properties": { 110 | "privateIPAllocationMethod": "Dynamic", 111 | "publicIPAddress": { 112 | "id": "[resourceId('Microsoft.Network/publicIPAddresses',variables('publicIPAddressName'))]" 113 | }, 114 | "subnet": { 115 | "id": "[variables('subnetRef')]" 116 | } 117 | } 118 | } 119 | ] 120 | } 121 | }, 122 | { 123 | "apiVersion": "2015-06-15", 124 | "type": "Microsoft.Compute/virtualMachines", 125 | "name": "[parameters('vmName')]", 126 | "location": "[variables('location')]", 127 | "plan": { 128 | "name": "[variables('sku')]", 129 | "publisher": "[variables('imagePublisher')]", 130 | "product": "[variables('imageOffer')]" 131 | }, 132 | "tags": { 133 | "Application": "DataScience" 134 | }, 135 | "dependsOn": [ 136 | "[concat('Microsoft.Storage/storageAccounts/', variables('storageAccountName'))]", 137 | "[concat('Microsoft.Network/networkInterfaces/', variables('nicName'))]" 138 | ], 139 | "properties": { 140 | "hardwareProfile": { 141 | "vmSize": "[variables('vmSize')]" 142 | }, 143 | "osProfile": { 144 | "computerName": "[parameters('vmName')]", 145 | "adminUsername": "[parameters('adminUsername')]", 146 | "adminPassword": "[parameters('adminPassword')]" 147 | }, 148 | "storageProfile": { 149 | "imageReference": { 150 | "publisher": "[variables('imagePublisher')]", 151 | "offer": "[variables('imageOffer')]", 152 | "sku": "[variables('sku')]", 153 | "version": "[variables('version')]" 154 | }, 155 | "osDisk": { 156 | "name": "osdisk", 157 | "vhd": { 158 | "uri": "[concat('http://',variables('storageAccountName'),'.blob.core.windows.net/',variables('vmStorageAccountContainerName'),'/',variables('OSDiskName'), parameters('vmName'), '.vhd')]" 159 | }, 160 | "caching": "ReadWrite", 161 | "createOption": "FromImage" 162 | } 163 | }, 164 | "networkProfile": { 165 | "networkInterfaces": [ 166 | { 167 | "id": "[resourceId('Microsoft.Network/networkInterfaces',variables('nicName'))]" 168 | } 169 | ] 170 | }, 171 | "diagnosticsProfile": { 172 | "bootDiagnostics": { 173 | "enabled": "true", 174 | "storageUri": "[concat('http://',variables('storageAccountName'),'.blob.core.windows.net')]" 175 | } 176 | } 177 | } 178 | } 179 | ], 180 | "outputs": { 181 | "vmUrl": { 182 | "value": "[concat('https://ms.portal.azure.com/#resource/subscriptions/', subscription().subscriptionId, '/resourceGroups/', resourceGroup().name, 
'/providers/Microsoft.Compute/virtualMachines/', parameters('vmName'))]", 183 | "type": "string" 184 | }, 185 | "vmFqdn": { 186 | "value": "[reference( variables('publicIPAddressName')).dnsSettings.fqdn]", 187 | "type": "string" 188 | }, 189 | "vmAdminUsername": { 190 | "value": "[parameters('adminUsername')]", 191 | "type": "string" 192 | }, 193 | "vmAdminPassword": { 194 | "value": "[parameters('adminPassword')]", 195 | "type": "string" 196 | }, 197 | "sqlServerName": { 198 | "value": "[parameters('vmName')]", 199 | "type": "string" 200 | } 201 | } 202 | } 203 | -------------------------------------------------------------------------------- /ArmTemplates/fraud-detection_arm.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "adminUsername": { 6 | "type": "string", 7 | "metadata": { 8 | "description": "Username for the Virtual Machine." 9 | } 10 | }, 11 | "adminPassword": { 12 | "type": "securestring", 13 | "metadata": { 14 | "description": "Password for the Virtual Machine. The password must be 8 or more characters long. It must contain 1+ uppercase character(s), 1+ lowercase character(s), 1+ number(s), and 1+ special character(s) from ~!@#$%^&()-_+=|<>\/;:,." 15 | } 16 | }, 17 | "vmSize": { 18 | "type": "string", 19 | "defaultValue": "Standard_DS4_v2", 20 | "allowedValues": [ 21 | "Basic_A0", 22 | "Basic_A1", 23 | "Basic_A2", 24 | "Basic_A3", 25 | "Basic_A4", 26 | "Standard_A0", 27 | "Standard_A1", 28 | "Standard_A2", 29 | "Standard_A3", 30 | "Standard_A4", 31 | "Standard_A5", 32 | "Standard_A6", 33 | "Standard_A7", 34 | "Standard_A8", 35 | "Standard_A9", 36 | "Standard_A10", 37 | "Standard_A11", 38 | "Standard_A1_v2", 39 | "Standard_A2_v2", 40 | "Standard_A4_v2", 41 | "Standard_A8_v2", 42 | "Standard_A2m_v2", 43 | "Standard_A4m_v2", 44 | "Standard_A8m_v2", 45 | "Standard_B1s", 46 | "Standard_B1ms", 47 | "Standard_B2s", 48 | "Standard_B2ms", 49 | "Standard_B4ms", 50 | "Standard_B8ms", 51 | "Standard_D1", 52 | "Standard_D2", 53 | "Standard_D3", 54 | "Standard_D4", 55 | "Standard_D11", 56 | "Standard_D12", 57 | "Standard_D13", 58 | "Standard_D14", 59 | "Standard_D1_v2", 60 | "Standard_D2_v2", 61 | "Standard_D3_v2", 62 | "Standard_D4_v2", 63 | "Standard_D5_v2", 64 | "Standard_D2_v3", 65 | "Standard_D4_v3", 66 | "Standard_D8_v3", 67 | "Standard_D16_v3", 68 | "Standard_D32_v3", 69 | "Standard_D64_v3", 70 | "Standard_D2s_v3", 71 | "Standard_D4s_v3", 72 | "Standard_D8s_v3", 73 | "Standard_D16s_v3", 74 | "Standard_D32s_v3", 75 | "Standard_D64s_v3", 76 | "Standard_D11_v2", 77 | "Standard_D12_v2", 78 | "Standard_D13_v2", 79 | "Standard_D14_v2", 80 | "Standard_D15_v2", 81 | "Standard_DS1", 82 | "Standard_DS2", 83 | "Standard_DS3", 84 | "Standard_DS4", 85 | "Standard_DS11", 86 | "Standard_DS12", 87 | "Standard_DS13", 88 | "Standard_DS14", 89 | "Standard_DS1_v2", 90 | "Standard_DS2_v2", 91 | "Standard_DS3_v2", 92 | "Standard_DS4_v2", 93 | "Standard_DS5_v2", 94 | "Standard_DS11_v2", 95 | "Standard_DS12_v2", 96 | "Standard_DS13_v2", 97 | "Standard_DS14_v2", 98 | "Standard_DS15_v2", 99 | "Standard_DS13-4_v2", 100 | "Standard_DS13-2_v2", 101 | "Standard_DS14-8_v2", 102 | "Standard_DS14-4_v2", 103 | "Standard_E2_v3", 104 | "Standard_E4_v3", 105 | "Standard_E8_v3", 106 | "Standard_E16_v3", 107 | "Standard_E32_v3", 108 | "Standard_E64_v3", 109 | "Standard_E2s_v3", 110 | "Standard_E4s_v3", 111 | 
"Standard_E8s_v3", 112 | "Standard_E16s_v3", 113 | "Standard_E32s_v3", 114 | "Standard_E64s_v3", 115 | "Standard_E32-16_v3", 116 | "Standard_E32-8s_v3", 117 | "Standard_E64-32s_v3", 118 | "Standard_E64-16s_v3", 119 | "Standard_F1", 120 | "Standard_F2", 121 | "Standard_F4", 122 | "Standard_F8", 123 | "Standard_F16", 124 | "Standard_F1s", 125 | "Standard_F2s", 126 | "Standard_F4s", 127 | "Standard_F8s", 128 | "Standard_F16s", 129 | "Standard_F2s_v2", 130 | "Standard_F4s_v2", 131 | "Standard_F8s_v2", 132 | "Standard_F16s_v2", 133 | "Standard_F32s_v2", 134 | "Standard_F64s_v2", 135 | "Standard_F72s_v2", 136 | "Standard_G1", 137 | "Standard_G2", 138 | "Standard_G3", 139 | "Standard_G4", 140 | "Standard_G5", 141 | "Standard_GS1", 142 | "Standard_GS2", 143 | "Standard_GS3", 144 | "Standard_GS4", 145 | "Standard_GS5", 146 | "Standard_GS4-8", 147 | "Standard_GS4-4", 148 | "Standard_GS5-16", 149 | "Standard_GS5-8", 150 | "Standard_H8", 151 | "Standard_H16", 152 | "Standard_H8m", 153 | "Standard_H16m", 154 | "Standard_H16r", 155 | "Standard_H16mr", 156 | "Standard_L4s", 157 | "Standard_L8s", 158 | "Standard_L16s", 159 | "Standard_L32s", 160 | "Standard_M64s", 161 | "Standard_M64ms", 162 | "Standard_M128s", 163 | "Standard_M128ms", 164 | "Standard_M64-32ms", 165 | "Standard_M64-16ms", 166 | "Standard_M128-64ms", 167 | "Standard_M128-32ms", 168 | "Standard_NC6", 169 | "Standard_NC12", 170 | "Standard_NC24", 171 | "Standard_NC24r", 172 | "Standard_NC6s_v2", 173 | "Standard_NC12s_v2", 174 | "Standard_NC24s_v2", 175 | "Standard_NC24rs_v2", 176 | "Standard_NC6s_v3", 177 | "Standard_NC12s_v3", 178 | "Standard_NC24s_v3", 179 | "Standard_NC24rs_v3", 180 | "Standard_ND6s", 181 | "Standard_ND12s", 182 | "Standard_ND24s", 183 | "Standard_ND24rs", 184 | "Standard_NV6", 185 | "Standard_NV12", 186 | "Standard_NV24" 187 | ], 188 | "metadata": { 189 | "description": "Select a SKU for the virtual machine. 
Recommended SKU is 'Standard_DS4_v2'" 190 | } 191 | } 192 | }, 193 | "variables": { 194 | "location": "[resourceGroup().location]", 195 | "vmName": "[toLower(concat('co', uniqueString(resourceGroup().id)))]" 196 | }, 197 | "resources": [ 198 | { 199 | "apiVersion": "2017-05-10", 200 | "name": "dsvmTemplate", 201 | "type": "Microsoft.Resources/deployments", 202 | "properties": { 203 | "mode": "incremental", 204 | "templateLink": { 205 | "uri": "https://raw.githubusercontent.com/Microsoft/r-server-fraud-detection/master/ArmTemplates/dsvm_arm.json", 206 | "contentVersion": "1.0.0.0" 207 | }, 208 | "parameters": { 209 | "adminUsername": { "value": "[parameters('adminUsername')]" }, 210 | "adminPassword": { "value": "[parameters('adminPassword')]" }, 211 | "vmName": { "value": "[variables('vmName')]" }, 212 | "vmSize": { "value": "[parameters('vmSize')]" } 213 | } 214 | } 215 | }, 216 | { 217 | "apiVersion": "2017-05-10", 218 | "name": "configureTemplate", 219 | "type": "Microsoft.Resources/deployments", 220 | "dependsOn": [ 221 | "dsvmTemplate" 222 | ], 223 | "properties": { 224 | "mode": "incremental", 225 | "templateLink": { 226 | "uri": "https://raw.githubusercontent.com/Microsoft/r-server-fraud-detection/master/ArmTemplates/configure_arm.json", 227 | "contentVersion": "1.0.0.0" 228 | }, 229 | "parameters": { 230 | "adminUsername": { "value": "[parameters('adminUsername')]" }, 231 | "adminPassword": { "value": "[parameters('adminPassword')]" }, 232 | "vmName": { "value": "[variables('vmName')]" } 233 | } 234 | } 235 | } 236 | ], 237 | "outputs": {} 238 | } 239 | -------------------------------------------------------------------------------- /ArmTemplates/fraud-detection_hdi_arm.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://schema.management.azure.com/schemas/2014-04-01-preview/deploymentTemplate.json#", 3 | "contentVersion": "0.9.0.0", 4 | "parameters": { 5 | "clusterName": { 6 | "type": "string", 7 | "metadata": { 8 | "description": "The name of the HDInsight cluster to create." 9 | } 10 | }, 11 | "clusterLoginUserName": { 12 | "type": "string", 13 | "defaultValue": "admin", 14 | "metadata": { 15 | "description": "These credentials can be used to submit jobs to the cluster and to log into cluster dashboards." 16 | } 17 | }, 18 | "clusterLoginPassword": { 19 | "type": "securestring", 20 | "metadata": { 21 | "description": "The password must be at least 10 characters in length and must contain at least one digit, one non-alphanumeric character, and one upper or lower case letter." 22 | } 23 | }, 24 | "location": { 25 | "type": "string", 26 | "defaultValue": "westus2", 27 | "metadata": { 28 | "description": "The location where all azure resources will be deployed." 29 | } 30 | }, 31 | "clusterWorkerNodeCount": { 32 | "type": "int", 33 | "defaultValue": 3, 34 | "metadata": { 35 | "description": "The number of nodes in the HDInsight cluster. Make sure to set this to at least 3." 36 | } 37 | }, 38 | "sshUserName": { 39 | "type": "string", 40 | "defaultValue": "sshuser", 41 | "metadata": { 42 | "description": "These credentials can be used to remotely access the cluster." 43 | } 44 | }, 45 | "sshPassword": { 46 | "type": "securestring", 47 | "metadata": { 48 | "description": "The password must be at least 10 characters in length and must contain at least one digit, one non-alphanumeric character, and one upper or lower case letter." 
49 | } 50 | } 51 | }, 52 | "variables": { 53 | "defaultStorageAccount": { 54 | "name": "[uniqueString(resourceGroup().id)]", 55 | "type": "Standard_LRS" 56 | } 57 | }, 58 | "resources": [ 59 | { 60 | "apiVersion": "2015-03-01-preview", 61 | "name": "[parameters('clusterName')]", 62 | "type": "Microsoft.HDInsight/clusters", 63 | "location": "[parameters('location')]", 64 | "dependsOn": [ 65 | "[concat('Microsoft.Storage/storageAccounts/',variables('defaultStorageAccount').name)]" 66 | ], 67 | "properties": { 68 | "clusterVersion": "3.6", 69 | "osType": "Linux", 70 | "tier": "standard", 71 | "clusterDefinition": { 72 | "kind": "MLSERVICES", 73 | "configurations": { 74 | "gateway": { 75 | "restAuthCredential.isEnabled": true, 76 | "restAuthCredential.username": "[parameters('clusterLoginUserName')]", 77 | "restAuthCredential.password": "[parameters('clusterLoginPassword')]" 78 | }, 79 | "rserver": { 80 | "rstudio": true 81 | } 82 | } 83 | }, 84 | "storageProfile": { 85 | "storageaccounts": [ 86 | { 87 | "name": "[replace(replace(concat(reference(concat('Microsoft.Storage/storageAccounts/', variables('defaultStorageAccount').name), '2016-01-01').primaryEndpoints.blob),'https:',''),'/','')]", 88 | "isDefault": true, 89 | "container": "[parameters('clusterName')]", 90 | "key": "[listKeys(resourceId('Microsoft.Storage/storageAccounts', variables('defaultStorageAccount').name), '2016-01-01').keys[0].value]" 91 | } 92 | ] 93 | }, 94 | "computeProfile": { 95 | "roles": [ 96 | { 97 | "autoscale": null, 98 | "name": "headnode", 99 | "minInstanceCount": 1, 100 | "targetInstanceCount": 2, 101 | "hardwareProfile": { 102 | "vmSize": "Standard_D12_V2" 103 | }, 104 | "osProfile": { 105 | "linuxOperatingSystemProfile": { 106 | "username": "[parameters('sshUserName')]", 107 | "password": "[parameters('sshPassword')]" 108 | } 109 | }, 110 | "virtualNetworkProfile": null, 111 | "scriptActions": [] 112 | }, 113 | { 114 | "autoscale": null, 115 | "name": "workernode", 116 | "targetInstanceCount": "[parameters('clusterWorkerNodeCount')]", 117 | "hardwareProfile": { 118 | "vmSize": "Standard_D4_V2" 119 | }, 120 | "osProfile": { 121 | "linuxOperatingSystemProfile": { 122 | "username": "[parameters('sshUserName')]", 123 | "password": "[parameters('sshPassword')]" 124 | } 125 | }, 126 | "virtualNetworkProfile": null, 127 | "scriptActions": [] 128 | }, 129 | { 130 | "autoscale": null, 131 | "name": "zookeepernode", 132 | "minInstanceCount": 1, 133 | "targetInstanceCount": 3, 134 | "hardwareProfile": { 135 | "vmSize": "Standard_A2_V2" 136 | }, 137 | "osProfile": { 138 | "linuxOperatingSystemProfile": { 139 | "username": "[parameters('sshUserName')]", 140 | "password": "[parameters('sshPassword')]" 141 | } 142 | }, 143 | "virtualNetworkProfile": null, 144 | "scriptActions": [] 145 | }, 146 | { 147 | "autoscale": null, 148 | "name": "edgenode", 149 | "minInstanceCount": 1, 150 | "targetInstanceCount": 1, 151 | "hardwareProfile": { 152 | "vmSize": "Standard_D4_V2" 153 | }, 154 | "osProfile": { 155 | "linuxOperatingSystemProfile": { 156 | "username": "[parameters('sshUserName')]", 157 | "password": "[parameters('sshPassword')]" 158 | } 159 | }, 160 | "virtualNetworkProfile": null, 161 | "scriptActions": [ 162 | { 163 | "name": "lcrsetup", 164 | "uri": "https://raw.githubusercontent.com/Microsoft/r-server-fraud-detection/master/Resources/ActionScripts/hdisetup.sh", 165 | "parameters": "", 166 | "isHeadNode": false, 167 | "isWorkerNode": false, 168 | "isPersisted": true, 169 | "isZookeeperNode": false, 170 | "isEdgeNode": 
true, 171 | "applicationName": null 172 | } 173 | ] 174 | } 175 | ] 176 | } 177 | } 178 | }, 179 | { 180 | "type": "Microsoft.Storage/storageAccounts", 181 | "name": "[variables('defaultStorageAccount').name]", 182 | "apiVersion": "2015-05-01-preview", 183 | "location": "[parameters('location')]", 184 | "properties": { 185 | "accountType": "Standard_LRS" 186 | } 187 | } 188 | ] 189 | } -------------------------------------------------------------------------------- /Data/readme.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # Fraud Detection 4 | ## Implemented on SQL Server 2016 R Services and HDInsight Spark 5 | 6 | For all documentation, visit the [Fraud Detection website](https://microsoft.github.io/r-server-fraud-detection/). 7 | 8 | **NOTE:** Please don't use "Download ZIP" to get this repository, as it will change the line endings in the data files. Use "git clone" to get a local copy of this repository. 9 | 10 | # Contributing 11 | 12 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. 13 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. All rights reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/R/Fraud.rproj:
--------------------------------------------------------------------------------
1 | Version: 1.0
2 |
3 | RestoreWorkspace: Default
4 | SaveWorkspace: Default
5 | AlwaysSaveHistory: Default
6 |
7 | EnableCodeIndexing: Yes
8 | UseSpacesForTab: Yes
9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 |
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 |
--------------------------------------------------------------------------------
/R/Fraud.rxproj:
--------------------------------------------------------------------------------
[XML project file; its element markup was stripped during extraction. The surviving values are: ProjectGuid 69478067-7c7d-458b-9aa5-299a620e57e3, ToolsVersion 14.0, configuration Debug / AnyCPU, and startup file script.R.]
--------------------------------------------------------------------------------
/R/Fraud.sln:
--------------------------------------------------------------------------------
1 | 
2 | Microsoft Visual Studio Solution File, Format Version 12.00
3 | # Visual Studio 14
4 | VisualStudioVersion = 14.0.25420.1
5 | MinimumVisualStudioVersion = 10.0.40219.1
6 | Project("{DA7A21FA-8162-4350-AD77-A8D1B671F3ED}") = "Fraud", "Fraud.rxproj", "{69478067-7C7D-458B-9AA5-299A620E57E3}"
7 | EndProject
8 | Global
9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution
10 | Debug|Any CPU = Debug|Any CPU
11 | Release|Any CPU = Release|Any CPU
12 | EndGlobalSection
13 | GlobalSection(ProjectConfigurationPlatforms) = postSolution
14 | {69478067-7C7D-458B-9AA5-299A620E57E3}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
15 | {69478067-7C7D-458B-9AA5-299A620E57E3}.Debug|Any CPU.Build.0 = Debug|Any CPU
16 | {69478067-7C7D-458B-9AA5-299A620E57E3}.Release|Any CPU.ActiveCfg = Release|Any CPU
17 | {69478067-7C7D-458B-9AA5-299A620E57E3}.Release|Any CPU.Build.0 = Release|Any CPU
18 | EndGlobalSection
19 | GlobalSection(SolutionProperties) = preSolution
20 | HideSolutionNode = FALSE
21 | EndGlobalSection
22 | EndGlobal
23 |
--------------------------------------------------------------------------------
/R/modeling_main.R:
--------------------------------------------------------------------------------
1 | ##########################################################################################################################################
2 | ## This R script will do the following:
3 | ## 1. Specify parameters: Full path of the three input tables, SQL Server database name, User ID, Password, and Server Name.
4 | ## 2. Source the different scripts for the Development Stage.
5 |
6 | ## Input : Full path of the three input tables, database name, User ID, Password, and Server Name.
7 | ## Output: Trained model and Predictions on the testing set as well as performance metrics.
8 |
9 | ##########################################################################################################################################
10 |
11 | # Load library.
12 | library(RevoScaleR)
13 |
14 | # Set the working directory to the R scripts location.
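# A minimal example, assuming a hypothetical clone location:
# setwd("C:/Users/<username>/r-server-fraud-detection/R")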
15 | # setwd()
16 |
17 | ##########################################################################################################################################
18 | ## SPECIFY INPUTS
19 | ##########################################################################################################################################
20 |
21 | # Data sets full path. The paths below work if the working directory is set to the R scripts location.
22 | Untagged_Transactions <- "../Data/Untagged_Transactions.csv"
23 | Account_Info <- "../Data/Account_Info.csv"
24 | #Fraud <- "../Data/Fraud.csv"
25 | Fraud_Transactions <- "../Data/Fraud_Transactions.csv"
26 |
27 |
28 | # Creating the connection string. Specify:
29 | ## Database name. If it already exists, tables will be overwritten. If not, it will be created.
30 | ## Server name. If connecting remotely to the DSVM, the full DNS address should be used with the port number 1433 (which should be enabled).
31 | db_name <- "FraudR"
32 | server <- "localhost"
33 | connection_string <- sprintf("Driver=SQL Server;Server=%s;Database=%s;Trusted_Connection=TRUE", server, db_name)
34 | # The connection above is set up to use your Windows credentials.
35 | # To use an id/password instead, add them in the lines below and uncomment them.
36 | # user_id <- "XXXYOURID"
37 | # password <- "XXXYOURPW"
38 | # connection_string <- sprintf("Driver=SQL Server;Server=%s;Database=%s;UID=%s;PWD=%s", server, db_name, user_id, password)
39 |
40 | ##############################################################################################################################
41 | ## Database Creation.
42 | ##############################################################################################################################
43 |
44 | # Open an ODBC connection with the SQL Server master database, only to create a new database with the rxExecuteSQLDDL function.
45 |
46 | connection_string_master <- sprintf("Driver=SQL Server;Server=%s;Database=master;Trusted_Connection=TRUE", server)
47 | # Or with id/password:
48 | # connection_string_master <- sprintf("Driver=SQL Server;Server=%s;Database=master;UID=%s;PWD=%s", server, user_id, password)
49 |
50 | outOdbcDS_master <- RxOdbcData(table = "Default_Master", connectionString = connection_string_master)
51 | rxOpen(outOdbcDS_master, "w")
52 |
53 | # Create the database if applicable.
54 | query <- sprintf( "if not exists(SELECT * FROM sys.databases WHERE name = '%s') CREATE DATABASE %s;", db_name, db_name)
55 | rxExecuteSQLDDL(outOdbcDS_master, sSQLString = query)
56 |
57 | # Create the SQLRUserGroup user.
58 |
59 | query <- sprintf("USE [%s] CREATE USER [dsvm\\SQLRUserGroup] FOR LOGIN [dsvm\\SQLRUserGroup]", db_name)
60 | rxExecuteSQLDDL(outOdbcDS_master, sSQLString = query)
61 |
62 | # Close the ODBC connection to the master database.
63 | rxClose(outOdbcDS_master)
64 |
65 | ##############################################################################################################################
66 | ## ODBC connection and SQL Compute Context.
67 | ##############################################################################################################################
68 |
69 | # Open an ODBC connection with the SQL Server database that will store the modeling tables. (Only used for rxExecuteSQLDDL.)
70 | outOdbcDS <- RxOdbcData(table = "Default", connectionString = connection_string)
71 | rxOpen(outOdbcDS, "w")
72 |
73 | # Define the SQL Compute Context for in-database computations.
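# The compute context below is what lets the rx* functions run in-database: once a step script calls
# rxSetComputeContext(sql) (see step2_splitting_preprocessing.R), computations execute inside SQL Server
# instead of pulling the data down to the R client.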
74 | sql <- RxInSqlServer(connectionString = connection_string)
75 |
76 | ##############################################################################################################################
77 | ## Modeling Pipeline.
78 | ##############################################################################################################################
79 |
80 | # Step 1: Tagging.
81 | print("Step 1: Tagging.")
82 | source("./step1_tagging.R")
83 |
84 | # Step 2: Splitting & Preprocessing the training set.
85 | print("Step 2: Splitting and Preprocessing the training set.")
86 | source("./step2_splitting_preprocessing.R")
87 |
88 | # Step 3: Feature Engineering.
89 | print("Step 3: Feature Engineering on the training set.")
90 | source("./step3_feature_engineering.R")
91 |
92 | # Step 4: Training; preprocessing and feature engineering on the testing set; scoring and evaluation of the GBT model.
93 | print("Step 4: Training, Scoring and Evaluating.")
94 | source("./step4_training_evaluation.R")
95 |
96 | # Close the ODBC connection used for the rxExecuteSQLDDL functions.
97 | rxClose(outOdbcDS)
98 |
99 | ##########################################################################################################################################
100 | ## Function to get the top n rows of a table stored on SQL Server.
101 | ## You can execute this function at any point in your progress by removing the comment marks "#" below and inputting:
102 | ## - the table name.
103 | ## - the number of rows you want to display.
104 | ##########################################################################################################################################
105 |
106 | display_head <- function(table_name, n_rows){
107 |   table_sql <- RxSqlServerData(sqlQuery = sprintf("SELECT TOP(%s) * FROM %s", n_rows, table_name), connectionString = connection_string)
108 |   table <- rxImport(table_sql)
109 |   print(table)
110 | }
111 |
112 | # table_name <- "insert_table_name"
113 | # n_rows <- 10
114 | # display_head(table_name, n_rows)
115 |
116 |
117 |
--------------------------------------------------------------------------------
/R/readme.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | # Fraud Detection
4 | ## Implemented on SQL Server 2016 R Services and HDInsight Spark
5 |
6 | For all documentation, visit the [Fraud Detection website](https://microsoft.github.io/r-server-fraud-detection/).
7 |
8 | **NOTE:** Please don't use "Download ZIP" to get this repository, as it will change the line endings in the data files. Use "git clone" to get a local copy of this repository.
9 |
10 | # Contributing
11 |
12 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.
13 |
--------------------------------------------------------------------------------
/R/step2_splitting_preprocessing.R:
--------------------------------------------------------------------------------
1 | ##########################################################################################################################################
2 | ## This R script will do the following:
3 | ## 1. Split the tagged data set into a Training and a Testing set.
4 | ## 2. Clean the training set and perform some preprocessing.
5 | 6 | ## Input : Tagged data set. 7 | ## Output: Training and Testing sets, and cleaned Training set Tagged_Training_Processed. 8 | 9 | ########################################################################################################################################## 10 | 11 | # Set the compute context to SQL. 12 | rxSetComputeContext(sql) 13 | 14 | ############################################################################################################################################# 15 | ## The block below will split the Tagged data set into a Training and a Testing set. 16 | ############################################################################################################################################ 17 | 18 | print("Randomly splitting into a training and a testing set...") 19 | 20 | # Create the Hash_Id table containing accountID hashed to 100 bins. 21 | # The advantage of using a hashing function for splitting is to: 22 | # - ensure that the same accountID ends up in the same split. 23 | # - permit repeatability of the experiment. 24 | rxExecuteSQLDDL(outOdbcDS, sSQLString = "DROP TABLE if exists Hash_Id;") 25 | 26 | rxExecuteSQLDDL(outOdbcDS, sSQLString = 27 | "SELECT accountID, ABS(CAST(CAST(HashBytes('MD5', accountID) AS VARBINARY(64)) AS BIGINT) % 100) AS hashCode 28 | INTO Hash_Id 29 | FROM Tagged ;") 30 | 31 | # Point to the training set. 32 | # At the same time, we remove: 33 | # - variables not used in the next steps (intermediate variables, variables not needed for the training, variables with only missing values). 34 | # - observations with labels equal to 2 (pre-fraud). 35 | # - observations where accountID, transactionID and transactionDateTime are missing. 36 | # - observations where the transaction amount in USD is negative. 37 | 38 | query_training <- "SELECT label, accountID, transactionID, transactionDateTime, isProxyIP, paymentInstrumentType, cardType, paymentBillingAddress, 39 | paymentBillingPostalCode, paymentBillingCountryCode, paymentBillingName, accountAddress, accountPostalCode, 40 | accountCountry, accountOwnerName, shippingAddress, transactionCurrencyCode,localHour, ipState, ipPostCode, 41 | ipCountryCode, browserLanguage, paymentBillingState, accountState, transactionAmountUSD, digitalItemCount, 42 | physicalItemCount, accountAge, paymentInstrumentAgeInAccount, numPaymentRejects1dPerUser, isUserRegistered, 43 | transactionDate, transactionTime 44 | FROM Tagged 45 | WHERE accountID IN (SELECT accountID from Hash_Id WHERE hashCode <= 70) 46 | AND label != 2 47 | AND accountID IS NOT NULL 48 | AND transactionID IS NOT NULL 49 | AND transactionDateTime IS NOT NULL 50 | AND cast(transactionAmountUSD as float) >= 0" 51 | 52 | Tagged_Training_sql <- RxSqlServerData(sqlQuery = query_training, connectionString = connection_string) 53 | 54 | ############################################################################################################################################ 55 | ## The block below will clean the Tagged data. 56 | ############################################################################################################################################ 57 | 58 | print("Cleaning and preprocessing the training set...") 59 | 60 | clean_preprocess <- function(input_data_query, output_sql_name){ 61 | 62 | # Detect variables with missing values. 63 | # No missing values in accountID, transactionID and transactionDateTime since we already filtered out missing values in the query above. 
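# (rxSummary returns an object whose sDataFrame component has one row per variable; its MissingObs
# column is what the code below uses to detect the variables that need cleaning.)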
64 | # For rxSummary to give correct info on characters, stringsAsFactors = TRUE should be used in the pointer to the SQL Tagged_Training table.
65 | Tagged_Data_sql_stringsfactors <- RxSqlServerData(sqlQuery = input_data_query, connectionString = connection_string, stringsAsFactors = TRUE)
66 | var <- rxGetVarNames(Tagged_Data_sql_stringsfactors)
67 | formula <- as.formula(paste("~", paste(var, collapse = "+")))
68 | summary <- rxSummary(formula, Tagged_Data_sql_stringsfactors, byTerm = TRUE)
69 | variables_NA <- summary$sDataFrame[summary$sDataFrame$MissingObs > 0, 1]
70 | variables_NA <- variables_NA[!variables_NA %in% c("accountID", "transactionID", "transactionDateTime", "transactionDate", "transactionTime")]
71 |
72 | # If there are no missing values, we will only preprocess the data. Otherwise, we clean and preprocess.
73 | if(length(variables_NA) == 0){
74 |   print("No missing values: only preprocessing will be performed.")
75 | } else{
76 |   print("Variables containing missing values are:")
77 |   print(variables_NA)
78 | }
79 |
80 | # Function to replace missing values with 0. It will be wrapped into rxDataStep.
81 | preprocessing <- function(data) {
82 |   data <- data.frame(data, stringsAsFactors = FALSE)
83 |
84 |   # Replace missing values with 0, except for localHour, which gets -99.
85 |   if(length(var_with_NA) > 0){
86 |     for(i in 1:length(var_with_NA)){
87 |       row_na <- which(is.na(data[, var_with_NA[i]]))
88 |       if(var_with_NA[i] == "localHour"){
89 |         data[row_na, var_with_NA[i]] <- "-99"
90 |       } else{
91 |         data[row_na, var_with_NA[i]] <- "0"
92 |       }
93 |     }
94 |   }
95 |
96 |   # Fix some data entries in isUserRegistered, which should be binary.
97 |   row_na <- which(data[, c("isUserRegistered")] %in% as.character(seq(1, 9)))
98 |   data[row_na, c("isUserRegistered")] <- "0"
99 |
100 |   # Convert a few variables to numeric, replacing non-numeric entries with 0, to fix some data entries.
101 |   numeric_to_fix <- c("accountAge", "paymentInstrumentAgeInAccount", "numPaymentRejects1dPerUser", "transactionAmountUSD",
102 |                       "digitalItemCount", "physicalItemCount")
103 |   for(i in 1:length(numeric_to_fix)){
104 |     data[, numeric_to_fix[i]] <- as.numeric(data[, numeric_to_fix[i]])
105 |     row_na <- which(is.na(as.numeric(data[, numeric_to_fix[i]])))
106 |     data[row_na, numeric_to_fix[i]] <- 0
107 |   }
108 |   return(data)
109 | }
110 |
111 | # Input and Output pointers.
112 | Input_sql <- RxSqlServerData(sqlQuery = input_data_query, connectionString = connection_string)
113 | Output_sql <- RxSqlServerData(table = output_sql_name, connectionString = connection_string)
114 |
115 | # We drop the output if it already exists as a view, in case the SQL stored procedure was executed in the same database before.
116 | rxExecuteSQLDDL(outOdbcDS, sSQLString = sprintf("IF OBJECT_ID ('%s', 'V') IS NOT NULL DROP VIEW %s ;",
117 |                 output_sql_name, output_sql_name))
118 |
119 | # Perform the data cleaning with rxDataStep.
120 | ## To preserve the type of transactionDateTime, we recreate it.
121 | rxDataStep(inData = Input_sql,
122 |            outFile = Output_sql,
123 |            overwrite = TRUE,
124 |            rowsPerRead = 200000,
125 |            transformFunc = preprocessing,
126 |            transformObjects = list(var_with_NA = variables_NA),
127 |            transforms = list(
128 |              transactionDateTime = as.character(as.POSIXct(paste(transactionDate, sprintf("%06d", as.numeric(transactionTime)), sep=""), format = "%Y%m%d %H%M%S", tz = "GMT"))
129 |            ))
130 |
131 | }
132 |
133 | # Apply the preprocessing and cleaning to the training set.
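# The same function can be reused for other inputs; hypothetically, with a testing-set query named query_testing:
# clean_preprocess(input_data_query = query_testing, output_sql_name = "Tagged_Testing_Processed").
# In this pipeline, the testing set is actually processed later, in step 4.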
134 | clean_preprocess(input_data_query = query_training,
135 |                  output_sql_name = "Tagged_Training_Processed")
136 |
137 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | # Fraud Detection
4 | Predict if an online purchase transaction is fraudulent. This is an important scenario in many industries, including retail and finance.
5 |
6 | ### Deploy to Azure on SQL Server
7 | [![Deploy to Azure (SQL Server)](https://raw.githubusercontent.com/Azure/Azure-CortanaIntelligence-SolutionAuthoringWorkspace/master/docs/images/DeployToAzure.PNG)](https://portal.azure.com/#create/Microsoft.Template/uri/https%3A%2F%2Fraw.githubusercontent.com%2FMicrosoft%2Fr-server-fraud-detection%2Fmaster%2FArmTemplates%2Ffraud-detection_arm.json)
8 |
9 | ## More samples and information
10 | > Discover more examples at [Microsoft Machine Learning Server](https://github.com/Microsoft/ML-Server)
11 |
12 | For all documentation, visit the [Fraud Detection website](https://microsoft.github.io/r-server-fraud-detection/).
13 |
14 | **NOTE:** Please don't use "Download ZIP" to get this repository, as it will change the line endings in the data files. Use "git clone" to get a local copy of this repository.
15 |
16 | ## Contributing
17 |
18 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.
19 |
--------------------------------------------------------------------------------
/RSparkCluster/copy_dev_to_prod.R:
--------------------------------------------------------------------------------
1 | ##########################################################################################################################################
2 | ## This R script will define a function, copy_dev_to_prod, that:
3 | ## 1. Cleans up an already existing directory, or creates it, on the edge node: ProdModelDir.
4 | ## 2. Copies to that directory the risk tables, the trained model, etc. from the Development directory.
5 |
6 | ## Input : DevModelDir: Path to the directory on the edge node storing the risk tables, model, etc.
7 | ##         ProdModelDir: Path to the directory where the contents of DevModelDir should be copied.
8 | ## Output: ProdModelDir with data transferred from DevModelDir.
9 |
10 |
11 | ## It should be applied:
12 | ## a) If running the Production stage for the first time.
13 | ## b) If you want to run the Production stage with a newly trained model; the older one will be overwritten.
14 | ##########################################################################################################################################
15 |
16 | copy_dev_to_prod <- function(DevModelDir, ProdModelDir){
17 |
18 |   # Clean up the Production directory, or create it if it does not exist.
19 |   if(dir.exists(ProdModelDir)){
20 |     system(paste("rm -rf ", ProdModelDir, sep = "")) # remove the directory if it exists
21 |     system(paste("mkdir -p -m 777 ", ProdModelDir, sep = "")) # create a new directory
22 |   } else {
23 |     system(paste("mkdir -p -m 777 ", ProdModelDir, sep = "")) # make a new directory if it doesn't exist
24 |   }
25 |
26 |   # Copy the model, statistics and other data from the Development directory to the Production directory.
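# A single shell cp transfers every serialized .rds artifact (risk tables, trained model, ...) at once;
# both arguments are plain directories on the edge node's local file system.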
27 |   system(paste("cp ", DevModelDir, "/*.rds ", ProdModelDir, sep = ""))
28 |
29 | }
30 |
31 |
32 |
33 |
34 |
--------------------------------------------------------------------------------
/RSparkCluster/development_main.R:
--------------------------------------------------------------------------------
1 | ##########################################################################################################################################
2 | ## This R script will do the following:
3 | ## 1. Specify parameters for the main function.
4 | ## 2. Define the main function for development.
5 | ## 3. Invoke the main function.
6 |
7 | ## Input : 1. Full path of the three input tables on HDFS.
8 | ##         2. Working directories on the local edge node and HDFS.
9 | ##         3. Stage: "Dev" for development.
10 | ## Output: The evaluation metrics of the model.
11 | ##         Tables and model to be used for Production or Web Scoring are copied to the Production directory.
12 |
13 | ##########################################################################################################################################
14 |
15 | # The current working directory should be set with setwd() to the location of the .R files.
16 |
17 | ##########################################################################################################################################
18 | ## Open Spark Connection and load RevoScaleR library.
19 | ##########################################################################################################################################
20 |
21 | rxSparkConnect(consoleOutput = TRUE, reset = TRUE)
22 | library(RevoScaleR)
23 |
24 | ##########################################################################################################################################
25 | ## Data sets full path
26 | ##########################################################################################################################################
27 |
28 | # Write the full path to the 3 data sets.
29 | Untagged_Transactions <- "/Fraud/Data/untaggedTransactions.csv"
30 | Account_Info <- "/Fraud/Data/accountInfo.csv"
31 | Fraud_Transactions <- "/Fraud/Data/fraudTransactions.csv"
32 |
33 | ##########################################################################################################################################
34 | ## Directories
35 | ##########################################################################################################################################
36 |
37 | # Local (edge node) working directory. We assume it already exists.
38 | LocalWorkDir <- paste("/var/RevoShare/", Sys.info()[["user"]], "/Fraud/dev", sep="")
39 | #dir.create(LocalWorkDir, recursive = TRUE)
40 |
41 | # HDFS directory for the user's calculations. We assume it already exists.
42 | HDFSWorkDir <- paste("/",Sys.info()[["user"]],"/Fraud/dev", sep="")
43 | #rxHadoopMakeDir(HDFSWorkDir)
44 |
45 | ##############################################################################################################################
46 | ## Define main function
47 | ##############################################################################################################################
48 |
49 | ## The user should replace the directory in the "source" function calls with their own:
50 | ## it should be the full path of the directory containing the source scripts.
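## For example, with a hypothetical clone location:
## source("/home/sshuser/r-server-fraud-detection/RSparkCluster/step1_merge_account_info.R")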
51 |
52 | fraud_dev <- function(Untagged_Transactions,
53 |                       Account_Info,
54 |                       Fraud_Transactions,
55 |                       LocalWorkDir,
56 |                       HDFSWorkDir,
57 |                       Stage = "Dev",
58 |                       update_prod_flag = 1){
59 |
60 |   # step0: intermediate directories creation.
61 |   print("Creating Intermediate Directories on Local and HDFS...")
62 |   source("./step0_directories_creation.R")
63 |
64 |   ## Define and create the directory where Risk tables, models, etc. will be saved in the Development stage.
65 |   LocalModelsDir <- file.path(LocalWorkDir, "model")
66 |   if(dir.exists(LocalModelsDir)){
67 |     system(paste("rm -rf ",LocalModelsDir,"/*", sep = "")) # clean up the directory if it exists
68 |   } else {
69 |     dir.create(LocalModelsDir, recursive = TRUE) # make a new directory if it doesn't exist
70 |   }
71 |
72 |   # step1: merging with account info
73 |   source("./step1_merge_account_info.R")
74 |   print("Step 1: Merging with account info...")
75 |   merge_account_info(Untagged_Transactions = Untagged_Transactions,
76 |                      Account_Info = Account_Info,
77 |                      HDFSWorkDir = HDFSWorkDir,
78 |                      Stage = Stage)
79 |
80 |   # step2: tagging
81 |   source("./step2_tagging.R")
82 |   print("Step 2: Tagging...")
83 |   tagging(Input_Hive_Table = "UntaggedTransactionsAccountUnique",
84 |           Fraud_Transactions = Fraud_Transactions,
85 |           HDFSWorkDir = HDFSWorkDir)
86 |
87 |   # step3: splitting
88 |   print("Step 3: Splitting...")
89 |   source("./step3_splitting.R")
90 |
91 |   # step4: preprocessing
92 |   print("Step 4: Preprocessing...")
93 |   source("./step4_preprocessing.R")
94 |   preprocess(HDFSWorkDir = HDFSWorkDir,
95 |              HiveTable = "TaggedTraining")
96 |   preprocess(HDFSWorkDir = HDFSWorkDir,
97 |              HiveTable = "TaggedTesting")
98 |
99 |   # step5: creating risk tables
100 |   print("Step 5: Creating risk tables...")
101 |   source("./step5_create_risk_tables.R")
102 |   create_risk_tables(LocalWorkDir = LocalWorkDir,
103 |                      HDFSWorkDir = HDFSWorkDir,
104 |                      HiveTable = "TaggedTrainingProcessed",
105 |                      smooth1 = 10,
106 |                      smooth2 = 100)
107 |
108 |   # step6: feature engineering
109 |   print("Step 6: Feature Engineering...")
110 |   source("./step6_feature_engineering.R")
111 |   feature_engineering(LocalWorkDir = LocalWorkDir,
112 |                       HDFSWorkDir = HDFSWorkDir,
113 |                       HiveTable = "TaggedTrainingProcessed",
114 |                       Stage = Stage)
115 |   feature_engineering(LocalWorkDir = LocalWorkDir,
116 |                       HDFSWorkDir = HDFSWorkDir,
117 |                       HiveTable = "TaggedTestingProcessed",
118 |                       Stage = Stage)
119 |
120 |
121 |   # step7: training
122 |   print("Step 7: Training...")
123 |   source("./step7_training.R")
124 |   training(HDFSWorkDir = HDFSWorkDir,
125 |            LocalWorkDir = LocalWorkDir,
126 |            Input_Data_Xdf = "TaggedTrainingProcessedFeatures")
127 |
128 |   # copy risk tables and the model object to the production folder if update_prod_flag = 1
129 |   if (update_prod_flag == 1){
130 |     # Production directory that will hold the development data.
131 |     ProdModelDir <- paste("/var/RevoShare/", Sys.info()[["user"]], "/Fraud/prod/model/", sep="")
132 |     # Development directory that holds data to be used in Production.
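# (This is the LocalModelsDir created at the top of fraud_dev and populated by the risk-table and training steps above.)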
133 | DevModelDir <- LocalModelsDir 134 | 135 | source("./copy_dev_to_prod.R") 136 | copy_dev_to_prod(DevModelDir, ProdModelDir) 137 | } 138 | 139 | # step8: prediction 140 | print("Step8: Prediction...") 141 | source("./step8_prediction.R") 142 | prediction(HDFSWorkDir = HDFSWorkDir, 143 | LocalWorkDir = LocalWorkDir, 144 | Input_Data_Xdf = "TaggedTestingProcessedFeatures", 145 | Stage = Stage) 146 | 147 | # step9: evaluation 148 | print("Step9: Evaluation...") 149 | source("./step9_evaluation.R") 150 | evaluation(HDFSWorkDir = HDFSWorkDir, 151 | Scored_Data_Xdf = "PredictScore") 152 | } 153 | 154 | ############################################################################################################################## 155 | ## Apply the main function 156 | ############################################################################################################################## 157 | 158 | fraud_dev (Untagged_Transactions = Untagged_Transactions, 159 | Account_Info = Account_Info, 160 | Fraud_Transactions = Fraud_Transactions, 161 | LocalWorkDir = LocalWorkDir, 162 | HDFSWorkDir = HDFSWorkDir, 163 | Stage = "Dev", 164 | update_prod_flag = 1) 165 | -------------------------------------------------------------------------------- /RSparkCluster/production_main.R: -------------------------------------------------------------------------------- 1 | ########################################################################################################################################## 2 | ## This R script will do the following: 3 | ## 1. Specify parameters for main function. 4 | ## 2. Define the main function for production batch scoring. 5 | ## 3. Invoke the main function. 6 | 7 | ## Input : 1. Full path of the two input tables on HDFS (for scoring with Spark) 8 | ## OR the two tables as data frames (for in-memory scoring). 9 | ## 2. Working directories on local edge node and HDFS. 10 | ## 3. Stage: "Prod" for batch scoring. 11 | ## Output: The directory on HDFS which contains the Scores (Spark version) or The Scores table (in-memory version). 12 | 13 | ########################################################################################################################################## 14 | 15 | ########################################################################################################################################## 16 | ## Load the RevoScaleR library and Open Spark Connection 17 | ########################################################################################################################################## 18 | 19 | library(RevoScaleR) 20 | rxSparkConnect(consoleOutput = TRUE, reset = TRUE) 21 | 22 | ########################################################################################################################################## 23 | ## Directories 24 | ########################################################################################################################################## 25 | 26 | # Local (edge node) working directory. We assume it already exists. 27 | LocalWorkDir <- paste("/var/RevoShare/", Sys.info()[["user"]], "/Fraud/prod", sep="") 28 | #dir.create(LocalWorkDir, recursive = TRUE) 29 | 30 | # HDFS directory for user calculation. We assume it already exists. 31 | HDFSWorkDir <- paste("/",Sys.info()[["user"]],"/Fraud/prod", sep="") 32 | #rxHadoopMakeDir(HDFSWorkDir) 33 | 34 | # Current working directory should be set with setwd() to the location of the .R files. 
35 | 36 | ########################################################################################################################################## 37 | ## Data sets full path 38 | ########################################################################################################################################## 39 | 40 | # Paths to the input data sets on HDFS. 41 | Untagged_Transactions_str <- "/Fraud/Data/untaggedTransactions_Prod.csv" 42 | Account_Info_str <- "/Fraud/Data/accountInfo.csv" 43 | 44 | # Import the .csv files as data frames. stringsAsFactors = F to avoid converting the ID variables to factors, which takes a very long time. 45 | Untagged_Transactions_df <- rxImport(RxTextData(file = Untagged_Transactions_str, fileSystem = RxHdfsFileSystem()), stringsAsFactors = F) 46 | Account_Info_df <- rxImport(RxTextData(file = Account_Info_str, fileSystem = RxHdfsFileSystem()), stringsAsFactors = F) 47 | 48 | 49 | ############################################################################################################################## 50 | ## Define main function 51 | ############################################################################################################################## 52 | 53 | ## If Untagged_Transactions and Account_Info are data frames, the web scoring is done in_memory. 54 | ## Use paths to csv files on HDFS for large data sets that do not fit in-memory. 55 | 56 | fraud_batch_scoring <- function(Untagged_Transactions, 57 | Account_Info, 58 | LocalWorkDir, 59 | HDFSWorkDir, 60 | Stage = "Prod") 61 | { 62 | 63 | # Directory that holds the tables and model from the Development stage. 64 | LocalModelsDir <- file.path(LocalWorkDir, "model") 65 | 66 | if((class(Untagged_Transactions) == "data.frame") & (class(Account_Info) == "data.frame")){ # In-memory scoring. 67 | source("./in_memory_scoring.R") 68 | print("Scoring in-memory...") 69 | return(in_memory_scoring(Untagged_Transactions, Account_Info, Stage = Stage)) 70 | 71 | } else{ # Using Spark for scoring. 72 | 73 | rxSparkConnect(consoleOutput = TRUE, reset = TRUE) 74 | 75 | # step0: intermediate directories creation. 76 | print("Creating Intermediate Directories on Local and HDFS...") 77 | source("./step0_directories_creation.R") 78 | 79 | # step1: merging the raw data. 80 | source("./step1_merge_account_info.R") 81 | print("Step 1: Production data merging.") 82 | 83 | merge_account_info(Untagged_Transactions = Untagged_Transactions, 84 | Account_Info = Account_Info, 85 | HDFSWorkDir = HDFSWorkDir, 86 | Stage = Stage) 87 | 88 | # step2: additional preprocessing. 89 | source("./step4_preprocessing.R") 90 | print("Step 2: Additional preprocessing of the production data.") 91 | 92 | preprocess(HDFSWorkDir = HDFSWorkDir, 93 | HiveTable = "TaggedProd") 94 | 95 | 96 | # step3: feature engineering 97 | source("./step6_feature_engineering.R") 98 | print("Step 3: Feature Engineering.") 99 | 100 | feature_engineering(LocalWorkDir = LocalWorkDir, 101 | HDFSWorkDir = HDFSWorkDir, 102 | HiveTable = "TaggedProdProcessed", 103 | Stage = Stage) 104 | 105 | # step4: making predictions. 106 | source("./step8_prediction.R") 107 | print("Step 4: Making Predictions.") 108 | 109 | prediction(HDFSWorkDir = HDFSWorkDir, 110 | LocalWorkDir = LocalWorkDir, 111 | Input_Data_Xdf = "TaggedProdProcessedFeatures", 112 | Stage = Stage) 113 | 114 | # Return the directory storing the final scores. 
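# (The scores are written under <HDFSWorkDir>/temp/PredictScore as an .xdf directory.
# A hedged sketch of how a downstream consumer could pull them back into a local data
# frame with standard RevoScaleR calls:
#   scores <- rxImport(RxXdfData(file.path(HDFSWorkDir, "temp", "PredictScore"),
#                                fileSystem = RxHdfsFileSystem()))
# )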
115 | return(file.path(HDFSWorkDir,"temp", "PredictScore")) 116 | 117 | } 118 | } 119 | 120 | ############################################################################################################################## 121 | ## Apply the main function 122 | ############################################################################################################################## 123 | 124 | # Case 1: Input are data frames. Scoring is performed in-memory. 125 | Scores <- fraud_batch_scoring(Untagged_Transactions = Untagged_Transactions_df, 126 | Account_Info = Account_Info_df, 127 | LocalWorkDir = LocalWorkDir, 128 | HDFSWorkDir = HDFSWorkDir, 129 | Stage = "Prod") 130 | 131 | # Case 2: Input are paths to csv files. Scoring using Spark. 132 | ## This alternative is slow and should only be used if the data set to score is too large to fit in memory. 133 | #scores_directory <- fraud_batch_scoring(Untagged_Transactions = Untagged_Transactions_str, 134 | # Account_Info = Account_Info_str, 135 | # LocalWorkDir = LocalWorkDir, 136 | # HDFSWorkDir = HDFSWorkDir, 137 | # Stage = "Prod") 138 | 139 | # Warning: in case you get the following error: "Error: file.exists(inData1) is not TRUE", 140 | # you should reset your R session with Ctrl + Shift + F10 (or Session -> Restart R) and try running it again. 141 | -------------------------------------------------------------------------------- /RSparkCluster/readme.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # Fraud Detection 4 | ## Implemented on SQL Server 2016 R Services and HDInsight Spark 5 | 6 | For all documentation, visit the [Fraud Detection website](https://microsoft.github.io/r-server-fraud-detection/). 7 | 8 | **NOTE:** Please don't use "Download ZIP" to get this repository, as it will change the line endings in the data files. Use "git clone" to get a local copy of this repository. 9 | 10 | # Contributing 11 | 12 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. 13 | -------------------------------------------------------------------------------- /RSparkCluster/step0_directories_creation.R: -------------------------------------------------------------------------------- 1 | ########################################################################################################################################## 2 | ## This R script will do the following: 3 | ## 1. Create or clean up an intermediate directory, LocalIntermediateDir, on the edge node. 4 | ## 2. Create or clean up an intermediate directory, HDFSIntermediateDir, on HDFS. 5 | 6 | ########################################################################################################################################## 7 | 8 | # Intermediate folders paths one on the edge node and one on HDFS. 9 | LocalIntermediateDir <- file.path(LocalWorkDir, "temp") 10 | HDFSIntermediateDir <- file.path(HDFSWorkDir,"temp") 11 | 12 | # Clean up the folders if they already exist and create them otherwise. 
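# (Design note: cleaning the temp folders here makes every pipeline run idempotent --
# re-running development_main.R or production_main.R starts from empty intermediate
# directories instead of mixing results from earlier runs.)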
13 | if(dir.exists(LocalIntermediateDir)){
14 | system(paste("rm -rf ",LocalIntermediateDir,"/*", sep="")) # clean up the directory if exists
15 | } else {
16 | dir.create(LocalIntermediateDir, recursive = TRUE) # make new directory if doesn't exist
17 | }
18 | 
19 | if(rxHadoopFileExists(HDFSIntermediateDir)){
20 | rxHadoopRemoveDir(HDFSIntermediateDir, skipTrash = TRUE)
21 | rxHadoopMakeDir(HDFSIntermediateDir)
22 | } else {
23 | rxHadoopMakeDir(HDFSIntermediateDir)
24 | }
25 | 
26 | # Grant access authority for the edge node intermediate folder.
27 | system(paste("chmod g+s ", LocalIntermediateDir, sep=""))
28 | system(paste("setfacl -d -m g::rwx ", LocalIntermediateDir, sep=""))
29 | system(paste("setfacl -d -m o::rwx ", LocalIntermediateDir, sep="")) -------------------------------------------------------------------------------- /RSparkCluster/step1_merge_account_info.R: --------------------------------------------------------------------------------
1 | ##########################################################################################################################################
2 | ## This R script will do the following:
3 | ## 1. Convert the UntaggedTransactions and AccountInfo data sets to Hive tables.
4 | ## 2. Create the transactionDateTime variable based on transactionDate and transactionTime.
5 | ## 3. Merge the two tables Untagged_Transactions and Account_Info.
6 | ## 4. Remove duplicates.
7 | 
8 | ## Input : 1. Two data tables: Untagged_Transactions, Account_Info.
9 | ##         2. HDFSWorkDir: the working directory on HDFS.
10 | ##        3. Stage: "Dev" for development, "Prod" for batch scoring, "Web" for web scoring.
11 | ## Output: Hive table: UntaggedTransactionsAccountUnique (Stage = "Dev") or TaggedProd (Stage = "Prod" or "Web").
12 | 
13 | ##########################################################################################################################################
14 | 
15 | merge_account_info <- function(Untagged_Transactions,
16 | Account_Info,
17 | HDFSWorkDir,
18 | Stage)
19 | {
20 | 
21 | # For the Production or Web-Scoring stages, in order to avoid overwriting Hive tables from the Development stage,
22 | # we will add the suffix Prod to the table names. This is encoded in the variable hive_name that will be
23 | ## an empty string for Dev
24 | ## "Prod" for Prod or Web.
25 | if(Stage == "Dev"){
26 | hive_name <- ""
27 | }else{
28 | hive_name <- "Prod"
29 | }
30 | 
31 | # Define the intermediate directory that will hold the intermediate data.
32 | HDFSIntermediateDir <- file.path(HDFSWorkDir,"temp")
33 | 
34 | 
35 | ##############################################################################################################################
36 | ## The block below will convert the data format to Hive in order to increase the efficiency of rx functions.
37 | ##############################################################################################################################
38 | 
39 | print("Converting the input data to Hive on HDFS...")
40 | 
41 | # Create Hive pointers for the two data sets on HDFS.
42 | Untagged_Transactions_hive <- RxHiveData(table = sprintf("UntaggedTransactions%s", hive_name))
43 | Account_Info_hive <- RxHiveData(table = sprintf("AccountInfo%s", hive_name))
44 | 
45 | # Check the input format. Return an error if it is not a path.
46 | if((class(Untagged_Transactions) == "character") & (class(Account_Info) == "character")){
47 | 
48 | # Text pointers to the inputs.
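# (Worked example of the transactionDateTime construction performed in the transforms
# a few lines below: transactionDate = 20130903 with transactionTime = 123 is zero-padded
# by sprintf("%06d", ...) to "000123" and concatenated to "20130903000123"; as.POSIXct
# parses this with format = "%Y%m%d %H%M%S" -- the space in a strptime-style format
# matches any amount of whitespace, including none -- giving "2013-09-03 00:01:23 GMT".)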
49 | Untagged_Transactions_txt <- RxTextData(Untagged_Transactions, firstRowIsColNames = TRUE, fileSystem = RxHdfsFileSystem()) 50 | Account_Info_txt <- RxTextData(Account_Info, firstRowIsColNames = TRUE, fileSystem = RxHdfsFileSystem()) 51 | 52 | # Conversion to Hive tables. 53 | ## At the same time, we create transactionDateTime and recordDateTime. This is done by: 54 | ## converting transactionTime into a 6 digit time. 55 | ## concatenating transactionDate and transactionTime. 56 | ## converting it to a DateTime "%Y%m%d %H%M%S" format. 57 | rxDataStep(inData = Untagged_Transactions_txt, outFile = Untagged_Transactions_hive, overwrite = TRUE, 58 | transforms = list( 59 | transactionDateTime = as.character(as.POSIXct(paste(transactionDate, sprintf("%06d", as.numeric(transactionTime)), sep=""), format = "%Y%m%d %H%M%S", tz = "GMT")) 60 | )) 61 | 62 | rxDataStep(inData = Account_Info_txt, outFile = Account_Info_hive, overwrite = TRUE, 63 | transforms = list( 64 | recordDateTime = as.character(as.POSIXct(paste(transactionDate, sprintf("%06d", as.numeric(transactionTime)), sep=""), format = "%Y%m%d %H%M%S", tz = "GMT")) 65 | )) 66 | 67 | } else { 68 | stop("invalid input format") 69 | } 70 | 71 | ############################################################################################################################################# 72 | ## The block below will merge the two tables Untagged_Transactions and Account_Info. 73 | ############################################################################################################################################ 74 | 75 | print("Merging the 2 tables Untagged_Transactions and Account_Info ...") 76 | 77 | # Inner join of the 2 tables Untagged_Transactions and Account_Info using HIVE command 78 | Drop_Untagged_Transactions_Account_query <- sprintf("hive -e \"DROP TABLE IF EXISTS UntaggedTransactionsAccount%s\"", hive_name) 79 | Create_Untagged_Transactions_Account_query <- sprintf("hive -e \"CREATE TABLE UntaggedTransactionsAccount%s AS 80 | SELECT ut.*, latestRecord, ai.accountOwnerName, ai.accountAddress, ai.accountPostalCode, ai.accountCity, ai.accountState, 81 | ai.accountCountry, ai.accountOpenDate, ai.accountAge, ai.isUserRegistered, 82 | ai.paymentInstrumentAgeInAccount, ai.numPaymentRejects1dPerUser 83 | FROM UntaggedTransactions%s ut 84 | full outer join ( 85 | SELECT t1.accountID, max(t2.recordDateTime) as latestRecord, t1.transactionDateTime 86 | FROM UntaggedTransactions%s t1 join AccountInfo%s t2 87 | ON t2.accountID = t1.accountID 88 | WHERE t2.recordDateTime <= t1.transactionDateTime 89 | GROUP BY t1.accountID, t1.transactionDateTime 90 | ) as lastTrans 91 | ON (ut.accountID = lastTrans.accountID and ut.transactionDateTime = lastTrans.transactionDateTime) 92 | JOIN AccountInfo%s ai 93 | ON ut.accountID = ai.accountID and latestRecord = ai.recordDateTime\"", hive_name, hive_name, hive_name, hive_name, hive_name) 94 | 95 | # drop UntaggedTransactionsAccount table if exists 96 | #cat(Drop_Untagged_Transactions_Account_query) 97 | system(Drop_Untagged_Transactions_Account_query) 98 | 99 | # create table UntaggedTransactionsAccount by merging Untagged_Transactions and Account_Info tables 100 | #cat(Create_Untagged_Transactions_Account_query) 101 | system(Create_Untagged_Transactions_Account_query) 102 | 103 | ############################################################################################################################################ 104 | ## The block below will remove duplicates from UntaggedTransactionsAccount 105 
| ############################################################################################################################################ 106 | 107 | print("Removing duplicates ...") 108 | 109 | Drop_UntaggedTransactionsAccountUnique_query <-sprintf(" 110 | hive -e \" 111 | DROP TABLE IF EXISTS UntaggedTransactionsAccountUnique%s\" 112 | ", hive_name) 113 | 114 | Remove_UntaggedTransactionsAccount_Duplicates_query <- sprintf(" 115 | hive -e \" 116 | CREATE TABLE UntaggedTransactionsAccountUnique%s AS 117 | SELECT t.* FROM 118 | (SELECT *, ROW_NUMBER() OVER (PARTITION BY transactionID, accountID, transactionDateTime, transactionAmount 119 | ORDER BY transactionID ASC) RN 120 | FROM UntaggedTransactionsAccount%s) as t 121 | WHERE t.RN = 1\" 122 | ", hive_name, hive_name) 123 | 124 | system(Drop_UntaggedTransactionsAccountUnique_query) 125 | system(Remove_UntaggedTransactionsAccount_Duplicates_query) 126 | 127 | ############################################################################################################################################# 128 | ## The block below will tag the UntaggedTransactionsAccount by creating a fake label for rxPredict to work correctly. 129 | ## We also exclude transactions with a negative dollar amount or missing ID variables. This preprocessing step is done in the splitting 130 | ## step for the Development stage. 131 | ############################################################################################################################################ 132 | 133 | if(Stage == "Prod" | Stage == "Web"){ 134 | print("Adding a fake label and removing rows with missing ID variables or negative transaction amount...") 135 | 136 | Drop_Tagged_query <- "hive -e \"DROP TABLE IF EXISTS TaggedProd\"" 137 | Tagging_query <- " 138 | hive -e \"create table TaggedProd as 139 | select t.*, 1 as label 140 | from UntaggedTransactionsAccountUniqueProd as t 141 | where accountID IS NOT NULL 142 | and transactionID IS NOT NULL 143 | and transactionDateTime IS NOT NULL 144 | and transactionAmountUSD >= 0\" 145 | " 146 | #cat(Drop_Tagged_query) 147 | system(Drop_Tagged_query) 148 | 149 | #cat(Tagging_query) 150 | system(Tagging_query) 151 | } 152 | 153 | print("Merging account info finished!") 154 | } -------------------------------------------------------------------------------- /RSparkCluster/step2_tagging.R: -------------------------------------------------------------------------------- 1 | ########################################################################################################################################## 2 | ## This R script will do the following: 3 | ## 1. Convert the fraud data set to a hive table. 4 | ## 2. Create the transactionDateTime variable based on transactionDate and transactionTime for fraud table. 5 | ## 3. Remove duplicates for fraud table. 6 | ## 4. Merge the input table with fraud table and create the label at the same time. 7 | 8 | ## Input : 1. Input_Hive_Table: name of the hive table from the merging step with the untagged transactions and account info. 9 | ## 2. Path to csv Fraud files with the raw data Fraud_Transactions. 10 | ## 3. HDFSWorkDir:Working directory on HDFS. 11 | ## Output: Tagged data. 12 | 13 | ########################################################################################################################################## 14 | 15 | 16 | tagging <- function(Input_Hive_Table, 17 | Fraud_Transactions, 18 | HDFSWorkDir) 19 | { 20 | 21 | # Define the intermediate directory holding the input data. 
22 | HDFSIntermediateDir <- file.path(HDFSWorkDir,"temp")
23 | 
24 | 
25 | ##############################################################################################################################
26 | ## The block below will convert the data format to Hive in order to increase the efficiency of rx functions.
27 | ##############################################################################################################################
28 | 
29 | print("Converting the fraud data to Hive on HDFS...")
30 | 
31 | # Create a Hive pointer for the fraud data set on HDFS.
32 | Fraud_Transactions_hive <- RxHiveData(table = "FraudTransactions")
33 | 
34 | # Check the input format. Return an error if it is not a path.
35 | if(class(Fraud_Transactions) == "character"){
36 | 
37 | # Text pointer to the input.
38 | Fraud_Transactions_txt <- RxTextData(Fraud_Transactions, firstRowIsColNames = TRUE, fileSystem = RxHdfsFileSystem())
39 | 
40 | # Conversion to Hive tables.
41 | ## At the same time, we create transactionDateTime. This is done by:
42 | ## converting transactionTime into a 6 digit time.
43 | ## concatenating transactionDate and transactionTime.
44 | ## converting it to a DateTime "%Y%m%d %H%M%S" format.
45 | rxDataStep(inData = Fraud_Transactions_txt,
46 | outFile = Fraud_Transactions_hive,
47 | overwrite = TRUE,
48 | transforms = list(
49 | transactionDateTime = as.character(as.POSIXct(paste(transactionDate, sprintf("%06d", as.numeric(transactionTime)), sep=""), format = "%Y%m%d %H%M%S", tz = "GMT"))
50 | ))
51 | 
52 | } else {
53 | stop("invalid input format")
54 | }
55 | 
56 | 
57 | ############################################################################################################################
58 | ## The block below will remove duplicates from the FraudTransactions table.
59 | ############################################################################################################################
60 | print("Removing duplicates in the Fraud table...")
61 | 
62 | Drop_FraudTransactionsUnique_query <-"
63 | hive -e \"
64 | DROP TABLE IF EXISTS FraudTransactionsUnique\"
65 | "
66 | Remove_FraudTransactions_Duplicates_query <- "
67 | hive -e \"
68 | CREATE TABLE FraudTransactionsUnique AS
69 | SELECT t.* FROM
70 | (SELECT *, ROW_NUMBER() OVER (PARTITION BY transactionID, accountID, transactionDateTime, transactionAmount
71 | ORDER BY transactionID ASC) RN
72 | FROM FraudTransactions) as t
73 | WHERE t.RN = 1\"
74 | "
75 | 
76 | system(Drop_FraudTransactionsUnique_query)
77 | system(Remove_FraudTransactions_Duplicates_query)
78 | 
79 | #############################################################################################################################
80 | ## The block below will tag the Input_Hive_Table at the account level.
81 | ## The tagging is completed by merging the UntaggedTransactionsAccount table with the FraudTransactions table.
82 | ## The tagging logic is: 83 | # if accountID can't be found in fraud dataset => tag as 0, non fraud 84 | # if accountID found in fraud dataset but transactionDateTime is out of the fraud time range => tag as 2, pre-fraud 85 | # if accountID found in fraud dataset and transactionDateTime is within the fraud time range => tag as 1, fraud 86 | ############################################################################################################################################ 87 | print("Tagging on account level ...") 88 | 89 | Drop_Tagged_query <- "hive -e \"DROP TABLE IF EXISTS Tagged\"" 90 | Tagging_query <- paste(" 91 | hive -e \"create table Tagged as 92 | select t.*, 93 | case when sDT is not null and tDT >= sDT and tDT <= eDT then 1 94 | when sDT is not null and tDT < sDT then 2 95 | when sDT is not null and tDT > eDT then 2 96 | when sDT is null then 0 end as label 97 | from 98 | (select t1.*, t1.transactionDateTime as tDT, t2.startDateNTime as sDT, t2.endDateNTime as eDT 99 | from ", Input_Hive_Table," as t1 100 | left join 101 | (select accountID, min(transactionDateTime) as startDateNTime, max(transactionDateTime) as endDateNTime 102 | from FraudTransactionsUnique 103 | group by accountID) as t2 104 | on t1.accountID = t2.accountID) as t\" 105 | ") 106 | #cat(Drop_Tagged_query) 107 | system(Drop_Tagged_query) 108 | 109 | #cat(Tagging_query) 110 | system(Tagging_query) 111 | 112 | print("Tagging finished!") 113 | } -------------------------------------------------------------------------------- /RSparkCluster/step3_splitting.R: -------------------------------------------------------------------------------- 1 | ########################################################################################################################################## 2 | ## This R script will do the following : 3 | ## 1. Hash the tagged data by accountID. 4 | ## 2. Split the tagged data set into a Training and a Testing set. 5 | 6 | 7 | ## Input : Tagged data set. 8 | ## Output: Training and Testing sets. 
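## (Illustration only: splitting is done at the account level so that all of an account's
## transactions land on the same side of the 70/30 boundary, which avoids leaking an
## account's fraud pattern between the training and testing sets. A toy local sketch of
## the same idea, with a simple stand-in for Hive's hash(accountID) % 100:
##   accountIDs <- c("A1", "B2", "C3")
##   hash_code  <- sapply(accountIDs, function(id) sum(utf8ToInt(id)) %% 100)
##   train_ids  <- accountIDs[hash_code <= 70]
##   test_ids   <- accountIDs[hash_code >  70]
## )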
9 | 10 | ########################################################################################################################################## 11 | 12 | ############################################################################################################################## 13 | ## The block below will hash accountID and split data into training and testing 14 | ############################################################################################################################## 15 | 16 | ## Hash accountID 17 | print("Create HashID table by hash accountID...") 18 | 19 | Drop_HashID_query <- " 20 | hive -e \"drop table if exists HashID\" 21 | " 22 | Hashing_query <-" 23 | hive -e \"create table HashID as 24 | select accountID, abs(hash(accountID)%100) as hashCode from Tagged\" 25 | " 26 | system(Drop_HashID_query) 27 | system(Hashing_query) 28 | 29 | ## Split into training and testing 30 | print("Split Tagged data into training and testing based on hashCode...") 31 | 32 | Drop_TaggedTraining_query <- " 33 | hive -e \"drop table if exists TaggedTraining\" 34 | " 35 | Get_TaggedTraining_query <- " 36 | hive -e \"CREATE TABLE TaggedTraining AS 37 | SELECT label, accountID, transactionID, transactionDateTime, isProxyIP, paymentInstrumentType, cardType, paymentBillingAddress, 38 | paymentBillingPostalCode, paymentBillingCountryCode, paymentBillingName, accountAddress, accountPostalCode, 39 | accountCountry, accountOwnerName, shippingAddress, transactionCurrencyCode,localHour, ipState, ipPostCode, 40 | ipCountryCode, browserLanguage, paymentBillingState, accountState, transactionAmountUSD, digitalItemCount, 41 | physicalItemCount, accountAge, paymentInstrumentAgeInAccount, numPaymentRejects1dPerUser, isUserRegistered, 42 | transactionDate, transactionTime 43 | FROM Tagged 44 | WHERE accountID IN (SELECT accountID from HashID WHERE hashCode <= 70) 45 | AND label != 2 46 | AND accountID IS NOT NULL 47 | AND transactionID IS NOT NULL 48 | AND transactionDateTime IS NOT NULL 49 | AND transactionAmountUSD >= 0\" 50 | " 51 | system(Drop_TaggedTraining_query) 52 | system(Get_TaggedTraining_query) 53 | 54 | Drop_TaggedTesting_query <- " 55 | hive -e \"drop table if exists TaggedTesting\" 56 | " 57 | Get_TaggedTesting_query <- " 58 | hive -e \"CREATE TABLE TaggedTesting AS 59 | SELECT label, accountID, transactionID, transactionDateTime, isProxyIP, paymentInstrumentType, cardType, paymentBillingAddress, 60 | paymentBillingPostalCode, paymentBillingCountryCode, paymentBillingName, accountAddress, accountPostalCode, 61 | accountCountry, accountOwnerName, shippingAddress, transactionCurrencyCode,localHour, ipState, ipPostCode, 62 | ipCountryCode, browserLanguage, paymentBillingState, accountState, transactionAmountUSD, digitalItemCount, 63 | physicalItemCount, accountAge, paymentInstrumentAgeInAccount, numPaymentRejects1dPerUser, isUserRegistered, 64 | transactionDate, transactionTime 65 | FROM Tagged 66 | WHERE accountID IN (SELECT accountID from HashID WHERE hashCode > 70) 67 | AND label != 2 68 | AND accountID IS NOT NULL 69 | AND transactionID IS NOT NULL 70 | AND transactionDateTime IS NOT NULL 71 | AND transactionAmountUSD >= 0\" 72 | " 73 | system(Drop_TaggedTesting_query) 74 | system(Get_TaggedTesting_query) 75 | 76 | print("Splitting finished!") 77 | 78 | -------------------------------------------------------------------------------- /RSparkCluster/step4_preprocessing.R: -------------------------------------------------------------------------------- 1 | 
##########################################################################################################################################
2 | ## This R script will perform preprocessing on an input data set.
3 | 
4 | ## Input : 1. HDFSWorkDir: Working directory on HDFS.
5 | ##         2. HiveTable: Name of the Hive table to be preprocessed.
6 | ## Output: Hive table with preprocessed data.
7 | 
8 | ##########################################################################################################################################
9 | 
10 | preprocess <- function(HDFSWorkDir,
11 | HiveTable){
12 | 
13 | # Define the intermediate directory holding the input data.
14 | HDFSIntermediateDir <- file.path(HDFSWorkDir,"temp")
15 | 
16 | 
17 | # Get the variables with missing values.
18 | print("getting variable names with missing values...")
19 | 
20 | # Point to the input hive table, while converting the strings to factors for correct computations with rxSummary.
21 | factorRiskInfo <- mapply(function(names){list(type = "factor")},
22 | c("paymentinstrumenttype",
23 | "cardtype",
24 | "paymentbillingpostalcode",
25 | "paymentbillingcountrycode",
26 | "accountpostalcode",
27 | "accountcountry",
28 | "transactioncurrencycode",
29 | "ipstate",
30 | "ippostcode",
31 | "browserlanguage",
32 | "paymentbillingstate",
33 | "accountstate",
34 | "isuserregistered"
35 | ) ,
36 | SIMPLIFY = FALSE)
37 | 
38 | Input_Table_hive <- RxHiveData(table = HiveTable)
39 | Input_Table_hivefactors <- RxHiveData(table = HiveTable, colInfo = factorRiskInfo)
40 | 
41 | var <- rxGetVarNames(Input_Table_hive)
42 | formula <- as.formula(paste("~", paste(var, collapse = "+")))
43 | summary <- rxSummary(formula, Input_Table_hivefactors, byTerm = TRUE)
44 | variables_NA <- summary$sDataFrame[summary$sDataFrame$MissingObs > 0, 1]
45 | variables_NA <- variables_NA[!variables_NA %in% c("accountid", "transactionid", "transactiondatetime", "transactiondate", "transactiontime")]
46 | 
47 | # If no missing values, we will only preprocess the data. Otherwise, we clean and preprocess.
48 | if(length(variables_NA) == 0){
49 | print("No missing values: only preprocessing will be performed.")
50 | } else{
51 | print("Variables containing missing values are:")
52 | print(variables_NA)
53 | }
54 | 
55 | preprocessing <- function(data) {
56 | data <- data.frame(data, stringsAsFactors = FALSE)
57 | 
58 | # Replace missing values with 0, except for localHour, which gets -99.
59 | if(length(var_with_NA) > 0){
60 | for(i in 1:length(var_with_NA)){
61 | row_na <- which(is.na(data[, var_with_NA[i]]))
62 | if(var_with_NA[i] == c("localhour")){
63 | data[row_na, var_with_NA[i]] <- "-99"
64 | } else{
65 | data[row_na, var_with_NA[i]] <- "0"
66 | }
67 | }
68 | }
69 | 
70 | # Fix some data entries in isUserRegistered, which should be binary.
71 | row_na <- which(data[, c("isuserregistered")] %in% as.character(seq(1, 9)))
72 | data[row_na, c("isuserregistered")] <- "0"
73 | 
74 | # Convert a few variables to numeric, replacing any non-numeric entries with 0.
75 | numeric_to_fix <- c("accountage", "paymentinstrumentageinaccount", "numpaymentrejects1dperuser", "transactionamountusd",
76 | "digitalitemcount", "physicalitemcount")
77 | for(i in 1:length(numeric_to_fix)){
78 | data[, numeric_to_fix[i]] <- as.numeric(data[, numeric_to_fix[i]])
79 | row_na <- which(is.na(as.numeric(data[, numeric_to_fix[i]])))
80 | data[row_na, numeric_to_fix[i]] <- 0
81 | }
82 | return(data)
83 | }
84 | 
85 | # Output pointer.
86 | Output_Table_hive <- RxHiveData(table = paste(HiveTable,"Processed",sep=""))
87 | 
88 | # Apply the preprocessing function to the input table.
89 | print("preprocessing...")
90 | rxDataStep(inData = Input_Table_hive,
91 | outFile = Output_Table_hive,
92 | overwrite = TRUE,
93 | transformFunc = preprocessing,
94 | transformObjects = list(var_with_NA = variables_NA)
95 | )
96 | 
97 | print("Preprocessing finished!")
98 | } -------------------------------------------------------------------------------- /RSparkCluster/step5_create_risk_tables.R: --------------------------------------------------------------------------------
1 | ##########################################################################################################################################
2 | ## This R script will create the risk tables for various character variables.
3 | 
4 | ## Input: 1. LocalWorkDir and HDFSWorkDir: working directories on HDFS and local edge node.
5 | ##        2. HiveTable: name of the Hive table containing the preprocessed training set to be used to create risk tables.
6 | ##        3. smooth1 and smooth2: smoothing parameters used to compute the risk values.
7 | 
8 | ## Output: Risk tables embedded in a list Risk_list, saved on the edge node.
9 | ##########################################################################################################################################
10 | 
11 | create_risk_tables <- function(LocalWorkDir,
12 | HDFSWorkDir,
13 | HiveTable,
14 | smooth1,
15 | smooth2){
16 | 
17 | # Define the intermediate directory holding the input data.
18 | HDFSIntermediateDir <- file.path(HDFSWorkDir,"temp")
19 | 
20 | # Define the directory where Risk tables will be saved in the Development stage.
21 | LocalModelsDir <- file.path(LocalWorkDir, "model")
22 | 
23 | # Variables for which we create Risk Tables.
24 | risk_vars <- c("transactioncurrencycode", "localhour", "ipstate", "ippostcode","ipcountrycode", "browserlanguage",
25 | "accountpostalcode", "accountstate", "accountcountry", "paymentbillingpostalcode", "paymentbillingstate",
26 | "paymentbillingcountrycode")
27 | 
28 | # Point to the input hive table, while converting the strings to factors for correct computations with rxSummary.
29 | factorRiskInfo <- mapply(function(names){list(type = "factor")}, risk_vars, SIMPLIFY = FALSE)
30 | Tagged_Processed_hivefactors <- RxHiveData(table = HiveTable, colInfo = factorRiskInfo)
31 | 
32 | # Count the number of fraud and non-fraud observations for each level of the variables in risk_vars.
33 | ## This is done in the following way:
34 | ## rxExecBy will split the Hive table according to the key argument (here label).
35 | ## The .counts function is then executed on each of the 2 splits, and it returns the counts for each level of the variables.
36 | 
37 | .counts <- function(keys, data, risk_vars){
38 | formula <- as.formula(paste("~", paste(risk_vars, collapse = "+")))
39 | summary <- rxSummary(formula = formula, data = data, byTerm = TRUE)
40 | Summary_Counts <- summary$categorical
41 | return(Summary_Counts)
42 | }
43 | 
44 | counts_by_label_list <- rxExecBy(inData = Tagged_Processed_hivefactors,
45 | keys = c("label"),
46 | func = .counts,
47 | funcParams = list(risk_vars = risk_vars))
48 | 
49 | # Get the 2 lists of count tables, one for each label.
50 | ## We use the $keys value to know which split corresponded to label = 0 and which one to label = 1.
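## (Worked example for the risk values computed further below: with smooth1 = 10 and
## smooth2 = 100, a level observed in 5 fraud and 995 non-fraud transactions gets
## Odds = (5 + 10)/(995 + 5 + 100) = 15/1100 ~ 0.0136 and
## Risk = log(0.0136/(1 - 0.0136)) ~ -4.28. The smoothing shrinks rare levels toward
## the prior instead of letting them take extreme log-odds values.)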
51 | fraud_key <- ifelse(unlist(counts_by_label_list[[1]]$keys) == 1, 1, 2)
52 | non_fraud_key <- ifelse(fraud_key == 1, 2, 1)
53 | Fraud_Counts_list <- counts_by_label_list[[fraud_key]]$result
54 | Non_Fraud_Counts_list <- counts_by_label_list[[non_fraud_key]]$result
55 | 
56 | # Rename the column names according to the label.
57 | names(Fraud_Counts_list) <- lapply(Fraud_Counts_list, FUN = function(x){colnames(x)[1]})
58 | names(Non_Fraud_Counts_list) <- lapply(Non_Fraud_Counts_list, FUN = function(x){colnames(x)[1]})
59 | Fraud_Counts_list <- lapply(Fraud_Counts_list, FUN = function(df){setNames(df, c(colnames(df)[1],"fraudCount"))})
60 | Non_Fraud_Counts_list <- lapply(Non_Fraud_Counts_list, FUN = function(df){setNames(df, c(colnames(df)[1],"nonFraudCount"))})
61 | 
62 | # Merge the results into 1 list of data frames.
63 | Counts_list <- mapply(FUN = function(df1, df2){merge(df1, df2, all = TRUE)}, Fraud_Counts_list, Non_Fraud_Counts_list, SIMPLIFY = FALSE)
64 | 
65 | # Replace NA with 0 (case when a level was not present for one of the labels).
66 | Counts_list <- lapply(Counts_list, FUN = function(df){df[is.na(df)] <- 0; return(df)})
67 | 
68 | # Create the risk tables.
69 | ## Function for 1 data frame in the Counts_list.
70 | compute_risk_values <- function(df){
71 | # Compute the smoothed odds for every level of the variable.
72 | df$Odds <- (df$fraudCount + smooth1)/(df$nonFraudCount + df$fraudCount + smooth2)
73 | # Compute the log of the smoothed odds ratio. This is the risk value.
74 | df$Risk <- log(df$Odds/(1-df$Odds))
75 | return(df[, c(1,5)])
76 | }
77 | 
78 | ## Apply compute_risk_values to every table of the Counts_list.
79 | Risk_list <- lapply(Counts_list, FUN = compute_risk_values)
80 | 
81 | # Save it to the LocalModelsDir for future use.
82 | saveRDS(Risk_list, file.path(LocalModelsDir, "Risk_list.rds"))
83 | 
84 | print("Creating the Risk Tables finished!")
85 | print(sprintf("Risk tables created and saved on the edge node at %s", file.path(LocalModelsDir, "Risk_list.rds")))
86 | 
87 | }
88 | 
89 | 
-------------------------------------------------------------------------------- /RSparkCluster/step7_training.R: --------------------------------------------------------------------------------
1 | ##########################################################################################################################################
2 | ## This R script will train a binary classification model on the input data: a random forest (rxDForest). An alternative gradient boosted trees (GBT) model (rxFastTrees) is included below but commented out.
3 | 
4 | ## Input : 1. LocalWorkDir and HDFSWorkDir: working directories on HDFS and local edge node.
5 | ##         2. Input_Data_Xdf: training data.
6 | 
7 | ## Output: Trained model object, saved on the edge node.
8 | 
9 | ##########################################################################################################################################
10 | 
11 | training <- function(HDFSWorkDir,
12 | LocalWorkDir,
13 | Input_Data_Xdf)
14 | {
15 | 
16 | # Load the MicrosoftML library (used by the optional rxFastTrees alternative below).
17 | library("MicrosoftML")
18 | 
19 | # Define the intermediate directory holding the input data.
20 | HDFSIntermediateDir <- file.path(HDFSWorkDir,"temp")
21 | 
22 | # Define the local directory where the risk tables were saved and where the trained model will be saved.
23 | LocalModelsDir <- file.path(LocalWorkDir, "model")
24 | 
25 | Tagged_Training_Processed_Features_Xdf <- RxXdfData(file.path(HDFSIntermediateDir, Input_Data_Xdf), fileSystem = RxHdfsFileSystem())
26 | 
27 | # Make the model equation.
28 | print("Making equations for training ...")
29 | variables_all <- rxGetVarNames(Tagged_Training_Processed_Features_Xdf)
30 | variables_to_remove <- c("label", "accountid", "transactionid", "transactiondatetime", "transactiondate","transactiontime",
31 | "transactioncurrencycode", "localhour", "ipstate", "ippostcode","ipcountrycode", "browserlanguage",
32 | "accountpostalcode", "accountstate", "accountcountry", "paymentbillingpostalcode", "paymentbillingstate",
33 | "paymentbillingcountrycode","paymentbillingaddress", "paymentbillingname", "accountaddress", "accountownername", "shippingaddress")
34 | 
35 | training_variables <- variables_all[!(variables_all %in% variables_to_remove)]
36 | equation <- paste("label ~ ", paste(training_variables, collapse = "+", sep=""), sep="")
37 | 
38 | # Train the model. The rxFastTrees (GBT) call below is kept for reference; the active code fits a random forest with rxDForest.
39 | print("Training random forest model...")
40 | #rxSetComputeContext('local')
41 | #boosted_fit <- rxFastTrees(formula = as.formula(equation),
42 | # data = Tagged_Training_Processed_Features_Xdf,
43 | # type = c("binary"),
44 | # numTrees = 100,
45 | # learningRate = 0.2,
46 | # splitFraction = 5/24,
47 | # featureFraction = 1,
48 | # minSplit = 10,
49 | # unbalancedSets = TRUE,
50 | # randomSeed = 5)
51 | 
52 | boosted_fit <- rxDForest(formula = as.formula(equation),
53 | data = Tagged_Training_Processed_Features_Xdf,
54 | nTree = 2,
55 | timesToRun = 20,
56 | seed = 5,
57 | method = "class",
58 | scheduleOnce = TRUE,
59 | computeOobError=-1 )
60 | 
61 | # Save the fitted model to the local edge node. (The file keeps the name gbt_model.rds, which step8_prediction.R expects.)
62 | saveRDS(boosted_fit, file = paste(LocalModelsDir, "/gbt_model.rds", sep = ""))
63 | print("Training finished!")
64 | print(paste("Model is saved on the edge node under ", LocalModelsDir, sep=""))
65 | } -------------------------------------------------------------------------------- /RSparkCluster/step8_prediction.R: --------------------------------------------------------------------------------
1 | ##########################################################################################################################################
2 | ## This R script will do batch scoring. (The evaluation of the scores is done separately, in step9_evaluation.R.)
3 | 
4 | ## Input: 1. LocalWorkDir and HDFSWorkDir: working directories on HDFS and local edge node.
5 | ##        2. Input_Data_Xdf: name of the xdf file with the input data to be scored.
6 | ##        3. Stage: "Dev" for development, "Prod" for batch scoring, "Web" for web scoring.
7 | ## Output: Scored data set.
8 | 
9 | ##########################################################################################################################################
10 | 
11 | prediction <- function(HDFSWorkDir,
12 | LocalWorkDir,
13 | Input_Data_Xdf,
14 | Stage)
15 | {
16 | 
17 | # Load the MicrosoftML library for rxPredict on the trained model.
18 | library("MicrosoftML")
19 | 
20 | # Define the intermediate directory holding the input data.
21 | HDFSIntermediateDir <- file.path(HDFSWorkDir,"temp")
22 | 
23 | # Get the trained model.
24 | if(Stage == "Dev" | Stage == "Prod"){
25 | # Define the directory where the model will be loaded from.
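# (In the "Web" branch below, model_objects is not created in this script: it is expected
# to already be in memory, presumably supplied by the web-scoring wrapper that loads the
# published model -- see web_scoring_main.R and in_memory_scoring.R.)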
26 | LocalModelsDir <- file.path(LocalWorkDir, "model") 27 | 28 | # Import the model from LocalModelsDir 29 | boosted_fit <- readRDS(file.path(LocalModelsDir,"gbt_model.rds")) 30 | 31 | }else{ 32 | boosted_fit <- model_objects$boosted_fit 33 | } 34 | 35 | print("Scoring the Random Forest...") 36 | 37 | # Pointer to the Xdf data to be scored 38 | Score_Data_Xdf <- RxXdfData(file.path(HDFSIntermediateDir,Input_Data_Xdf), fileSystem = RxHdfsFileSystem()) 39 | 40 | # Pointer to the Xdf data of output 41 | Predict_Score_Xdf <- RxXdfData(file.path(HDFSIntermediateDir,"PredictScore"), fileSystem = RxHdfsFileSystem()) 42 | 43 | # Make predictions. 44 | rxPredict(modelObject = boosted_fit, 45 | data = Score_Data_Xdf, 46 | type = c("prob"), 47 | outData = Predict_Score_Xdf, 48 | overwrite = TRUE, 49 | extraVarsToWrite = c("accountid", "transactionid", "transactiondate","transactiontime", "transactionamountusd", "label")) 50 | 51 | if(Stage == "Dev"){ 52 | # Save the Predictions data as a Hive table to be used in PowerBI for visualizations (only used in the Dev Stage). 53 | Predict_Score_hive <- RxHiveData(table = "PredictScore") 54 | rxDataStep(inData = Predict_Score_Xdf, outFile = Predict_Score_hive, overwrite = TRUE) 55 | } 56 | 57 | print("Scoring Finished!") 58 | } -------------------------------------------------------------------------------- /Resources/ActionScripts/ConfigureSQL.ps1: -------------------------------------------------------------------------------- 1 | [CmdletBinding()] 2 | param( 3 | [parameter(Mandatory=$true, Position=1)] 4 | [string]$serverName, 5 | 6 | [parameter(Mandatory=$true, Position=2)] 7 | [string]$SolutionName, 8 | 9 | [parameter(Mandatory=$true, Position=3)] 10 | [string]$InstallPy, 11 | 12 | [parameter(Mandatory=$true, Position=4)] 13 | [string]$InstallR 14 | ) 15 | 16 | 17 | 18 | $db = $dbName 19 | 20 | $dataList = ("Account_Info", "Fraud_Transactions", "Untagged_Transactions") 21 | 22 | ########################################################################## 23 | 24 | # Create Database and BaseTables 25 | 26 | ######################################################################### 27 | 28 | #################################################################### 29 | # Check to see If SQL Version is at least SQL 2017 and Not SQL Express 30 | #################################################################### 31 | 32 | 33 | $query = 34 | "select 35 | case 36 | when 37 | cast(left(cast(serverproperty('productversion') as varchar), 4) as numeric(4,2)) >= 14 38 | and CAST(SERVERPROPERTY ('edition') as varchar) Not like 'Express%' 39 | then 'Yes' 40 | else 'No' end as 'isSQL17'" 41 | 42 | $isCompatible = Invoke-Sqlcmd -ServerInstance $ServerName -Database Master -Query $query 43 | $isCompatible = $isCompatible.Item(0) 44 | if ($isCompatible -eq 'Yes' -and $InstallPy -eq 'Yes') { 45 | Write-Host 46 | ("This Version of SQL is Compatible with SQL Py") 47 | 48 | ## Create Py Database 49 | Write-Host 50 | ("Creating SQL Database for Py") 51 | 52 | Write-Host 53 | ("Using $ServerName SQL Instance") 54 | 55 | ## Create PY Server DB 56 | $dbName = $db + "_Py" 57 | $SqlParameters = @("dbName=$dbName") 58 | 59 | $CreateSQLDB = "$ScriptPath\CreateDatabase.sql" 60 | 61 | $CreateSQLObjects = "$ScriptPath\CreateSQLObjectsPy.sql" 62 | Write-Host 63 | ("Calling Script to create the $dbName database") 64 | 65 | invoke-sqlcmd -inputfile $CreateSQLDB -serverinstance $ServerName -database master -Variable $SqlParameters 66 | 67 | 68 | Write-Host 69 | ("SQLServerDB $dbName Created") 70 
|
71 | invoke-sqlcmd "USE $dbName;"
72 | 
73 | Write-Host
74 | ("Calling Script to create the objects in the $dbName database")
75 | 
76 | invoke-sqlcmd -inputfile $CreateSQLObjects -serverinstance $ServerName -database $dbName
77 | 
78 | Write-Host
79 | ("SQLServerObjects Created in $dbName Database")
80 | 
81 | $OdbcName = "obdc" + $dbname
82 | ## Create ODBC Connection for PowerBI to Use
83 | Add-OdbcDsn -Name $OdbcName -DriverName "ODBC Driver 13 for SQL Server" -DsnType 'System' -Platform '64-bit' -SetPropertyValue @("Server=$ServerName", "Trusted_Connection=Yes", "Database=$dbName") -ErrorAction SilentlyContinue -PassThru
84 | 
85 | }
86 | else
87 | {
88 | if ($isCompatible -eq 'No' -and $InstallPy -eq 'Yes')
89 | {
90 | Write-Host
91 | ("This version of SQL is not compatible with Py; the Py code and DBs will not be created")
92 | }
93 | else
94 | {
95 | Write-Host
96 | ("There is no Python version of this solution")
97 | }
98 | }
99 | 
100 | 
101 | 
102 | 
103 | If ($InstallR -eq 'Yes')
104 | {
105 | Write-Host
106 | ("Creating SQL Database for R")
107 | 
108 | 
109 | $dbName = $db + "_R"
110 | 
111 | ## Create RServer DB
112 | $SqlParameters = @("dbName=$dbName")
113 | 
114 | $CreateSQLDB = "$ScriptPath\CreateDatabase.sql"
115 | 
116 | $CreateSQLObjects = "$ScriptPath\CreateSQLObjectsR.sql"
117 | Write-Host
118 | ("Calling Script to create the $dbName database")
119 | invoke-sqlcmd -inputfile $CreateSQLDB -serverinstance $ServerName -database master -Variable $SqlParameters
120 | 
121 | 
122 | Write-Host
123 | ("SQLServerDB $dbName Created")
124 | invoke-sqlcmd "USE $dbName;"
125 | 
126 | Write-Host
127 | ("Calling Script to create the objects in the $dbName database")
128 | invoke-sqlcmd -inputfile $CreateSQLObjects -serverinstance $ServerName -database $dbName
129 | 
130 | 
131 | Write-Host
132 | ("SQLServerObjects Created in $dbName Database")
133 | 
134 | 
135 | ### Configure Database for R
136 | Write-Host
137 | ("Configuring $SolutionName Solution for R")
138 | 
139 | $dbName = $db + "_R"
140 | 
141 | ## Create ODBC Connection for PowerBI to Use
142 | $OdbcName = "obdc" + $dbname
143 | ## Create ODBC Connection for PowerBI to Use
144 | Add-OdbcDsn -Name $OdbcName -DriverName "ODBC Driver 13 for SQL Server" -DsnType 'System' -Platform '64-bit' -SetPropertyValue @("Server=$ServerName", "Trusted_Connection=Yes", "Database=$dbName") -ErrorAction SilentlyContinue -PassThru
145 | 
146 | 
147 | ##########################################################################
148 | # Deployment Pipeline
149 | ##########################################################################
150 | 
151 | $RStart = Get-Date
152 | try
153 | {
154 | Write-Host
155 | ("Import CSV File(s). This Should take about 30 Seconds Per File")
156 | 
157 | # upload csv files into SQL tables
158 | foreach ($dataFile in $dataList)
159 | {
160 | $destination = $SolutionData + $dataFile + ".csv"
161 | $tableName = $DBName + ".dbo." + $dataFile
162 | $tableSchema = $dataPath + "\" + $dataFile + ".xml"
163 | $dataSet = Import-Csv $destination
164 | Write-Host
165 | ("Loading $dataFile.csv into SQL Table")
166 | Write-SqlTableData -InputData $dataSet -DatabaseName $dbName -Force -Passthru -SchemaName dbo -ServerInstance $ServerName -TableName $dataFile
167 | Write-Host
168 | ("$datafile table loaded from CSV File(s).")
169 | }
170 | }
171 | catch
172 | {
173 | Write-Host -ForegroundColor DarkYellow "Exception in populating database tables:"
174 | Write-Host -ForegroundColor Red $Error[0].Exception
175 | throw
176 | }
177 | Write-Host
178 | ("Finished loading .csv File(s).")
179 | 
180 | Write-Host
181 | ("Training Model and Scoring Data...")
182 | 
183 | $query = "EXEC Initial_Run_Once_R"
184 | SqlServer\Invoke-Sqlcmd -ServerInstance LocalHost -Database $dbName -Query $query -ConnectionTimeout 0 -QueryTimeout 0
185 | 
186 | $Rend = Get-Date
187 | 
188 | $Duration = New-TimeSpan -Start $RStart -End $Rend
189 | Write-Host
190 | ("R Server Configured in $Duration")
191 | }
192 | ELSE
193 | {
194 | Write-Host
195 | ("There is no R version of this solution, so R will not be installed")
196 | }
197 | 
198 | 
199 | ### Configure Database for Py
200 | if ($isCompatible -eq 'Yes' -and $InstallPy -eq 'Yes')
201 | 
202 | {
203 | 
204 | $PyStart = get-date
205 | Write-Host
206 | ("Configuring $SolutionName Solution for Py")
207 | $dbname = $db + "_Py"
208 | 
209 | ##########################################################################
210 | # Deployment Pipeline Py
211 | ##########################################################################
212 | 
213 | 
214 | try
215 | {
216 | Write-Host ("Import CSV File(s). This Should take about 30 Seconds Per File")
217 | 
218 | # upload csv files into SQL tables
219 | foreach ($dataFile in $dataList)
220 | {
221 | $destination = $SolutionData + $dataFile + ".csv"
222 | $tableName = $DBName + ".dbo." + $dataFile
223 | $tableSchema = $dataPath + "\" + $dataFile + ".xml"
224 | $dataSet = Import-Csv $destination
225 | Write-Host
226 | ("Loading $dataFile.csv into SQL Table")
227 | Write-SqlTableData -InputData $dataSet -DatabaseName $dbName -Force -Passthru -SchemaName dbo -ServerInstance $ServerName -TableName $dataFile
228 | Write-Host
229 | ("$datafile table loaded from CSV File(s).")
230 | }
231 | }
232 | catch
233 | {
234 | Write-Host -ForegroundColor DarkYellow "Exception in populating database tables:"
235 | Write-Host -ForegroundColor Red $Error[0].Exception
236 | throw
237 | }
238 | Write-Host
239 | ("Finished loading .csv File(s).")
240 | 
241 | 
242 | Write-Host
243 | ("Training Model and Scoring Data...")
244 | $query = "EXEC Inital_Run_Once_Py"
245 | SqlServer\Invoke-Sqlcmd -ServerInstance LocalHost -Database $dbName -Query $query -ConnectionTimeout 0 -QueryTimeout 0
246 | 
247 | $Pyend = Get-Date
248 | 
249 | $Duration = New-TimeSpan -Start $PyStart -End $Pyend
250 | Write-Host
251 | ("Py Server Configured in $Duration")
252 | }
253 | 
254 | 
-------------------------------------------------------------------------------- /Resources/ActionScripts/CreateDatabase.sql: --------------------------------------------------------------------------------
1 | 
2 | 
3 | 
4 | 
5 | BEGIN
6 | DECLARE
7 | @DbName VARCHAR(400) = N'$(dbName)',
8 | @ServerName varchar(100) = (SELECT CAST(SERVERPROPERTY('ServerName') as Varchar)),
9 | @InstanceName varchar(100) = (SELECT CAST(SERVERPROPERTY('InstanceName') as Varchar)),
10 | @UI varchar(100),
11 | @Qry VARCHAR(MAX)
12 | 
13 | 
14 | ----Create Needed SQLRUsergroup Name ,
15 | ----if Default Instance UI = {ServerName}\SQLRUserGroup
16 | ----if Named Instance {ServerName}\SQLRUserGroup{InstanceName}
17 | 
18 | If @InstanceName is null
19 | BEGIN
20 | SET @UI = @ServerName + '\SQLRUserGroup'
21 | END
22 | 
23 | If @InstanceName is Not null
24 | BEGIN
25 | SET @UI = @ServerName + '\SQLRUserGroup' + @InstanceName
26 | END
27 | 
28 | 
29 | 
30 | SET @Qry =
31 | ('
32 | EXEC msdb.dbo.sp_delete_database_backuphistory @database_name = N''<dbname>''
33 | USE [master]
34 | ALTER DATABASE <dbname> SET SINGLE_USER WITH ROLLBACK IMMEDIATE
35 | USE [master]
36 | DROP DATABASE <dbname>
37 | ')
38 | 
39 | 
40 | --If DB Already Exists , Drop it and recreate it
41 | IF EXISTS(select * from sys.databases where name = @DbName)
42 | 
43 | BEGIN
44 | SET @Qry = (REPLACE(@Qry,'<dbname>',@DbName) )
45 | EXEC (@Qry)
46 | END
47 | 
48 | 
49 | DECLARE @Query VARCHAR(MAX)=''
50 | ---Find Default Database File Path and Create DB there
51 | DECLARE @DbFilePath VARCHAR(400) = (SELECT top 1 LEFT(physical_name, (LEN(physical_name) - CHARINDEX('\',REVERSE(physical_name)))) + '\' as BasePath FROM sys.master_files WHERE type_desc = 'ROWS')
52 | 
53 | --Find Default Log File Path and Create Log there
54 | DECLARE @LogFilePath VARCHAR(400) = (SELECT top 1 LEFT(physical_name, (LEN(physical_name) - CHARINDEX('\',REVERSE(physical_name)))) + '\' as BasePath FROM sys.master_files WHERE type_desc = 'LOG')
55 | 
56 | 
57 | IF NOT EXISTS(select * from sys.databases where name = @DbName)
58 | BEGIN
59 | SET @Query = @Query + 'CREATE DATABASE '+@DbName +' ON PRIMARY '
60 | SET @Query = @Query + '( NAME = '''+@DbName +''', FILENAME = '''+@DbFilePath+@DbName +'.mdf'' , SIZE = 73728KB , MAXSIZE = UNLIMITED, FILEGROWTH = 1024KB ) '
61 | SET @Query = @Query + ' LOG ON '
62 | SET @Query = @Query + '( NAME = '''+@DbName +'_log'', FILENAME = '''+@LogFilePath+@DbName +'_log.ldf'' , SIZE = 1024KB , MAXSIZE = 2048GB , FILEGROWTH = 1024KB)'
63 | exec(@query)
64 | END
65 | 
66 | DECLARE @Alter VARCHAR(MAX)
67 | SET @Alter =
68 | (
69 | 'ALTER DATABASE <dbname> SET COMPATIBILITY_LEVEL = 130
70 | IF (1 = FULLTEXTSERVICEPROPERTY(''IsFullTextInstalled''))
71 | begin
72 | EXEC <dbname>.[dbo].[sp_fulltext_database] @action = ''enable''
73 | end
74 | ALTER DATABASE <dbname> SET ANSI_NULL_DEFAULT OFF
75 | ALTER DATABASE <dbname> SET ANSI_NULLS OFF
76 | ALTER DATABASE <dbname> SET ANSI_PADDING OFF
77 | ALTER DATABASE <dbname> SET ANSI_WARNINGS OFF
78 | ALTER DATABASE <dbname> SET ARITHABORT OFF
79 | ALTER DATABASE <dbname> SET AUTO_CLOSE OFF
80 | ALTER DATABASE <dbname> SET AUTO_SHRINK OFF
81 | ALTER DATABASE <dbname> SET AUTO_UPDATE_STATISTICS ON
82 | ALTER DATABASE <dbname> SET CURSOR_CLOSE_ON_COMMIT OFF
83 | ALTER DATABASE <dbname> SET CURSOR_DEFAULT GLOBAL
84 | ALTER DATABASE <dbname> SET CONCAT_NULL_YIELDS_NULL OFF
85 | ALTER DATABASE <dbname> SET NUMERIC_ROUNDABORT OFF
86 | ALTER DATABASE <dbname> SET QUOTED_IDENTIFIER OFF
87 | ALTER DATABASE <dbname> SET RECURSIVE_TRIGGERS OFF
88 | ALTER DATABASE <dbname> SET ENABLE_BROKER
89 | ALTER DATABASE <dbname> SET AUTO_UPDATE_STATISTICS_ASYNC OFF
90 | ALTER DATABASE <dbname> SET DATE_CORRELATION_OPTIMIZATION OFF
91 | ALTER DATABASE <dbname> SET TRUSTWORTHY OFF
92 | ALTER DATABASE <dbname> SET ALLOW_SNAPSHOT_ISOLATION OFF
93 | ALTER DATABASE <dbname> SET PARAMETERIZATION SIMPLE
94 | ALTER DATABASE <dbname> SET READ_COMMITTED_SNAPSHOT OFF
95 | ALTER DATABASE <dbname> SET HONOR_BROKER_PRIORITY OFF
96 | ALTER DATABASE <dbname> SET RECOVERY FULL
97 | ALTER DATABASE <dbname> SET MULTI_USER
98 | ALTER DATABASE <dbname> SET PAGE_VERIFY CHECKSUM
99 | ALTER DATABASE <dbname> SET DB_CHAINING OFF
100 | ALTER DATABASE <dbname> SET FILESTREAM( NON_TRANSACTED_ACCESS = OFF )
101 | ALTER DATABASE <dbname> SET TARGET_RECOVERY_TIME = 60 SECONDS
102 | ALTER DATABASE <dbname> SET DELAYED_DURABILITY = DISABLED
103 | EXEC sys.sp_db_vardecimal_storage_format N''<dbname>'', N''ON''
104 | ALTER DATABASE <dbname> SET QUERY_STORE = OFF
105 | ALTER DATABASE <dbname> SET READ_WRITE'
106 | )
107 | SET @Alter = (REPLACE(@Alter,'<dbname>',@DbName))
108 | EXEC (@Alter)
109 | 
110 | ----CREATE USER SQLRUserGroup on SQL Server
111 | 
112 | SET @Qry =
113 | '
114 | IF NOT EXISTS (SELECT name FROM master.sys.server_principals where name = ''<user>'')
115 | BEGIN CREATE LOGIN [<user>] FROM WINDOWS WITH DEFAULT_DATABASE=[master], DEFAULT_LANGUAGE=[us_english] END
116 | '
117 | SET @Qry = REPLACE(@qry,'<user>', @ui)
118 | 
119 | EXEC (@Qry)
120 | --SELECT @Qry
121 | 
122 | 
123 | ----Give SQLRUserGroup Rights To Database(s)
124 | SET @Qry =
125 | '
126 | USE [<dbname>]
127 | CREATE USER [<user>] FOR LOGIN [<user>]
128 | 
129 | ALTER USER [<user>] WITH DEFAULT_SCHEMA=NULL
130 | 
131 | ALTER AUTHORIZATION ON SCHEMA::[db_datareader] TO [<user>]
132 | 
133 | ALTER AUTHORIZATION ON SCHEMA::[db_datawriter] TO [<user>]
134 | 
135 | ALTER AUTHORIZATION ON SCHEMA::[db_ddladmin] TO [<user>]
136 | 
137 | ALTER ROLE [db_datareader] ADD MEMBER [<user>]
138 | 
139 | ALTER ROLE [db_datawriter] ADD MEMBER [<user>]
140 | 
141 | ALTER ROLE [db_ddladmin] ADD MEMBER [<user>]
142 | '
143 | SET @Qry = REPLACE(REPLACE(@qry,'<user>', @ui),'<dbname>',@DbName)
144 | 
145 | EXEC (@Qry)
146 | --SELECT @Qry
147 | 
148 | END
-------------------------------------------------------------------------------- /Resources/ActionScripts/CreateSQLObjectsR.sql: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/r-server-fraud-detection/4ff1bc6740a9384d80dbf3cc2c97e374a47248af/Resources/ActionScripts/CreateSQLObjectsR.sql -------------------------------------------------------------------------------- /Resources/ActionScripts/createShortcuts.ps1: --------------------------------------------------------------------------------
1 | <#
2 | 
3 | .SYNOPSIS
4 | Script to create the help shortcut and the solution folder shortcut.
5 | 
6 | .PARAMETER helpfile
7 | path to the help url file.
8 | 
9 | .PARAMETER solutionPath
10 | path to the solution folder with data and source.
11 | 
12 | #>
13 | [CmdletBinding()]
14 | param(
15 | [parameter(Mandatory=$true, Position=1, ParameterSetName = "LCR")]
16 | [ValidateNotNullOrEmpty()]
17 | [string]$helpfile,
18 | 
19 | [parameter(Mandatory=$true, Position=2, ParameterSetName = "LCR")]
20 | [ValidateNotNullOrEmpty()]
21 | [string]$solutionPath
22 | )
23 | 
24 | # find the desktop
25 | $desktop = [Environment]::GetFolderPath("Desktop")
26 | 
27 | $desktop = $desktop + '\'
28 | 
29 | 
30 | #create the help link in the startup programs folder
31 | 
32 | $startmenu = [Environment]::GetFolderPath("StartMenu")
33 | $startupfolder = $startmenu + '\Programs\Startup\'
34 | # We create this since the user startup folder is only created after first login
35 | # An alternative is to add it to the all-users startup folder
36 | mkdir $startupfolder
37 | #copy
38 | $down = $helpfile
39 | Write-Host $down
40 | Write-Host $startmenu
41 | ls $startmenu
42 | Write-Host $startupfolder
43 | ls $startupfolder
44 | cp -Verbose $down $startupfolder
45 | cp -Verbose $down $desktop
46 | 
47 | #create shortcut to solution folder on desktop
48 | $WsShell = New-Object -ComObject WScript.Shell
49 | $shortcut = $WsShell.CreateShortcut($desktop + "Fraud.lnk")
50 | $shortcut.TargetPath = $solutionPath
51 | $shortcut.Save()
--------------------------------------------------------------------------------
/Resources/ActionScripts/frauddetection_Help.url:
--------------------------------------------------------------------------------
1 | [InternetShortcut]
2 | URL=https://microsoft.github.io/r-server-fraud-detection/Typical.html
3 | IDList=
4 | HotKey=0
5 | [{000214A0-0000-0000-C000-000000000046}]
6 | Prop3=19,11
7 | 
--------------------------------------------------------------------------------
/Resources/ActionScripts/hdisetup.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | # This script is used to set up an HDInsight cluster deployed from the Cortana Analytics Gallery
4 | # WARNING: This script is only meant to be run from the solution template deployment process.
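# Note on the positional arguments (inferred from their usage below, not documented
# in this repo): $1 appears to be the edge-node user whose home directory receives
# the R scripts, and $2 the admin password that is both substituted into those
# scripts and passed to the one-box operationalization install.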
5 | 
6 | # put R code in the user's home directory
7 | git clone --branch master --single-branch https://github.com/Microsoft/r-server-fraud-detection.git fraud
8 | cp fraud/RSparkCluster/* /home/$1
9 | chmod 777 /home/$1/*.R
10 | rm -rf fraud
11 | sed -i "s/XXYOURPW/$2/g" /home/$1/*.R
12 | 
13 | # Configure edge node as one-box setup for R Server Operationalization
14 | /usr/local/bin/dotnet /usr/lib64/microsoft-r/rserver/o16n/9.1.0/Microsoft.RServer.Utils.AdminUtil/Microsoft.RServer.Utils.AdminUtil.dll -silentoneboxinstall "$2"
15 | 
16 | # turn off telemetry
17 | sed -i 's/options(mds.telemetry=1)/options(mds.telemetry=0)/g' /usr/lib64/microsoft-r/3.3/lib64/R/etc/Rprofile.site
18 | sed -i 's/options(mds.logging=1)/options(mds.logging=0)/g' /usr/lib64/microsoft-r/3.3/lib64/R/etc/Rprofile.site
19 | 
--------------------------------------------------------------------------------
/Resources/exampleuser.sql:
--------------------------------------------------------------------------------
1 | --
2 | -- remove old rdemo user and login from master
3 | --
4 | USE [master]
5 | GO
6 | IF EXISTS (SELECT name FROM sys.database_principals WHERE name = 'rdemo')
7 | BEGIN
8 | PRINT 'Deleting old rdemo user from master'
9 | DROP USER [rdemo]
10 | END
11 | GO
12 | IF EXISTS (SELECT name FROM master.sys.server_principals WHERE name = 'rdemo')
13 | BEGIN
14 | PRINT 'Deleting old rdemo login from master'
15 | DROP LOGIN [rdemo]
16 | END
17 | GO
18 | --
19 | -- create new rdemo login in master
20 | --
21 | USE [master]
22 | GO
23 | PRINT 'Creating rdemo login in master'
24 | CREATE LOGIN [rdemo] WITH PASSWORD=N'D@tascience', CHECK_EXPIRATION=OFF, CHECK_POLICY=OFF;
25 | CREATE USER [rdemo] FOR LOGIN [rdemo]
26 | --ALTER ROLE [db_rrerole] ADD MEMBER [rdemo]
27 | ALTER ROLE [db_owner] ADD MEMBER [rdemo]
28 | GO
29 | 
30 | exec sp_addrolemember 'db_owner', 'rdemo'
31 | exec sp_addrolemember 'db_ddladmin', 'rdemo'
32 | exec sp_addrolemember 'db_accessadmin', 'rdemo'
33 | exec sp_addrolemember 'db_datareader', 'rdemo'
34 | exec sp_addrolemember 'db_datawriter', 'rdemo'
35 | exec sp_addsrvrolemember @loginame= 'rdemo', @rolename = 'sysadmin'
36 | GO
37 | 
--------------------------------------------------------------------------------
/Resources/images/fraud.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/r-server-fraud-detection/4ff1bc6740a9384d80dbf3cc2c97e374a47248af/Resources/images/fraud.jpg
--------------------------------------------------------------------------------
/Resources/readme.md:
--------------------------------------------------------------------------------
1 | Scripts in the Resources folder should only be run once, through the template deployment process. They are not meant to be run by users, as they assume the database and users do not already exist.
2 | 
3 | # Contributing
4 | 
5 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.
6 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Security 4 | 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/). 6 | 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/opensource/security/definition), please report it to us as described below. 8 | 9 | ## Reporting Security Issues 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** 12 | 13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/opensource/security/create-report). 14 | 15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/opensource/security/pgpkey). 16 | 17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://aka.ms/opensource/security/msrc). 18 | 19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 20 | 21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) 22 | * Full paths of source file(s) related to the manifestation of the issue 23 | * The location of the affected source code (tag/branch/commit or direct URL) 24 | * Any special configuration required to reproduce the issue 25 | * Step-by-step instructions to reproduce the issue 26 | * Proof-of-concept or exploit code (if possible) 27 | * Impact of the issue, including how an attacker might exploit the issue 28 | 29 | This information will help us triage your report more quickly. 30 | 31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/opensource/security/bounty) page for more details about our active programs. 32 | 33 | ## Preferred Languages 34 | 35 | We prefer all communications to be in English. 36 | 37 | ## Policy 38 | 39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/opensource/security/cvd). 
40 | 
41 | 
42 | 
--------------------------------------------------------------------------------
/SQLR/CreateRiskTable.sql:
--------------------------------------------------------------------------------
1 | /*
2 | This script will create a stored procedure that builds the risk table for a given input variable
3 | 
4 | input parameters:
5 | @name = the name of the variable to generate the risk table for
6 | @table_name = the name of the output risk table
7 | */
8 | 
9 | set ansi_nulls on
10 | go
11 | 
12 | set quoted_identifier on
13 | go
14 | 
15 | DROP PROCEDURE IF EXISTS CreateRiskTable
16 | GO
17 | 
18 | create procedure CreateRiskTable
19 | @name varchar(max),
20 | @table_name varchar(max)
21 | as
22 | begin
23 | declare @filltablesql nvarchar(max)
24 | declare @droptablesql nvarchar(max)
25 | declare @removenullconstrain nvarchar(max)
26 | declare @addprimarykey nvarchar(max)
27 | 
28 | /* drop corresponding table if it already exists */
29 | set @droptablesql = 'DROP TABLE IF EXISTS ' + @table_name
30 | exec sp_executesql @droptablesql
31 | 
32 | /* create risk table */
33 | set @filltablesql = 'select ' + @name + ' , log(odds/(1-odds)) as risk
34 | into dbo.' + @table_name +
35 | ' from (select distinct ' + @name + ' ,cast((sum(label)+10) as float)/cast((sum(label)+sum(1-label)+100) as float) as odds
36 | from Tagged_Training_Processed group by ' + @name + ' ) temp'
37 | 
38 | /* example: when @name=localHour, @table_name=Risk_LocalHour, @filltablesql is the following:
39 | select localHour , log(odds/(1-odds)) as risk
40 | into Risk_LocalHour from (select distinct localHour ,cast((sum(label)+10) as float)/cast((sum(label)+sum(1-label)+100) as float) as odds
41 | from Tagged_Training_Processed group by localHour ) temp
42 | */
43 | 
44 | exec sp_executesql @filltablesql
45 | end
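A quick worked example of the smoothed log-odds computed above (the counts are illustrative, not taken from the data set):

-- a localHour level with 40 fraud rows (label = 1) and 960 non-fraud rows gives
--   odds = (40 + 10) / (40 + 960 + 100) = 50 / 1100 ≈ 0.0455
--   risk = log(0.0455 / 0.9545) ≈ -3.04        (T-SQL log() is the natural log)
-- The +10 and +100 terms act as a smoothing prior: a level with no observations
-- gets odds = 10/100 = 0.1, i.e. risk = log(0.1/0.9) ≈ -2.20, instead of diverging.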
--------------------------------------------------------------------------------
/SQLR/ParseString.sql:
--------------------------------------------------------------------------------
1 | /*
2 | This script creates the stored procedure to:
3 | 1. ingest a string and store it into a temporary table
4 | 2. parse the string and output the parsed string to a sql table
5 | */
6 | 
7 | set ansi_nulls on
8 | go
9 | 
10 | set quoted_identifier on
11 | go
12 | 
13 | DROP PROCEDURE IF EXISTS ParseStr
14 | GO
15 | 
16 | create procedure ParseStr @inputstring VARCHAR(MAX)
17 | as
18 | begin
19 | 
20 | /* Reformat the long string into XML (e.g. 'a,b' becomes <M>a</M><M>b</M>) whose elements can be retrieved by location index */
21 | declare @parsequery nvarchar(max)
22 | set @parsequery = '
23 | DECLARE @tmp table ( ID int Identity(1,1) ,[Name] nvarchar(max))
24 | INSERT into @tmp SELECT ''' + @inputstring + '''
25 | drop table if exists Parsed_String
26 | ;WITH tmp AS
27 | (
28 | SELECT
29 | CAST(''<M>'' + REPLACE([Name], '','' , ''</M><M>'') + ''</M>'' AS XML)
30 | AS [NameParsed]
31 | FROM @tmp
32 | )
33 | SELECT
34 | case when [NameParsed].value(''/M[1]'', ''varchar (100)'')=''NULL'' then NULL else [NameParsed].value(''/M[1]'', ''varchar (100)'') end As [transactionID],
35 | case when [NameParsed].value(''/M[2]'', ''varchar (100)'')=''NULL'' then NULL else [NameParsed].value(''/M[2]'', ''varchar (100)'') end As [accountID],
36 | case when [NameParsed].value(''/M[3]'', ''varchar (100)'')=''NULL'' then NULL else [NameParsed].value(''/M[3]'', ''varchar (100)'') end As [transactionAmountUSD],
37 | case when [NameParsed].value(''/M[4]'', ''varchar (100)'')=''NULL'' then NULL else [NameParsed].value(''/M[4]'', ''varchar (100)'') end As transactionAmount,
38 | case when [NameParsed].value(''/M[5]'', ''varchar (100)'')=''NULL'' then NULL else [NameParsed].value(''/M[5]'', ''varchar (100)'') end As [transactionCurrencyCode],
39 | case when [NameParsed].value(''/M[6]'', ''varchar (100)'')=''NULL'' then NULL else [NameParsed].value(''/M[6]'', ''varchar (100)'') end As [transactionCurrencyConversionRate],
40 | case when [NameParsed].value(''/M[7]'', ''varchar (100)'')=''NULL'' then NULL else [NameParsed].value(''/M[7]'', ''varchar (100)'') end As [transactionDate],
41 | case when [NameParsed].value(''/M[8]'', ''varchar (100)'')=''NULL'' then NULL else [NameParsed].value(''/M[8]'', ''varchar (100)'') end As [transactionTime],
42 | case when [NameParsed].value(''/M[9]'', ''varchar (100)'')=''NULL'' then NULL else [NameParsed].value(''/M[9]'', ''varchar (100)'') end As [localHour],
43 | case when [NameParsed].value(''/M[10]'', ''varchar (100)'')=''NULL'' then NULL else [NameParsed].value(''/M[10]'', ''varchar (100)'') end As [transactionScenario],
44 | case when [NameParsed].value(''/M[11]'', ''varchar (100)'')=''NULL'' then NULL else [NameParsed].value(''/M[11]'', ''varchar (100)'') end As [transactionType],
45 | case when [NameParsed].value(''/M[12]'', ''varchar (100)'')=''NULL'' then NULL else [NameParsed].value(''/M[12]'', ''varchar (100)'') end As [transactionMethod],
46 | case when [NameParsed].value(''/M[13]'', ''varchar (100)'')=''NULL'' then NULL else [NameParsed].value(''/M[13]'', ''varchar (100)'') end As [transactionDeviceType],
47 | case when [NameParsed].value(''/M[14]'', ''varchar (100)'')=''NULL'' then NULL else [NameParsed].value(''/M[14]'', ''varchar (100)'') end As [transactionDeviceId],
48 | case when [NameParsed].value(''/M[15]'', ''varchar (100)'')=''NULL'' then NULL else [NameParsed].value(''/M[15]'', ''varchar (100)'') end As [transactionIPaddress],
49 | case when [NameParsed].value(''/M[16]'', ''varchar (100)'')=''NULL'' then NULL else [NameParsed].value(''/M[16]'', ''varchar (100)'') end As [ipState],
50 | case when [NameParsed].value(''/M[17]'', ''varchar (100)'')=''NULL'' then NULL else [NameParsed].value(''/M[17]'', ''varchar
(100)'') end As [ipPostcode], 51 | case when [NameParsed].value(''/M[18]'', ''varchar (100)'')=''NULL'' then NULL else [NameParsed].value(''/M[18]'', ''varchar (100)'') end As [ipCountryCode], 52 | case when [NameParsed].value(''/M[19]'', ''varchar (100)'')=''NULL'' then NULL else [NameParsed].value(''/M[19]'', ''varchar (100)'') end As [isProxyIP], 53 | case when [NameParsed].value(''/M[20]'', ''varchar (100)'')=''NULL'' then NULL else [NameParsed].value(''/M[20]'', ''varchar (100)'') end As [browserType], 54 | case when [NameParsed].value(''/M[21]'', ''varchar (100)'')=''NULL'' then NULL else [NameParsed].value(''/M[21]'', ''varchar (100)'') end As [browserLanguage], 55 | case when [NameParsed].value(''/M[22]'', ''varchar (100)'')=''NULL'' then NULL else [NameParsed].value(''/M[22]'', ''varchar (100)'') end As [paymentInstrumentType], 56 | case when [NameParsed].value(''/M[23]'', ''varchar (100)'')=''NULL'' then NULL else [NameParsed].value(''/M[23]'', ''varchar (100)'') end As [cardType], 57 | case when [NameParsed].value(''/M[24]'', ''varchar (100)'')=''NULL'' then NULL else [NameParsed].value(''/M[24]'', ''varchar (100)'') end As [cardNumberInputMethod], 58 | case when [NameParsed].value(''/M[25]'', ''varchar (100)'')=''NULL'' then NULL else [NameParsed].value(''/M[25]'', ''varchar (100)'') end As [paymentInstrumentID], 59 | case when [NameParsed].value(''/M[26]'', ''varchar (100)'')=''NULL'' then NULL else [NameParsed].value(''/M[26]'', ''varchar (100)'') end As [paymentBillingAddress], 60 | case when [NameParsed].value(''/M[27]'', ''varchar (100)'')=''NULL'' then NULL else [NameParsed].value(''/M[27]'', ''varchar (100)'') end As [paymentBillingPostalCode], 61 | case when [NameParsed].value(''/M[28]'', ''varchar (100)'')=''NULL'' then NULL else [NameParsed].value(''/M[28]'', ''varchar (100)'') end As [paymentBillingState], 62 | case when [NameParsed].value(''/M[29]'', ''varchar (100)'')=''NULL'' then NULL else [NameParsed].value(''/M[29]'', ''varchar (100)'') end As [paymentBillingCountryCode], 63 | case when [NameParsed].value(''/M[30]'', ''varchar (100)'')=''NULL'' then NULL else [NameParsed].value(''/M[30]'', ''varchar (100)'') end As [paymentBillingName], 64 | case when [NameParsed].value(''/M[31]'', ''varchar (100)'')=''NULL'' then NULL else [NameParsed].value(''/M[31]'', ''varchar (100)'') end As [shippingAddress], 65 | case when [NameParsed].value(''/M[32]'', ''varchar (100)'')=''NULL'' then NULL else [NameParsed].value(''/M[32]'', ''varchar (100)'') end As [shippingPostalCode], 66 | case when [NameParsed].value(''/M[33]'', ''varchar (100)'')=''NULL'' then NULL else [NameParsed].value(''/M[33]'', ''varchar (100)'') end As [shippingCity], 67 | case when [NameParsed].value(''/M[34]'', ''varchar (100)'')=''NULL'' then NULL else [NameParsed].value(''/M[34]'', ''varchar (100)'') end As [shippingState], 68 | case when [NameParsed].value(''/M[35]'', ''varchar (100)'')=''NULL'' then NULL else [NameParsed].value(''/M[35]'', ''varchar (100)'') end As [shippingCountry], 69 | case when [NameParsed].value(''/M[36]'', ''varchar (100)'')=''NULL'' then NULL else [NameParsed].value(''/M[36]'', ''varchar (100)'') end As [cvvVerifyResult], 70 | case when [NameParsed].value(''/M[37]'', ''varchar (100)'')=''NULL'' then NULL else [NameParsed].value(''/M[37]'', ''varchar (100)'') end As [responseCode], 71 | case when [NameParsed].value(''/M[38]'', ''varchar (100)'')=''NULL'' then NULL else [NameParsed].value(''/M[38]'', ''varchar (100)'') end As [digitalItemCount], 72 | case when 
[NameParsed].value(''/M[39]'', ''varchar (100)'')=''NULL'' then NULL else [NameParsed].value(''/M[39]'', ''varchar (100)'') end As [physicalItemCount],
73 | case when [NameParsed].value(''/M[40]'', ''varchar (100)'')=''NULL'' then NULL else [NameParsed].value(''/M[40]'', ''varchar (100)'') end As [purchaseProductType]
74 | into Parsed_String
75 | FROM tmp'
76 | exec sp_executesql @parsequery
77 | end
--------------------------------------------------------------------------------
/SQLR/ScoreOneTrans.sql:
--------------------------------------------------------------------------------
1 | /*
2 | This script creates the stored procedure to score one transaction by invoking the following stored procedures:
3 | 1. ParseStr: parse the input string and save it to a sql table
4 | 2. PredictR: preprocess, feature engineer, and score the parsed transaction
5 | */
6 | 
7 | set ansi_nulls on
8 | go
9 | 
10 | set quoted_identifier on
11 | go
12 | 
13 | DROP PROCEDURE IF EXISTS ScoreOneTrans
14 | GO
15 | 
16 | create procedure ScoreOneTrans @inputstring VARCHAR(MAX)
17 | as
18 | begin
19 | 
20 | /* invoke ParseStr */
21 | declare @invokeParseStr nvarchar(max)
22 | set @invokeParseStr ='
23 | exec ParseStr ''' + @inputstring + ''''
24 | exec sp_executesql @invokeParseStr
25 | 
26 | /* invoke PredictR */
27 | declare @invokePredictR nvarchar(max)
28 | set @invokePredictR ='
29 | exec PredictR ''Parsed_String'', ''Predict_Score_Single_Transaction'',''1''
30 | '
31 | exec sp_executesql @invokePredictR
32 | SELECT [Probability.1] FROM [Fraud].[dbo].[Predict_Score_Single_Transaction]
33 | 
34 | end
--------------------------------------------------------------------------------
/SQLR/SortAcctTable.sql:
--------------------------------------------------------------------------------
1 | /*
2 | This script will create stored procedure to
3 | 1. create a recordDateTime column for the Account_Info table
4 | 2. sort the table by accountID and, within each account, by recordDateTime in descending order
5 | */
6 | 
7 | set ansi_nulls on
8 | go
9 | 
10 | set quoted_identifier on
11 | go
12 | 
13 | DROP PROCEDURE IF EXISTS sortAcctTable
14 | GO
15 | 
16 | create procedure sortAcctTable @table nvarchar(max)
17 | as
18 | begin
19 | 
20 | declare @dropTable nvarchar(max)
21 | set @dropTable = '
22 | drop table if exists ' + @table + '_Sort'
23 | exec sp_executesql @dropTable
24 | 
25 | declare @sortAcctTableQuery nvarchar(max)
26 | set @sortAcctTableQuery = '
27 | select *,
28 | convert(datetime,stuff(stuff(stuff(concat(transactionDate,dbo.FormatTime(transactionTime)), 9, 0, '' ''), 12, 0, '':''), 15, 0, '':'')) as recordDateTime
29 | into ' + @table + '_Sort from ' + @table + '
30 | order by accountID, recordDateTime desc
31 | '
32 | exec sp_executesql @sortAcctTableQuery
33 | end
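For reference, a worked example of the date/time reconstruction used above (the values are illustrative):

-- transactionDate = '20130409', transactionTime = '93456'
--   dbo.FormatTime('93456')                  -> '093456'  (zero-padded to 6 digits)
--   concat('20130409', '093456')             -> '20130409093456'
--   stuff(..., 9, 0, ' ')                    -> '20130409 093456'
--   stuff(..., 12, 0, ':')                   -> '20130409 09:3456'
--   stuff(..., 15, 0, ':')                   -> '20130409 09:34:56'
--   convert(datetime, '20130409 09:34:56')   -> 2013-04-09 09:34:56.000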
--------------------------------------------------------------------------------
/SQLR/Step0_CreateTables.sql:
--------------------------------------------------------------------------------
1 | /*
2 | The script will create the following tables:
3 | 1. table for untagged transactions
4 | 2. table for account information
5 | 3. table for fraud transactions
6 | 4. table storing historical transactions which will be used for calculating aggregates
7 | */
8 | 
9 | set ansi_nulls on
10 | go
11 | 
12 | set quoted_identifier on
13 | go
14 | 
15 | drop table if exists Untagged_Transactions
16 | create table Untagged_Transactions (
17 | transactionID varchar(255),
18 | accountID varchar(255),
19 | transactionAmountUSD varchar(255),
20 | transactionAmount varchar(255),
21 | transactionCurrencyCode varchar(255),
22 | transactionCurrencyConversionRate varchar(255),
23 | transactionDate varchar(255),
24 | transactionTime varchar(255),
25 | localHour varchar(255),
26 | transactionScenario varchar(255),
27 | transactionType varchar(255),
28 | transactionMethod varchar(255),
29 | transactionDeviceType varchar(255),
30 | transactionDeviceId varchar(255),
31 | transactionIPaddress varchar(255),
32 | ipState varchar(255),
33 | ipPostcode varchar(255),
34 | ipCountryCode varchar(255),
35 | isProxyIP varchar(255),
36 | browserType varchar(255),
37 | browserLanguage varchar(255),
38 | paymentInstrumentType varchar(255),
39 | cardType varchar(255),
40 | cardNumberInputMethod varchar(255),
41 | paymentInstrumentID varchar(255),
42 | paymentBillingAddress varchar(255),
43 | paymentBillingPostalCode varchar(255),
44 | paymentBillingState varchar(255),
45 | paymentBillingCountryCode varchar(255),
46 | paymentBillingName varchar(255),
47 | shippingAddress varchar(255),
48 | shippingPostalCode varchar(255),
49 | shippingCity varchar(255),
50 | shippingState varchar(255),
51 | shippingCountry varchar(255),
52 | cvvVerifyResult varchar(255),
53 | responseCode varchar(255),
54 | digitalItemCount varchar(255),
55 | physicalItemCount varchar(255),
56 | purchaseProductType varchar(255)
57 | );
58 | 
59 | drop table if exists Account_Info
60 | create table Account_Info (
61 | accountID varchar(255),
62 | transactionDate varchar(255),
63 | transactionTime varchar(255),
64 | accountOwnerName varchar(255),
65 | accountAddress varchar(255),
66 | accountPostalCode varchar(255),
67 | accountCity varchar(255),
68 | accountState varchar(255),
69 | accountCountry varchar(255),
70 | accountOpenDate varchar(255),
71 | accountAge varchar(255),
72 | isUserRegistered varchar(255),
73 | paymentInstrumentAgeInAccount varchar(255),
74 | numPaymentRejects1dPerUser varchar(255)
75 | );
76 | 
77 | drop table if exists Fraud
78 | create table Fraud (
79 | transactionID varchar(255),
80 | accountID varchar(255),
81 | transactionAmount varchar(255),
82 | transactionCurrencyCode varchar(255),
83 | transactionDate varchar(255),
84 | transactionTime varchar(255),
85 | localHour varchar(255),
86 | transactionDeviceId varchar(255),
87 | transactionIPaddress varchar(255)
88 | );
89 | 
90 | drop table if exists Transaction_History
91 | create table Transaction_History
92 | (
93 | accountID varchar(255),
94 | transactionID varchar(255),
95 | transactionDateTime datetime,
96 | transactionAmountUSD varchar(255)
97 | );
98 | 
99 | 
--------------------------------------------------------------------------------
/SQLR/Step10A_Evaluation.sql:
--------------------------------------------------------------------------------
1 | /*
2 | This script will create stored procedure to generate fraud account level metrics
3 | 
4 | parameters:
5 | @table= the scored data to be evaluated
6 | */
7 | 
8 | set ansi_nulls on
9 | go
10 | 
11 | set quoted_identifier on
12 | go
13 | 
14 | DROP PROCEDURE IF EXISTS dbo.EvaluateR
15 | GO
16 | 
17 | create procedure dbo.EvaluateR @table nvarchar(max)
18 | as
19 | begin
20 | 
21 | /* create table to store
the result */
22 | if exists
23 | (select * from sysobjects where name like 'Performance')
24 | truncate table Performance
25 | else
26 | create table Performance (
27 | ADR varchar(255),
28 | PCT_NF_Acct varchar(255),
29 | Dol_Frd varchar(255),
30 | Do_NF varchar(255),
31 | VDR varchar(255),
32 | Acct_FP varchar(255),
33 | PCT_Frd varchar(255),
34 | PCT_NF varchar(255),
35 | AFPR varchar(255),
36 | TFPR varchar(255)
37 | );
38 | 
39 | /* specify the query to select data to be evaluated. this query will be used as input for following R script */
40 | declare @GetScoreData nvarchar(max)
41 | set @GetScoreData = 'select accountID, transactionDateTime, transactionAmountUSD, label, [Probability.1] from ' + @table + ' order by accountID, transactionDateTime'
42 | 
43 | /* R script to generate account level metrics */
44 | insert into Performance
45 | exec sp_execute_external_script @language = N'R',
46 | @script = N'
47 | ####################################################################################################
48 | ## Fraud account level metrics
49 | ####################################################################################################
50 | # Implement account-level performance metrics and transaction-level metrics.
51 | # ADR -- Fraud account detection rate
52 | # VDR -- Value detection rate. The percentage of values saved.
53 | # AFPR -- Account-level false positive ratio.
54 | # ROC -- Transaction-level ROC
55 | # $ROC -- Dollar weighted ROC
56 | # TFPR -- Transaction level false positive ratio.
57 | # sampling rates are taken into consideration to derive performance on the original unsampled dataset.
58 | # contactPeriod is in the unit of days, indicating the lag before a customer is contacted again
59 | # to verify high-score transactions are legitimate.
60 | scr2stat <-function(dataset, contactPeriod, sampleRateNF,sampleRateFrd)
61 | {
62 | #scr quantization/binning into 1000 equal bins
63 | 
64 | #account level score is the maximum of the transaction scores of that account
65 | #all transactions after the first fraud transaction detected are value savings
66 | #input score file needs to be acct-date-time sorted
67 | dataset$"Scored Probabilities" <- dataset$Probability.1
68 | 
69 | fields = names(dataset)
70 | if(! ("accountID" %in% fields))
71 | {print ("Error: Need accountID column!")}
72 | if(! ("transactionDateTime" %in% fields))
73 | {print ("Error: Need transactionDateTime column!")}
74 | if(! ("transactionAmountUSD" %in% fields))
75 | {print ("Error: Need transactionAmountUSD column!")}
76 | if(! ("Scored Probabilities" %in% fields))
77 | {print ("Error: Need Scored Probabilities column!")}
78 | 
79 | nRows = dim(dataset)[1];
80 | 
81 | nBins = 1000;
82 | 
83 | #1. first calculate the perf stats by score band
84 | 
85 | prev_acct =dataset$accountID[1]
86 | prev_score = 0
87 | is_frd_acct = 0
88 | max_scr = 0
89 | 
90 | 
91 | scr_hash=matrix(0, nBins,10)
92 | 
93 | f_scr_rec = vector("numeric",nBins)
94 | #nf_scr_rec = matrix(0, nBins,2) #count, datetime
95 | nf_scr_rec_count = vector("numeric",nBins)
96 | nf_scr_rec_time = vector("numeric",nBins)
97 | 
98 | 
99 | for (r in 1:nRows)
100 | 
101 | 
102 | {
103 | acct = as.character(dataset$accountID[r])
104 | dolamt = as.double(dataset$transactionAmountUSD[r])
105 | label = dataset$label[r]
106 | score = dataset$"Scored Probabilities"[r]
107 | datetime = dataset$transactionDateTime[r]
108 | 
109 | if(score == 0)
110 | {
111 | score = score + 0.00001
112 | print ("The following account has zero score!")
113 | print (paste(acct,dolamt,datetime,sep=" "));
114 | }
115 | 
116 | if(label == 2) next
117 | 
118 | 
119 | if (acct != prev_acct){
120 | scr_bin = ceiling(max_scr*nBins)
121 | 
122 | 
123 | if (is_frd_acct) {
124 | scr_hash[,5] = scr_hash[,5] + f_scr_rec #vdr
125 | scr_hash[scr_bin,1] = scr_hash[scr_bin,1] + 1 #adr
126 | }
127 | else {
128 | scr_hash[,6] = scr_hash[,6] + as.numeric(nf_scr_rec_count) #FP with contact period, a FP could be considered as multiple
129 | scr_hash[scr_bin,2] = scr_hash[scr_bin,2]+1; #a FP account considered one acct
130 | }
131 | 
132 | f_scr_rec = vector("numeric",nBins)
133 | 
134 | nf_scr_rec_count = vector("numeric",nBins)
135 | nf_scr_rec_time = vector("numeric",nBins)
136 | 
137 | is_frd_acct = 0;
138 | total_nf_dol = 0;
139 | total_frd_dol = 0;
140 | max_scr = 0;
141 | }
142 | 
143 | if (score > max_scr) {
144 | max_scr = score;
145 | }
146 | 
147 | #find out the bin the current acct falls in.
148 | tran_scr_bin = ceiling(score*nBins)
149 | 
150 | 
151 | #dollar weighted ROC and regular ROC
152 | if(label == 1){
153 | scr_hash[tran_scr_bin,3] = scr_hash[tran_scr_bin,3]+dolamt;
154 | scr_hash[tran_scr_bin,7] = scr_hash[tran_scr_bin,7]+1;
155 | is_frd_acct = 1;
156 | }
157 | else{
158 | scr_hash[tran_scr_bin,4] = scr_hash[tran_scr_bin,4]+dolamt;
159 | scr_hash[tran_scr_bin,8] = scr_hash[tran_scr_bin,8]+1;
160 | }
161 | 
162 | #ADR/VDR
163 | if(label == 1)
164 | {
165 | #ADR
166 | f_scr_rec[tran_scr_bin] = 1
167 | 
168 | #VDR
169 | #If a higher score appeared before the current score, then this is also savings for the higher score.
170 | #Once a fraud transaction is discovered, all subsequent approved transactions are savings.
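# Illustration (hypothetical numbers): if this fraud transaction has dolamt = 120 and
# the account''s running max score is 0.85, the loop below adds 120 to bins 1..850,
# so any alert threshold at or below 0.85 counts these dollars as detected (saved) value.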
171 | for(i in 1: ceiling(max_scr*nBins))
172 | {
173 | f_scr_rec[i] = f_scr_rec[i] + dolamt
174 | }
175 | }
176 | else
177 | {
178 | #False Positive Accounts (FP) with recontact period
179 | #check if there are any earlier dates for the same or lower score
180 | #update the count and dates when within recontact period
181 | 
182 | #for(i in 1: floor(max_scr*nBins))
183 | for(i in 1: tran_scr_bin)
184 | {
185 | 
186 | prev_time = nf_scr_rec_time[i]
187 | #print(paste(i, tran_scr_bin, sep=" "))
188 | #print(paste(acct, datetime, sep=" "))
189 | #print(prev_time)
190 | if( prev_time > 0)
191 | {
192 | timeDiff = difftime(strptime(datetime,"%Y-%m-%d %H:%M:%S"),strptime(prev_time,"%Y-%m-%d %H:%M:%S"), units="days" )
193 | if(timeDiff >= contactPeriod)
194 | {
195 | nf_scr_rec_count[i] = nf_scr_rec_count[i] +1
196 | nf_scr_rec_time[i] = datetime
197 | }
198 | }
199 | else
200 | {
201 | nf_scr_rec_count[i] = nf_scr_rec_count[i] +1
202 | nf_scr_rec_time[i] = datetime
203 | }
204 | 
205 | }
206 | 
207 | }
208 | 
209 | prev_acct = acct;
210 | 
211 | }
212 | 
213 | 
214 | #1 -- #Frd Acct
215 | #2 -- #NF Acct with infinite recontact period
216 | #3 -- $Frd Tran
217 | #4 -- $NF Tran
218 | #5 -- $Frd Saving
219 | #6 -- #NF Acct with finite recontact period
220 | #7 -- #Frd Tran
221 | #8 -- #NF Tran
222 | #9 -- AFPR
223 | #10 --TFPR
224 | 
225 | #2. now calculate the cumulative perf counts
226 | 
227 | # 5, 6 already in cumulative during previous calculation
228 | 
229 | for (i in (nBins-1):1){
230 | 
231 | for(j in c(1:4,7:8)){
232 | scr_hash[i,j] = scr_hash[i,j]+scr_hash[i+1,j];
233 | }
234 | }
235 | 
236 | #3 calculate AFPR, TFPR:
237 | scr_hash[,9] = scr_hash[,6]/(scr_hash[,1]+0.0001)
238 | scr_hash[,10] = scr_hash[,8]/(scr_hash[,7]+0.0001)
239 | 
240 | #print(scr_hash)
241 | 
242 | #4. now calculate the ADR/VDR, ROC percentage
243 | for(j in c(1:5,7:8)){
244 | scr_hash[,j] = scr_hash[,j]/scr_hash[1,j];
245 | }
246 | 
247 | #5. Adjust for sampling rate
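# Illustration (hypothetical rates): with sampleRateNF = 0.1 and sampleRateFrd = 1,
# the non-fraud columns (2, 4, 6, 8) are scaled up 10x to estimate the unsampled
# population, and the ratio columns (9 AFPR, 10 TFPR) are rescaled by
# sampleRateFrd/sampleRateNF so numerator and denominator stay consistent.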
248 | for (j in c(1, 3, 5 ,7))
249 | {
250 | scr_hash[,j]= scr_hash[,j]/sampleRateFrd
251 | }
252 | 
253 | for (j in c(2, 4, 6 ,8))
254 | {
255 | scr_hash[,j]= scr_hash[,j]/sampleRateNF
256 | }
257 | 
258 | for (j in c(9, 10))
259 | {
260 | scr_hash[,j]= scr_hash[,j]/sampleRateNF*sampleRateFrd
261 | }
262 | 
263 | 
264 | perf.df = as.data.frame(scr_hash)
265 | colnames(perf.df) = c(''ADR'',''PCT NF Acct'',''Dol Frd'', ''Dol NF'', ''VDR'', ''Acct FP(recontact period)'', ''PCT Frd'', ''PCT NF'',''AFPR'',''TFPR'')
266 | return (perf.df)
267 | }
268 | scored_data <- InputDataSet
269 | scored_data$transactionDateTime <- as.character(scored_data$transactionDateTime)
270 | perf <- scr2stat(scored_data,contactPeriod=30, sampleRateNF=1,sampleRateFrd=1)
271 | OutputDataSet <- as.data.frame(perf)
272 | ',
273 | @input_data_1 = @GetScoreData
274 | ;
275 | end
--------------------------------------------------------------------------------
/SQLR/Step10B_Evaluation_AUC.sql:
--------------------------------------------------------------------------------
1 | /*
2 | This script will create stored procedure to calculate AUC
3 | 
4 | parameters:
5 | @table= the scored data to be evaluated
6 | */
7 | 
8 | set ansi_nulls on
9 | go
10 | 
11 | set quoted_identifier on
12 | go
13 | 
14 | DROP PROCEDURE IF EXISTS dbo.EvaluateR_auc
15 | GO
16 | 
17 | create procedure dbo.EvaluateR_auc @table nvarchar(max)
18 | as
19 | begin
20 | 
21 | /* create table to store AUC value */
22 | if exists
23 | (select * from sysobjects where name like 'Performance_Auc')
24 | truncate table Performance_Auc
25 | else
26 | create table Performance_Auc (
27 | AUC float
28 | );
29 | 
30 | /* specify the query to select data to be evaluated. this query will be used as input for following R script */
31 | declare @GetScoreData nvarchar(max)
32 | set @GetScoreData = 'select * from ' + @table
33 | 
34 | /* R script to calculate AUC */
35 | insert into Performance_Auc
36 | exec sp_execute_external_script @language = N'R',
37 | @script = N'
38 | Predictions <- InputDataSet
39 | Predictions$label <- as.numeric(as.character(Predictions$label))
40 | 
41 | # Compute the AUC.
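# rxRoc (RevoScaleR) evaluates sensitivity/specificity over numBreaks probability
# thresholds from the actual labels and predicted probabilities, and rxAuc then
# integrates that ROC curve into a single area-under-the-curve value.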
42 | ROC <- rxRoc(actualVarName = "label", predVarNames = "Probability.1", data = Predictions, numBreaks = 1000)
43 | AUC <- rxAuc(ROC)
44 | OutputDataSet <- as.data.frame(AUC)
45 | ',
46 | @input_data_1 = @GetScoreData
47 | ;
48 | end
--------------------------------------------------------------------------------
/SQLR/Step1_MergeAcctInfo.sql:
--------------------------------------------------------------------------------
1 | /*
2 | This script will create stored procedure to merge untagged transactions with account-level information
3 | */
4 | 
5 | set ansi_nulls on
6 | go
7 | 
8 | set quoted_identifier on
9 | go
10 | 
11 | DROP PROCEDURE IF EXISTS MergeAcctInfo
12 | GO
13 | 
14 | create procedure MergeAcctInfo @table nvarchar(max)
15 | as
16 | begin
17 | 
18 | declare @droptable nvarchar(max)
19 | set @droptable = 'drop table if exists ' + @table + '_Acct'
20 | exec sp_executesql @droptable
21 | 
22 | /* Merge with AccountInfo_Sort table */
23 | declare @MergeQuery nvarchar(max)
24 | set @MergeQuery =
25 | '
26 | select t1.*,
27 | t2.accountOwnerName,
28 | t2.accountAddress,
29 | t2.accountPostalCode,
30 | t2.accountCity,
31 | t2.accountState,
32 | t2.accountCountry,
33 | t2.accountOpenDate,
34 | t2.accountAge,
35 | t2.isUserRegistered,
36 | t2.paymentInstrumentAgeInAccount,
37 | t2.numPaymentRejects1dPerUser
38 | into ' + @table + '_Acct ' +
39 | 'from
40 | (select *,
41 | convert(datetime,stuff(stuff(stuff(concat(transactionDate,dbo.FormatTime(transactionTime)), 9, 0, '' ''), 12, 0, '':''), 15, 0, '':'')) as transactionDateTime
42 | from ' + @table + ') as t1
43 | outer apply
44 | (select top 1 * -- the top 1 is the maximum transactionDateTime up to current transactionDateTime
45 | from Account_Info_Sort as t
46 | where t.accountID = t1.accountID and t.recordDateTime <= t1.transactionDateTime) as t2
47 | where t1.accountID = t2.accountID
48 | '
49 | 
50 | exec sp_executesql @MergeQuery
51 | end
--------------------------------------------------------------------------------
/SQLR/Step2_Tagging.sql:
--------------------------------------------------------------------------------
1 | /*
2 | This script will create stored procedure to do the following:
3 | 1. normalize transactionTime to 6 digits
4 | 2. remove duplicated rows
5 | 3. 
tagging on account level
6 | 
7 | input parameters:
8 | @untaggedtable = table of untagged transactions
9 | @fraudtable = table of fraud transactions
10 | */
11 | 
12 | set ansi_nulls on
13 | go
14 | 
15 | set quoted_identifier on
16 | go
17 | 
18 | DROP PROCEDURE IF EXISTS Tagging
19 | GO
20 | 
21 | create procedure Tagging
22 | @untaggedtable varchar(max),
23 | @fraudtable varchar(max)
24 | as
25 | begin
26 | 
27 | DROP TABLE IF EXISTS Tagged;
28 | 
29 | /***********************************************************************/
30 | /* reformat transactionTime and create transactionDateTime for fraud transactions*/
31 | /**********************************************************************/
32 | /* ##table is a global temporary table which will be written only once to temporary database */
33 | declare @maketransactionDateTime nvarchar(max)
34 | set @maketransactionDateTime =
35 | '
36 | select *,
37 | convert(datetime,stuff(stuff(stuff(concat(transactionDate,dbo.FormatTime(transactionTime)), 9, 0, '' ''), 12, 0, '':''), 15, 0, '':'')) as transactionDateTime
38 | into ##Formatted_Fraud
39 | from ' + @fraudtable
40 | 
41 | exec sp_executesql @maketransactionDateTime
42 | /*****************************************************************************************************************/
43 | /* remove duplicates based on the keys: transactionID, accountID, transactionDateTime, transactionAmount */
44 | /*****************************************************************************************************************/
45 | /* sometimes an entire transaction might be divided into multiple sub-transactions; thus, even when transactionID, accountID, and transactionDate/Time are the same, the amounts might differ */
46 | declare @removeduplicates1 nvarchar(max)
47 | set @removeduplicates1 =
48 | ';WITH cte_1
49 | AS (SELECT ROW_NUMBER() OVER (PARTITION BY transactionID, accountID, transactionDateTime, transactionAmount
50 | ORDER BY transactionID ASC) RN
51 | FROM ' + @untaggedtable + ')
52 | DELETE FROM cte_1
53 | WHERE RN > 1;'
54 | exec sp_executesql @removeduplicates1
55 | 
56 | ;WITH cte_2
57 | AS (SELECT ROW_NUMBER() OVER (PARTITION BY transactionID, accountID, transactionDate, transactionDateTime, transactionAmount
58 | ORDER BY transactionID ASC) RN
59 | FROM ##Formatted_Fraud)
60 | DELETE FROM cte_2
61 | WHERE RN > 1;
62 | 
63 | 
64 | /*********************************************************************************************************************/
65 | /* tagging on account level:
66 | if accountID can't be found in fraud dataset => tag as 0, non fraud
67 | if accountID found in fraud dataset but transactionDateTime is out of the fraud time range => tag as 2, pre-fraud
68 | if accountID found in fraud dataset and transactionDateTime is within the fraud time range => tag as 1, fraud */
69 | /**********************************************************************************************************************/
70 | /* convert fraud to account level and create start and end date time */
71 | select accountID, min(transactionDateTime) as startDateNTime, max(transactionDateTime) as endDateNTime
72 | into ##Fraud_Account
73 | from ##Formatted_Fraud
74 | group by accountID
75 | 
76 | 
77 | /* Tagging */
78 | declare @tagging nvarchar(max)
79 | set @tagging =
80 | 'select t.*,
81 | case
82 | when (sDT is not null and tDT >= sDT and tDT <= eDT) then 1
83 | when (sDT is not null and tDT < sDT) then 2
84 | when (sDT is not null and tDT > eDT) then 2
85 | when sDT is null then 0
86 | end as label
87 | into Tagged
88 | from
89 | (select t1.*,
90 | t1.transactionDateTime as tDT,
91 | t2.startDateNTime as sDT,
92 | t2.endDateNTime as eDT
93 | from ' + @untaggedtable + ' as t1
94 | left join
95 | ##Fraud_Account as t2
96 | on t1.accountID = t2.accountID
97 | ) t'
98 | exec sp_executesql @tagging
99 | 
100 | drop table ##Fraud_Account
101 | drop table ##Formatted_Fraud
102 | end
--------------------------------------------------------------------------------
/SQLR/Step3_SplitData.sql:
--------------------------------------------------------------------------------
1 | /*
2 | This script will create stored procedure to split data on account level
3 | 
4 | input parameter:
5 | @table = table to be split
6 | */
7 | 
8 | set ansi_nulls on
9 | go
10 | 
11 | set quoted_identifier on
12 | go
13 | 
14 | DROP PROCEDURE IF EXISTS SplitData
15 | GO
16 | 
17 | create procedure SplitData @table varchar(max)
18 | as
19 | begin
20 | 
21 | 
22 | /* hash accountID into 100 bins and split */
23 | 
24 | declare @hashacctNsplit nvarchar(max)
25 | set @hashacctNsplit ='
26 | DROP TABLE IF EXISTS Tagged_Training
27 | DROP TABLE IF EXISTS Tagged_Testing
28 | DROP TABLE IF EXISTS Hash_Id
29 | 
30 | select accountID,
31 | abs(CAST(CAST(HashBytes(''MD5'', accountID) AS VARBINARY(64)) AS BIGINT) % 100) as hashCode
32 | into Hash_Id
33 | from ' + @table + '
34 | 
35 | select * into Tagged_Training
36 | from ' +@table + '
37 | where accountID in (select accountID from Hash_Id where hashCode <= 70)
38 | 
39 | select * into Tagged_Testing
40 | from ' +@table + '
41 | where accountID in (select accountID from Hash_Id where hashCode > 70)'
42 | 
43 | exec sp_executesql @hashacctNsplit
44 | 
45 | end
--------------------------------------------------------------------------------
/SQLR/Step4_Preprocess.sql:
--------------------------------------------------------------------------------
1 | /*
2 | This script will create stored procedure to do preprocessing including:
3 | 1. fill missing values with 0
4 | 2. remove transactions with negative transaction amount
5 | 3. remove transactions with invalid transactionDate and time
6 | 4. remove prefraud: label == 2
7 | 
8 | input parameters:
9 | @table = the table to be preprocessed
10 | */
11 | 
12 | set ansi_nulls on
13 | go
14 | 
15 | set quoted_identifier on
16 | go
17 | 
18 | DROP PROCEDURE IF EXISTS Preprocess
19 | GO
20 | 
21 | create procedure Preprocess @table nvarchar(max)
22 | as
23 | begin
24 | 
25 | /* drop view if exists */
26 | declare
27 | @sql_dropview nvarchar(max) = '';
28 | set @sql_dropview = '
29 | DROP VIEW IF EXISTS ' + @table + '_Processed'
30 | exec sp_executesql @sql_dropview;
31 | 
32 | /* create a view to do preprocessing */
33 | declare @sql_process nvarchar(max) = '';
34 | set @sql_process = '
35 | create view ' + @table + '_Processed as
36 | select
37 | label,
38 | accountID,
39 | transactionID,
40 | transactionDateTime,
41 | isnull(isProxyIP, ''0'') as isProxyIP,
42 | isnull(paymentInstrumentType, ''0'') as paymentInstrumentType,
43 | isnull(cardType, ''0'') as cardType,
44 | isnull(paymentBillingAddress, ''0'') as paymentBillingAddress,
45 | isnull(paymentBillingPostalCode, ''0'') as paymentBillingPostalCode,
46 | isnull(paymentBillingCountryCode, ''0'') as paymentBillingCountryCode,
47 | isnull(paymentBillingName, ''0'') as paymentBillingName,
48 | isnull(accountAddress, ''0'') as accountAddress,
49 | isnull(accountPostalCode, ''0'') as accountPostalCode,
50 | isnull(accountCountry, ''0'') as accountCountry,
51 | isnull(accountOwnerName, ''0'') as accountOwnerName,
52 | isnull(shippingAddress, ''0'') as shippingAddress,
53 | isnull(transactionCurrencyCode, ''0'') as transactionCurrencyCode,
54 | isnull(localHour,''-99'') as localHour,
55 | isnull(ipState, ''0'') as ipState,
56 | isnull(ipPostCode, ''0'') as ipPostCode,
57 | isnull(ipCountryCode, ''0'') as ipCountryCode,
58 | isnull(browserLanguage, ''0'') as browserLanguage,
59 | isnull(paymentBillingState, ''0'') as paymentBillingState,
60 | isnull(accountState, ''0'') as accountState,
61 | case when isnumeric(transactionAmountUSD)=1 then cast(transactionAmountUSD as float) else 0 end as transactionAmountUSD,
62 | case when isnumeric(digitalItemCount)=1 then cast(digitalItemCount as float) else 0 end as digitalItemCount,
63 | case when isnumeric(physicalItemCount)=1 then cast(physicalItemCount as float) else 0 end as physicalItemCount,
64 | case when isnumeric(accountAge)=1 then cast(accountAge as float) else 0 end as accountAge,
65 | case when isnumeric(paymentInstrumentAgeInAccount)=1 then cast(paymentInstrumentAgeInAccount as float) else 0 end as paymentInstrumentAgeInAccount,
66 | case when isnumeric(numPaymentRejects1dPerUser)=1 then cast(numPaymentRejects1dPerUser as float) else 0 end as numPaymentRejects1dPerUser,
67 | isUserRegistered = case when isUserRegistered like ''%[0-9]%'' then ''0'' else isUserRegistered end
68 | from ' + @table + '
69 | where cast(transactionAmountUSD as float) >= 0 and
70 | (case when transactionDateTime is null then 1 else 0 end) = 0 and
71 | label < 2'
72 | 
73 | exec sp_executesql @sql_process
74 | end
75 | 
76 | 
77 | 
--------------------------------------------------------------------------------
/SQLR/Step5_Save2History.sql:
--------------------------------------------------------------------------------
1 | /*
2 | This script will create stored procedure to do the following:
3 | 1. truncate historical table if truncateflag = '1'
4 | 2. save transactions to the historical table
5 | 
6 | input parameters:
7 | @table = table of transactions to be saved into the historical table
8 | @truncateflag = indicates whether the historical table needs to be truncated: '1'=yes, '0'=no
9 | */
10 | 
11 | set ansi_nulls on
12 | go
13 | 
14 | set quoted_identifier on
15 | go
16 | 
17 | DROP PROCEDURE IF EXISTS Save2TransactionHistory
18 | GO
19 | 
20 | create procedure Save2TransactionHistory @table nvarchar(max),
21 | @truncateflag nvarchar(max)
22 | as
23 | begin
24 | 
25 | /* truncate historical table if truncateflag = '1' */
26 | declare @truncatetable nvarchar(max) = '';
27 | set @truncatetable = 'if cast(' + @truncateflag + ' as int) = 1 truncate table Transaction_History'
28 | exec sp_executesql @truncatetable
29 | 
30 | /* insert transactions into historical table */
31 | declare @sql_save2history nvarchar(max) = '';
32 | set @sql_save2history ='
33 | insert into Transaction_History
34 | select accountID, transactionID, transactionDateTime, transactionAmountUSD from ' + @table + ';'
35 | exec sp_executesql @sql_save2history
36 | 
37 | end
--------------------------------------------------------------------------------
/SQLR/Step6_CreateRiskTables.sql:
--------------------------------------------------------------------------------
1 | /*
2 | This script will create stored procedure to create all risk tables
3 | */
4 | 
5 | set ansi_nulls on
6 | go
7 | 
8 | set quoted_identifier on
9 | go
10 | 
11 | DROP PROCEDURE IF EXISTS CreateRiskTable_ForAll
12 | GO
13 | 
14 | create procedure CreateRiskTable_ForAll
15 | as
16 | begin
17 | 
18 | /* create a table to store names of variables and risk tables. will be used as reference in the loop later */
19 | if exists
20 | (select * from sysobjects where name like 'Risk_Var')
21 | truncate table Risk_Var
22 | else
23 | create table dbo.Risk_Var (ID int,var_names varchar(255), table_names varchar(255));
24 | 
25 | insert into Risk_Var values (1, 'transactionCurrencyCode', 'Risk_TransactionCurrencyCode');
26 | insert into Risk_Var values (2, 'localHour', 'Risk_LocalHour');
27 | insert into Risk_Var values (3, 'ipState', 'Risk_IpState');
28 | insert into Risk_Var values (4, 'ipPostCode', 'Risk_IpPostCode');
29 | insert into Risk_Var values (5, 'ipCountryCode', 'Risk_IpCountryCode');
30 | insert into Risk_Var values (6, 'browserLanguage', 'Risk_BrowserLanguage');
31 | insert into Risk_Var values (7, 'paymentBillingPostalCode', 'Risk_PaymentBillingPostalCode');
32 | insert into Risk_Var values (8, 'paymentBillingState', 'Risk_PaymentBillingState');
33 | insert into Risk_Var values (9, 'paymentBillingCountryCode', 'Risk_PaymentBillingCountryCode');
34 | insert into Risk_Var values (10, 'accountPostalCode', 'Risk_AccountPostalCode');
35 | insert into Risk_Var values (11, 'accountState', 'Risk_AccountState');
36 | insert into Risk_Var values (12, 'accountCountry', 'Risk_AccountCountry');
37 | 
38 | /* create all risk tables by looping over all variables in reference table and executing CreateRiskTable stored procedure */
39 | DECLARE @name_1 NVARCHAR(100)
40 | DECLARE @name_2 NVARCHAR(100)
41 | DECLARE @getname CURSOR
42 | 
43 | SET @getname = CURSOR FOR
44 | SELECT var_names,
45 | table_names
46 | FROM Risk_Var
47 | OPEN @getname
48 | FETCH NEXT
49 | FROM @getname INTO @name_1,@name_2
50 | WHILE @@FETCH_STATUS = 0
51 | BEGIN
52 | EXEC CreateRiskTable @name_1,@name_2 -- create risk table by calling stored procedure CreateRiskTable
53 | FETCH NEXT
54 | FROM @getname INTO @name_1, @name_2
55 | END
56 | 
57 | CLOSE @getname
58 | DEALLOCATE @getname
59 | end
--------------------------------------------------------------------------------
/SQLR/Step7_FeatureEngineer.sql:
--------------------------------------------------------------------------------
1 | /*
2 | This script will create stored procedure to do the following feature engineering:
3 | 1. create mismatch flags
4 | 2. convert categorical variables to numerical by assigning risk values based on risk tables
5 | 3. calculate aggregates
6 | 
7 | input parameters:
8 | @table = the table to be feature engineered
9 | */
10 | 
11 | set ansi_nulls on
12 | go
13 | 
14 | set quoted_identifier on
15 | go
16 | 
17 | DROP PROCEDURE IF EXISTS FeatureEngineer
18 | GO
19 | 
20 | create procedure FeatureEngineer @table nvarchar(max)
21 | as
22 | begin
23 | 
24 | /* create mismatch flags and assign risk values */
25 | declare
26 | @sql_dropview1 nvarchar(max) = '';
27 | set @sql_dropview1 = '
28 | DROP VIEW IF EXISTS ' + @table + '_Features1'
29 | exec sp_executesql @sql_dropview1;
30 | 
31 | declare @sql_fe1 nvarchar(max) = '';
32 | set @sql_fe1 = 'create view ' + @table + '_Features1 as
33 | select t.label,t.accountID,t.transactionID,t.transactionDateTime,
34 | t.transactionAmountUSD,
35 | t.digitalItemCount,
36 | t.physicalItemCount,
37 | t.isProxyIP,
38 | t.paymentInstrumentType,
39 | t.cardType,
40 | t.isUserRegistered,
41 | t.accountAge,
42 | t.paymentInstrumentAgeInAccount,
43 | t.numPaymentRejects1dPerUser,
44 | case when t.transactionAmountUSD > 150 then ''1'' else ''0'' end as isHighAmount,
45 | case when t.paymentBillingAddress = t.accountAddress then ''0'' else ''1'' end as acctBillingAddressMismatchFlag,
46 | case when t.paymentBillingPostalCode = t.accountPostalCode then ''0'' else ''1'' end as acctBillingPostalCodeMismatchFlag,
47 | case when t.paymentBillingCountryCode = t.accountCountry then ''0'' else ''1'' end as acctBillingCountryMismatchFlag,
48 | case when t.paymentBillingName = t.accountOwnerName then ''0'' else ''1'' end as acctBillingNameMismatchFlag,
49 | case when t.shippingAddress = t.accountAddress then ''0'' else ''1'' end as acctShippingAddressMismatchFlag,
50 | case when t.shippingAddress = t.paymentBillingAddress then ''0'' else ''1'' end as shippingBillingAddressMismatchFlag,
51 | isnull(ac.risk,0) as accountCountryRisk,
52 | isnull(apc.risk,0) as accountPostalCodeRisk,
53 | isnull(actst.risk,0) as accountStateRisk,
54 | isnull(bl.risk,0) as browserLanguageRisk,
55 | isnull(ic.risk,0) as ipCountryCodeRisk,
56 | isnull(ipc.risk,0) as ipPostCodeRisk,
57 | isnull(ips.risk,0) as ipStateRisk,
58 | isnull(lh.risk,0) as localHourRisk,
59 | isnull(pbcc.risk,0) as paymentBillingCountryCodeRisk,
60 | isnull(pbpc.risk,0) as paymentBillingPostalCodeRisk,
61 | isnull(pbst.risk,0) as paymentBillingStateRisk,
62 | isnull(tcc.risk,0) as transactionCurrencyCodeRisk
63 | from ' +@table + ' as t
64 | left join Risk_AccountCountry as ac on ac.accountCountry = t.accountCountry
65 | left join Risk_AccountPostalCode as apc on apc.accountPostalCode = t.accountPostalCode
66 | left join Risk_AccountState as actst on actst.accountState = t.accountState
67 | left join Risk_BrowserLanguage as bl on bl.browserLanguage = t.browserLanguage
68 | left join Risk_IpCountryCode as ic on ic.ipCountryCode = t.ipCountryCode
69 | left join Risk_IpPostCode as ipc on ipc.ipPostCode = t.ipPostCode
70 | left join Risk_IpState as ips on ips.ipState = t.ipState
71 | left join Risk_LocalHour as lh on lh.localHour = t.localHour
72 | left join Risk_PaymentBillingCountryCode as pbcc on pbcc.paymentBillingCountryCode = t.paymentBillingCountryCode
73 | left join Risk_PaymentBillingPostalCode as pbpc on pbpc.paymentBillingPostalCode = t.paymentBillingPostalCode
74 | left join Risk_PaymentBillingState as pbst on pbst.paymentBillingState = t.paymentBillingState
75 | left join Risk_TransactionCurrencyCode as tcc on tcc.transactionCurrencyCode = t.transactionCurrencyCode
76 | '
77 | exec sp_executesql @sql_fe1;
78 | 
79 | /* create aggregates on the fly */
80 | declare
81 | @sql_dropview nvarchar(max) = '';
82 | set @sql_dropview = '
83 | DROP VIEW IF EXISTS ' + @table + '_Features'
84 | exec sp_executesql @sql_dropview;
85 | 
86 | declare @sql_fe nvarchar(max) = '';
87 | set @sql_fe = 'create view ' + @table + '_Features as
88 | select * from ' + @table + '_Features1 as t
89 | outer apply
90 | (select
91 | isnull(sum(case when t2.transactionDateTime > last24Hours then cast(t2.transactionAmountUSD as float) end),0) as sumPurchaseAmount1dPerUser,
92 | isnull(count(case when t2.transactionDateTime > last24Hours then t2.transactionAmountUSD end),0) as sumPurchaseCount1dPerUser,
93 | isnull(sum(cast(t2.transactionAmountUSD as float)),0) as sumPurchaseAmount30dPerUser,
94 | isnull(count(t2.transactionAmountUSD),0) as sumPurchaseCount30dPerUser
95 | from Transaction_History as t2
96 | cross apply (values(t.transactionDateTime, DATEADD(hour, -24, t.transactionDateTime), DATEADD(day, -30, t.transactionDateTime))) as c(transactionDateTime, last24Hours, last30Days)
97 | where t2.accountID = t.accountID and t2.transactionDateTime < t.transactionDateTime and t2.transactionDateTime > last30Days
98 | ) as a1'
99 | 
100 | exec sp_executesql @sql_fe;
101 | end
102 | 
103 | 
104 | 
105 | 
--------------------------------------------------------------------------------
/SQLR/Step8_Training.sql:
--------------------------------------------------------------------------------
1 | /*
2 | This script will create stored procedure to do the following:
3 | 1. down sample the majority class
4 | 2. train a gradient boosted tree model
5 | 3. save the trained model into a sql table
6 | 
7 | input parameters:
8 | @table = the table used as training set
9 | */
10 | 
11 | set ansi_nulls on
12 | go
13 | 
14 | set quoted_identifier on
15 | go
16 | 
17 | DROP PROCEDURE IF EXISTS TrainModelR
18 | GO
19 | 
20 | create procedure TrainModelR @table nvarchar(max)
21 | as
22 | begin
23 | 
24 | /* Create an empty table to be filled with the trained models */
25 | if exists
26 | (select * from sysobjects where name like 'Trained_Model')
27 | truncate table Trained_Model
28 | else
29 | create table Trained_Model (
30 | id varchar(200) not null,
31 | value varbinary(max)
32 | --,constraint unique_id3 unique(id)
33 | );
34 | 
35 | /* down sample the majority class by:
36 | 1. sorting the data by label and accountID in descending order
37 | 2. selecting the top 10000 rows
38 | */
39 | declare @GetTrainData nvarchar(max)
40 | set @GetTrainData = 'select * from ' + @table
41 | 
42 | /*Get the database name*/
43 | DECLARE @database_name nvarchar(max) = db_name();
44 | 
45 | /* R script to train GBT model and save the trained model into a sql table */
46 | execute sp_execute_external_script
47 | @language = N'R',
48 | @script = N'
49 | # define the connection string
50 | connection_string <- paste("Driver=SQL Server;Server=localhost;Database=", database_name, ";Trusted_Connection=true;", sep="")
51 | 
52 | # Set the Compute Context to SQL for faster training.
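# RxInSqlServer makes subsequent rx* computations run in-database, so the training
# query below is evaluated on the SQL Server side rather than pulling all rows into
# the local R session over ODBC.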
53 | sql <- RxInSqlServer(connectionString = connection_string)
54 | rxSetComputeContext(sql)
55 | 
56 | ## Point to the training data in sql server
57 | train_sql <- RxSqlServerData(sqlQuery = sprintf("%s", inquery),
58 | connectionString = connection_string,
59 | stringsAsFactors = TRUE)
60 | 
61 | ## make equations
62 | variables_all <- rxGetVarNames(train_sql)
63 | variables_to_remove <- c("label", "accountID", "transactionID", "transactionDateTime")
64 | training_variables <- variables_all[!(variables_all %in% variables_to_remove)]
65 | equation <- paste("label ~ ", paste(training_variables, collapse = "+", sep=""), sep="")
66 | 
67 | ## train GBT model
68 | library("MicrosoftML")
69 | boosted_fit <- rxFastTrees(formula = as.formula(equation),
70 | data = train_sql,
71 | type = c("binary"),
72 | numTrees = 100,
73 | learningRate = 0.2,
74 | splitFraction = 5/24,
75 | featureFraction = 1,
76 | minSplit = 10,
77 | unbalancedSets = TRUE,
78 | randomSeed = 5)
79 | 
80 | ## save the trained model in sql server
81 | # set the compute context to local for exporting tables to SQL
82 | rxSetComputeContext("local")
83 | # Open an Odbc connection with SQL Server.
84 | OdbcModel <- RxOdbcData(table = "Trained_Model", connectionString = connection_string)
85 | rxOpen(OdbcModel, "w")
86 | # Write the model to SQL.
87 | rxWriteObject(OdbcModel, "Gradient Boosted Tree", boosted_fit)
88 | 
89 | '
90 | , @params = N' @inquery nvarchar(max), @database_name varchar(max)'
91 | , @inquery = @GetTrainData
92 | , @database_name = @database_name
93 | ;
94 | end
--------------------------------------------------------------------------------
/SQLR/Step9_Prediction.sql:
--------------------------------------------------------------------------------
1 | /*
2 | This script will create the stored procedure to do the following:
3 | 1. normalize transactionTime to 6 digits if necessary
4 | 2. preprocess data
5 | 3. save transaction data to historical table
6 | 4. feature engineering
7 | 5. scoring
8 | 6. save the scored data set to a sql table
9 | 
10 | input parameters:
11 | @inputtable = the table of data to be scored
12 | @outputtable = the table that stores the scored data
13 | @getacctflag = the flag to indicate if merging with the accountInfo table is needed: '1'=yes, '0'=no
14 | */
15 | 
16 | set ansi_nulls on
17 | go
18 | 
19 | set quoted_identifier on
20 | go
21 | 
22 | DROP PROCEDURE IF EXISTS PredictR
23 | GO
24 | 
25 | create procedure PredictR @inputtable nvarchar(max),
26 | @outputtable nvarchar(max),
27 | @getacctflag nvarchar(max)
28 | as
29 | begin
30 | 
31 | /* merge with the accountInfo table if getacctflag = '1' */
32 | declare @mergeacct nvarchar(max) = '';
33 | set @mergeacct = 'if cast(' + @getacctflag + ' as int) = 1
34 | begin
35 | EXEC MergeAcctInfo ' + @inputtable + '
36 | end'
37 | exec sp_executesql @mergeacct
38 | 
39 | /* copy @inputtable into <inputtable>_Acct if getacctflag = '0' */
40 | declare @renametable nvarchar(max) = '';
41 | set @renametable =
42 | 'if cast(' + @getacctflag + ' as int) = 0
43 | begin
44 | drop table if exists ' + @inputtable + '_Acct
45 | select * into ' + @inputtable + '_Acct from ' + @inputtable + '
46 | end'
47 | exec sp_executesql @renametable
48 | 
49 | /* add a placeholder label if label doesn't exist */
50 | declare @addlabel nvarchar(max) = '';
51 | set @addlabel = '
52 | IF NOT EXISTS(SELECT 1 FROM sys.columns
53 | WHERE Name = N''label''
54 | AND Object_ID = Object_ID(N''' + @inputtable + '_Acct''))
55 | BEGIN
56 | alter table ' + @inputtable + '_Acct add label int not null default(-1)
57 | END'
58 | exec sp_executesql @addlabel
59 | 
60 | /* preprocessing by calling the stored procedure 'Preprocess' */
61 | declare @preprocess nvarchar(max)
62 | set @preprocess = 'exec Preprocess ' + @inputtable + '_Acct'
63 | exec sp_executesql @preprocess
64 | 
65 | /* save transactions to the history table */
66 | declare @sql_save2history nvarchar(max)
67 | set @sql_save2history = 'exec Save2TransactionHistory ' + @inputtable + '_Acct_Processed, ''0'''
68 | exec sp_executesql @sql_save2history
69 | 
70 | /* feature engineering by calling the stored procedure 'FeatureEngineer' */
71 | declare @fe_query nvarchar(max)
72 | set @fe_query = 'exec FeatureEngineer ' + @inputtable + '_Acct_Processed'
73 | exec sp_executesql @fe_query
74 | 
75 | /* specify the query to select the data to be scored. This query will be used as input to the following R script */
76 | declare @GetData2Score nvarchar(max)
77 | set @GetData2Score = 'select * from ' + @inputtable + '_Acct_Processed_Features where label<=1';
78 | 
79 | /* Get the database name */
80 | DECLARE @database_name varchar(max) = db_name();
81 | 
82 | /* R script to do the scoring and save the scored dataset into a sql table */
83 | exec sp_execute_external_script @language = N'R',
84 | @script = N'
85 | ## Get the trained model
86 | # Define the connection string
87 | connection_string <- paste("Driver=SQL Server;Server=localhost;Database=", database_name, ";Trusted_Connection=true;", sep="")
88 | # Create an Odbc connection with SQL Server using the name of the table storing the model
89 | OdbcModel <- RxOdbcData(table = "Trained_Model", connectionString = connection_string)
90 | # Read the model from SQL.
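# (Comment added for clarity: rxReadObject deserializes the varbinary value that
#  rxWriteObject stored in Step8; the key "Gradient Boosted Tree" must match the
#  id under which the model was written.)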
91 | boosted_fit <- rxReadObject(OdbcModel, "Gradient Boosted Tree")
92 | 
93 | ## Point to the data to be scored in sql server
94 | test_sql <- RxSqlServerData(sqlQuery = sprintf("%s", inquery),
95 | connectionString = connection_string,
96 | stringsAsFactors = TRUE)
97 | 
98 | ## Specify the pointer to the output table
99 | Predictions_gbt_sql <- RxSqlServerData(table = outputtable, connectionString = connection_string)
100 | 
101 | ## Define the SQL Compute Context; scoring runs in the local context unless the next line is uncommented.
102 | sql <- RxInSqlServer(connectionString = connection_string)
103 | #rxSetComputeContext(sql)
104 | 
105 | ## Scoring
106 | library("MicrosoftML")
107 | rxPredict(modelObject = boosted_fit,
108 | data = test_sql,
109 | outData = Predictions_gbt_sql,
110 | overwrite = T,
111 | extraVarsToWrite = c("accountID", "transactionID", "transactionDateTime", "transactionAmountUSD", "label"))
112 | 
113 | '
114 | , @params = N' @inquery nvarchar(max), @database_name varchar(max), @outputtable nvarchar(max)'
115 | , @inquery = @GetData2Score
116 | , @database_name = @database_name
117 | , @outputtable = @outputtable
118 | ;
119 | end
-------------------------------------------------------------------------------- /SQLR/UtilityFunctions.sql: --------------------------------------------------------------------------------
1 | /*
2 | This script creates utility functions used by the other stored procedures: currently dbo.FormatTime, which left-pads transactionTime to 6 digits (e.g. '93045' -> '093045')
3 | */
4 | 
5 | set ansi_nulls on
6 | go
7 | 
8 | set quoted_identifier on
9 | go
10 | 
11 | /* create the function to pad transactionTime to 6 digits */
12 | IF object_id(N'FormatTime', N'FN') IS NOT NULL
13 | DROP FUNCTION FormatTime
14 | GO
15 | 
16 | create function dbo.FormatTime (@strTime varchar(255) )
17 | returns varchar(255)
18 | as
19 | begin
20 | declare @strTimeNew varchar(255)
21 | set @strTimeNew =
22 | case
23 | when len(@strTime) = 5 then concat('0',@strTime)
24 | when len(@strTime) = 4 then concat('00',@strTime)
25 | when len(@strTime) = 3 then concat('000',@strTime)
26 | when len(@strTime) = 2 then concat('0000',@strTime)
27 | when len(@strTime) = 1 then concat('00000',@strTime)
28 | else @strTime
29 | end
30 | return(@strTimeNew)
31 | end
32 | go
33 | 
-------------------------------------------------------------------------------- /SQLR/createuser.sql: --------------------------------------------------------------------------------
1 | :on error exit
2 | --
3 | -- remove old $(username) user and login from master.
4 | -- $(username) and $(password) are substituted by Invoke-SqlCmd
5 | -- through environment variables.
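-- (Illustrative, hypothetical invocation -- the parameter values are placeholders
--  and the real call is made by the setup PowerShell scripts:
--    Invoke-SqlCmd -InputFile createuser.sql -Variable @("username=<user>", "password=<password>")
--  each "name=value" entry populates the matching $(name) token below.)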
6 | --
7 | USE [master]
8 | GO
9 | IF EXISTS (SELECT name FROM sys.database_principals WHERE name = '$(username)')
10 | BEGIN
11 | PRINT 'Deleting old $(username) user from master'
12 | DROP USER [$(username)]
13 | END
14 | GO
15 | IF EXISTS (SELECT name FROM master.sys.server_principals WHERE name = '$(username)')
16 | BEGIN
17 | PRINT 'Deleting old $(username) login from master'
18 | DROP LOGIN [$(username)]
19 | END
20 | GO
21 | --
22 | -- create new $(username) login in master
23 | --
24 | USE [master]
25 | GO
26 | PRINT 'Creating $(username) login in master'
27 | CREATE LOGIN [$(username)] WITH PASSWORD=N'$(password)', CHECK_EXPIRATION=OFF, CHECK_POLICY=OFF;
28 | CREATE USER [$(username)] FOR LOGIN [$(username)]
29 | --ALTER ROLE [db_rrerole] ADD MEMBER [$(username)]
30 | ALTER ROLE [db_owner] ADD MEMBER [$(username)]
31 | GO
32 | 
33 | exec sp_addrolemember 'db_owner', '$(username)'
34 | exec sp_addrolemember 'db_ddladmin', '$(username)'
35 | exec sp_addrolemember 'db_accessadmin', '$(username)'
36 | exec sp_addrolemember 'db_datareader', '$(username)'
37 | exec sp_addrolemember 'db_datawriter', '$(username)'
38 | exec sp_addsrvrolemember @loginame= '$(username)', @rolename = 'sysadmin'
39 | GO
40 | 
41 | -- Enable implied authentication so a connection string can be created automatically in R code embedded in SQL stored procedures.
42 | USE [master]
43 | GO
44 | DECLARE @host_name nvarchar(100)
45 | SET @host_name = (SELECT HOST_NAME())
46 | DECLARE @sql nvarchar(max);
47 | SELECT @sql = N'
48 | CREATE LOGIN [' + @host_name + '\SQLRUserGroup] FROM WINDOWS WITH DEFAULT_DATABASE=[master]';
49 | EXEC sp_executesql @sql;
50 | 
51 | 
52 | -- Increase the memory allocated to external (R) processes.
53 | USE [master]
54 | GO
55 | SELECT * FROM sys.resource_governor_resource_pools WHERE name = 'default'
56 | SELECT * FROM sys.resource_governor_external_resource_pools WHERE name = 'default'
57 | ALTER EXTERNAL RESOURCE POOL "default" WITH (max_memory_percent = 100);
58 | ALTER RESOURCE GOVERNOR reconfigure;
59 | 
60 | 
-------------------------------------------------------------------------------- /SQLR/readme.md: --------------------------------------------------------------------------------
1 | 
2 | 
3 | # Fraud Detection
4 | ## Implemented on SQL Server 2016 R Services and HDInsight Spark
5 | 
6 | > Discover more examples at [Microsoft Machine Learning Server](https://github.com/Microsoft/ML-Server)
7 | 
8 | For all documentation, visit the [Fraud Detection website](https://microsoft.github.io/r-server-fraud-detection/).
9 | 
10 | **NOTE:** Please don't use "Download ZIP" to get this repository, as it will change the line endings in the data files. Use "git clone" to get a local copy of this repository.
11 | 
12 | # Contributing
13 | 
14 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.
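
A hypothetical end-to-end run of the stored procedures defined in this folder (the table names are illustrative placeholders, not objects shipped with the repository):

```sql
-- train on a prepared feature table, then score new transactions
exec TrainModelR 'Tagged_Training_Processed_Features'
exec PredictR 'Untagged_Transactions', 'Predictions', '1'  -- '1' = merge with accountInfo first
```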
15 | 
-------------------------------------------------------------------------------- /Website/package.json: --------------------------------------------------------------------------------
1 | {
2 | "name": "r-sql",
3 | "version": "1.0.0",
4 | "description": "r sql server",
5 | "main": "server.js",
6 | "scripts": {
7 | "test": "echo \"Error: no test specified\" && exit 1"
8 | },
9 | "keywords": [
10 | "r",
11 | "sql",
12 | "server"
13 | ],
14 | "author": "Sean Wells ",
15 | "license": "MIT",
16 | "dependencies": {
17 | "express": "^4.13.4",
18 | "express-handlebars": "^3.0.0",
19 | "tedious": "^1.13.2"
20 | }
21 | }
22 | 
-------------------------------------------------------------------------------- /Website/public/css/myCSS.css: --------------------------------------------------------------------------------
1 | @charset "UTF-8";
2 | body {
3 | padding-top: 10px;
4 | }
5 | .start {
6 | padding: 40px 15px;
7 | text-align: center;
8 | }
9 | .bdba{
10 | color:#DD4814;
11 | }
12 | .form-control-inline {
13 | min-width: 0;
14 | width: auto;
15 | display: inline;
16 | }
-------------------------------------------------------------------------------- /Website/public/fonts/glyphicons-halflings-regular.eot: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/r-server-fraud-detection/4ff1bc6740a9384d80dbf3cc2c97e374a47248af/Website/public/fonts/glyphicons-halflings-regular.eot
-------------------------------------------------------------------------------- /Website/public/fonts/glyphicons-halflings-regular.ttf: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/r-server-fraud-detection/4ff1bc6740a9384d80dbf3cc2c97e374a47248af/Website/public/fonts/glyphicons-halflings-regular.ttf
-------------------------------------------------------------------------------- /Website/public/fonts/glyphicons-halflings-regular.woff: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/r-server-fraud-detection/4ff1bc6740a9384d80dbf3cc2c97e374a47248af/Website/public/fonts/glyphicons-halflings-regular.woff
-------------------------------------------------------------------------------- /Website/public/img/bracelet.jpg: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/r-server-fraud-detection/4ff1bc6740a9384d80dbf3cc2c97e374a47248af/Website/public/img/bracelet.jpg
-------------------------------------------------------------------------------- /Website/public/img/earrings.jpg: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/r-server-fraud-detection/4ff1bc6740a9384d80dbf3cc2c97e374a47248af/Website/public/img/earrings.jpg
-------------------------------------------------------------------------------- /Website/public/img/heart.jpg: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/r-server-fraud-detection/4ff1bc6740a9384d80dbf3cc2c97e374a47248af/Website/public/img/heart.jpg
-------------------------------------------------------------------------------- /Website/public/img/logo.gif: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/r-server-fraud-detection/4ff1bc6740a9384d80dbf3cc2c97e374a47248af/Website/public/img/logo.gif
-------------------------------------------------------------------------------- /Website/public/img/progress.gif: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/r-server-fraud-detection/4ff1bc6740a9384d80dbf3cc2c97e374a47248af/Website/public/img/progress.gif
-------------------------------------------------------------------------------- /Website/public/img/ring.jpg: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/r-server-fraud-detection/4ff1bc6740a9384d80dbf3cc2c97e374a47248af/Website/public/img/ring.jpg
-------------------------------------------------------------------------------- /Website/public/js/customize.js: --------------------------------------------------------------------------------
1 | // JavaScript Document
2 | 
3 | (function(R) {
4 | 
5 | $("#submitBtn").click(function(){
6 | exeScript()
7 | });
8 | 
9 | /* Execute the repository script to get the score for this claim */
10 | var exeScript = function() {
11 | 
12 | /* callback configuration */
13 | var callback = {
14 | scope : this,
15 | 
16 | // success callback
17 | success : function(result) {
18 | //area.className = '';
19 | var objs = result.deployr.response.workspace.objects;
20 | score = objs[0].value;
21 | 
22 | // Use the score from the script to display the appropriate message
23 | if (score < 3) {
24 | $("#resultArea").html('Thank you for submitting your claim. It has been fast-tracked for processing.');
25 | $("#resultArea").removeClass('alert-danger');
26 | $("#resultArea").addClass('alert-success');
27 | } else {
28 | $("#resultArea").html('Thank you for submitting your claim. Please allow 2-4 weeks for review.');
29 | $("#resultArea").removeClass('alert-success');
30 | $("#resultArea").addClass('alert-danger');
31 | }
32 | 
33 | $("#resultArea").fadeIn();
34 | 
35 | 
36 | },
37 | // failure callback
38 | failure : function(result) {
39 | var msg = result;
40 | 
41 | if (result.deployr) {
42 | msg = result.deployr.response.error;
43 | $("#resultArea").html(msg);
44 | }
45 | }
46 | };
47 | 
48 | /* configuration input for repository script execution */
49 | 
50 | //inputList gathers up all the form values and formats them for DeployR
51 | var inputList = [];
52 | $(".form-control").each(function() {
53 | inputList.push(R.RDataFactory.createString($(this).attr("id"), $(this).val() || ' '));
54 | });
55 | 
56 | //send all the form values as inputs, and retrieve 'score' from the script execution
57 | 
58 | var scriptConfig = {
59 | filename : 'insuranceFraud',
60 | author : 'sheri',
61 | inputs : inputList,
62 | robjects: ['score'],
63 | preloadfilename: 'rtsScoreFraud.R',
64 | preloadfileauthor: 'sheri',
65 | blackbox: true
66 | };
67 | 
68 | // execute RScript
69 | R.DeployR.repositoryScriptExecute(scriptConfig, callback);
70 | };
71 | })(window.Revolution);
-------------------------------------------------------------------------------- /Website/public/js/scoreClaim.js: --------------------------------------------------------------------------------
1 | // JavaScript Document
2 | var scoreClaim = function(id, amt){
3 | //first get the rest of the data for this id
4 | record = lookupData(id, amt)
5 | // call /predict to get res.pred, the predicted probability that this transaction is fraudulent
6 | $.ajax({
7 | url: '/predict',
8 | type: 'GET',
9 | data: { record: record },
10 | contentType:"application/json; charset=utf-8",
11 | error: function(xhr, error){
12 | console.log(xhr); console.log(error);
13 | },
14 | success: function(res) {
15 | console.log("AccountID: " + id + " transactionAmt: " + amt )
16 | console.log("Predicted probability: " + res.pred )
17 | // now use the probability to display one of two messages
18 | if (res.pred > 0.5) { // problem with this order
19 | $("#resultArea").html('There is a problem with this order. Please call 800-555-2222 for more information');
20 | $("#resultArea").removeClass('alert-success');
21 | $("#resultArea").addClass('alert-danger');
22 | 
23 | 
24 | } else { // no problem with the order
25 | $("#resultArea").html('Thank you for submitting your order. You will receive an email with tracking information shortly.');
26 | $("#resultArea").removeClass('alert-danger');
27 | $("#resultArea").addClass('alert-success');
28 | }
29 | // make sure the result is visible
30 | $("#resultArea").removeClass('hide');
31 | $("#resultArea").addClass('show');
32 | // hide the status section
33 | $("#status").removeClass('show');
34 | $("#status").addClass('hide');
35 | $("#resultArea").fadeIn();
36 | }
37 | 
38 | });
39 | }
40 | 
41 | var lookupData = function(custID, amt){
42 | amt = parseFloat(amt.replace(/,/g, ''));
43 | // the rest of the record would be looked up in a customer database.
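// (Comment added for clarity: the assembled record string is sent to /predict,
//  which passes it verbatim to the ScoreOneTrans stored procedure, so the
//  comma-separated field order below must match what that procedure parses.)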
44 | // for this demo we are simply supplying that info directly for our four test accounts
45 | var custData;
46 | 
47 | switch(custID) {
48 | case 'A1055521358474530':
49 | custData = 'USD,NULL,20130409,102958,14,A,P,NULL,NULL,NULL,92.97,dubayy,0,ae,FALSE,NULL,en-US,CREDITCARD,AMEX,NULL,NULL,NULL,33071,FL,US,NULL,NULL,NULL,NULL,NULL,NULL,M,NULL,0,4,NULL';
50 | break;
51 | case 'A914800341525449':
52 | custData = 'USD,NULL,20130409,122427,7,A,P,NULL,NULL,NULL,108.49,massachusetts,2118,us,FALSE,NULL,en-US,CREDITCARD,VISA,NULL,NULL,NULL,1702,MA,US,NULL,NULL,NULL,NULL,NULL,NULL,M,NULL,1,0,NULL';
53 | break;
54 | case 'A1688852355371910':
55 | custData = 'USD,NULL,20130409,110900,6,A,P,NULL,NULL,NULL,99.47,florida,32114,us,FALSE,NULL,en-US,CREDITCARD,VISA,NULL,NULL,NULL,32746,FL,US,NULL,NULL,NULL,NULL,NULL,NULL,M,NULL,1,0,NULL';
56 | break;
57 | default:
58 | custData = 'USD,NULL,20130409,104848,NULL,A,P,NULL,NULL,NULL,121.242,maharashtra,411001,in,FALSE,NULL,en-US,CREDITCARD,VISA,NULL,NULL,NULL,98033,WA,US,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,3,0,NULL';
59 | break;
60 | }
61 | 
62 | var record = 'xxxTRANSID,'+ custID + ',' + amt + ',' + amt + ',' + custData;
63 | 
64 | return(record);
65 | }
66 | 
-------------------------------------------------------------------------------- /Website/public/js/startUp.js: --------------------------------------------------------------------------------
1 | // JavaScript Document
2 | 
3 | $(document).ready ( function () {
4 | $("#resultArea").hide();
5 | $("#status").hide();
6 | // show the login dialog on startup
7 | $('#loginDlg').modal('show');
8 | 
9 | $('#selAccount').change(function() {
10 | $("#claimantID").val( ($(this).val()) );
11 | });
12 | 
13 | $("#resetBtn").click(function(){
14 | // empty the table and change the purchase back to 0.
15 | $("#myTable > tbody").html("");
16 | $("#status").removeClass('show');
17 | $("#status").addClass('hide');
18 | // hide the result area
19 | $("#resultArea").addClass('hide');
20 | $("#resultArea").removeClass('show');
21 | document.getElementById('result').innerHTML = "Total Purchase: $0"
22 | $("#resultArea").fadeOut();
23 | });
24 | 
25 | 
26 | 
27 | $("#submitBtn").click(function(){
28 | acctID = $("#claimantID").val();
29 | // check to make sure there is an item in the cart
30 | if ($('#myTable tr').length > 0 ) {
31 | // also make sure the account id is present.
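// (the account ID is populated by the #selAccount login-dialog handler above)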
32 | if (acctID !== '') {
33 | // show the status message and call scoreClaim
34 | $("#status").removeClass('hide');
35 | $("#status").addClass('show');
36 | $("#status").fadeIn();
37 | // clear the result area
38 | $("#resultArea").addClass('hide');
39 | $("#resultArea").removeClass('show');
40 | var amt = recalc();
41 | scoreClaim( acctID, amt);
42 | } else {
43 | // no account ID present
44 | $("#status").removeClass('show');
45 | $("#status").addClass('hide');
46 | $("#resultArea").html('Please Login and try again.');
47 | $("#resultArea").removeClass('alert-success');
48 | $("#resultArea").addClass('alert-danger');
49 | $("#resultArea").fadeIn();
50 | }
51 | } else {
52 | // no items in the cart
53 | $("#status").removeClass('show');
54 | $("#status").addClass('hide');
55 | $("#resultArea").html('You must have at least one item before you can Purchase.');
56 | $("#resultArea").removeClass('alert-success');
57 | $("#resultArea").addClass('alert-danger');
58 | $("#resultArea").fadeIn();
59 | }
60 | });
61 | 
62 | 
63 | $(".addItem").click (function(){
64 | // Adding items to the cart - just hardcoding a few rows here; each row carries a .val price cell and a deleteMe button used by recalc() and the delete handler below
65 | switch (this.id) {
66 | case "heart":
67 | contents = '<tr>'
68 | contents = contents + '<td>Black and White Diamond Heart</td>'
69 | contents = contents + '<td class="val">$130</td>'
70 | break;
71 | case "earrings":
72 | contents = '<tr>'
73 | contents = contents + '<td>Diamond Pave Earrings</td>'
74 | contents = contents + '<td class="val">$569</td>'
75 | break;
76 | case "bracelet":
77 | contents = '<tr>'
78 | contents = contents + '<td>Diamond Tennis Bracelet</td>'
79 | contents = contents + '<td class="val">$360</td>'
80 | break;
81 | case "ring":
82 | contents = '<tr>'
83 | contents = contents + '<td>Diamond Engagement Ring</td>'
84 | contents = contents + '<td class="val">$2100</td>'
85 | break;
86 | }
87 | contents = contents + '<td><button type="button" class="deleteMe">Remove</button></td></tr>'
88 | $('#myTable > tbody:last-child').append(contents);
89 | recalc()
90 | });
91 | 
92 | // can't use $(".deleteMe").click here because the items are dynamically added, not all present at the start.
93 | $(document).on('click', 'button.deleteMe', function () {
94 | $(this).closest('tr').remove();
95 | recalc();
96 | });
97 | 
98 | function formatTotal(x) {
99 | x = Math.round(x);
100 | return x.toString().replace(/\B(?=(\d{3})+(?!\d))/g, ",");
101 | }
102 | 
103 | function recalc(){
104 | // iterate through all the values in the table (class="val")
105 | var resultVal = 0.0;
106 | $(".val").each ( function() {
107 | var itemval = $(this).text();
108 | itemval = itemval.replace('$', ""); // strip the currency symbol before parsing
109 | resultVal += parseFloat ( itemval.replace(/\s/g,'').replace(',','.'));
110 | });
111 | resultVal = formatTotal(resultVal);
112 | document.getElementById('result').innerHTML = "Total Purchase $" + resultVal;
113 | return(resultVal)
114 | }
115 | 
116 | });
117 | 
-------------------------------------------------------------------------------- /Website/readme.md: --------------------------------------------------------------------------------
1 | 
2 | 
3 | # Fraud Detection
4 | ## Implemented on SQL Server 2016 R Services and HDInsight Spark
5 | 
6 | > Discover more examples at [Microsoft Machine Learning Server](https://github.com/Microsoft/ML-Server)
7 | 
8 | 
9 | Deploy this solution from Cortana Intelligence Gallery with [SQL Server](https://aka.ms/fraud-detection) or [HDInsight Spark Cluster](https://aka.ms/fraud-detection-hdi).
10 | 
11 | For all documentation, visit the [Fraud Detection website](https://microsoft.github.io/r-server-fraud-detection/).
12 | 
13 | **NOTE:** Please don't use "Download ZIP" to get this repository, as it will change the line endings in the data files. Use "git clone" to get a local copy of this repository.
Use "git clone" to get a local copy of this repository. 14 | 15 | # Contributing 16 | 17 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. 18 | -------------------------------------------------------------------------------- /Website/server.js: -------------------------------------------------------------------------------- 1 | var express = require('express'); 2 | var Connection = require('tedious').Connection; 3 | var Request = require('tedious').Request; 4 | var TYPES = require('tedious').TYPES; 5 | 6 | var fs = require('fs'); 7 | var util = require('util'); 8 | var logFileName = __dirname + '/debug.log'; 9 | 10 | var app = express(); 11 | var exphbs = require('express-handlebars'); 12 | app.engine('handlebars', exphbs({defaultLayout: 'main'})); 13 | app.set('view engine', 'handlebars'); 14 | 15 | app.use(express.static('public')); 16 | 17 | 18 | 19 | 20 | // 21 | // DB Connection 22 | // 23 | var args = process.argv.slice(2); 24 | if (args.length>0) { 25 | var user = args[0]; 26 | var pw = args[1]; 27 | } 28 | else { 29 | var user = 'XXYOURSQLUSER'; 30 | var pw = 'XXYOURSQLPW'; 31 | } 32 | 33 | 34 | var con = new Connection({ //fix this with fraud db info 35 | userName: user, 36 | password: pw, 37 | server: 'localhost', 38 | // When you connect to Azure SQL Database, you need encrypt: true 39 | options: { encrypt: true, database: 'Fraud_R' } 40 | }); 41 | 42 | con.on('connect', function(err) { 43 | console.log('DB Connection ' + (err ? '~~~ Failure ~~~' : 'Success')); 44 | if (err) console.log(err); 45 | }); 46 | 47 | // 48 | // Put your routes here 49 | // 50 | 51 | // Home Page 52 | app.get('/', function (req, res) { 53 | res.render('home') 54 | }); 55 | 56 | // Kill the server 57 | app.get('/kill', function (req, res) { 58 | setTimeout(() => process.exit(), 500); 59 | }); 60 | 61 | 62 | // predict function, called from scoreClaim.js 63 | 64 | app.get('/predict', function (req, res) { 65 | var request = new Request('ScoreOneTrans', function(err, rowCount) { 66 | if (err) { 67 | console.log(err); 68 | } 69 | // console.log("Rows Returned: " + rowCount ) 70 | }); 71 | 72 | var record = req.query.record; 73 | console.log (record) 74 | request.on('row', function(col) { 75 | if (col[0].value === null) { 76 | console.log('NULL'); 77 | } else { 78 | // values to return - the predicted probability 79 | value = col[0].value; 80 | } 81 | 82 | res.json({ pred: value }); 83 | request.on('doneInProc', function(rowCount, more) { 84 | console.log(rowCount + ' rows returned'); 85 | }); 86 | 87 | }); 88 | // pass the entire record to the stored procedure 89 | request.addParameter('inputstring', TYPES.VarChar, record); 90 | con.callProcedure(request); 91 | con.close; 92 | 93 | 94 | }); 95 | 96 | //log to file 97 | var logFile = fs.createWriteStream(logFileName, { flags: 'a' }); 98 | var logProxy = console.log; 99 | console.log = function (d) { // 100 | logFile.write(util.format(new Date() + ": " + d || '') + '\r\n'); 101 | logProxy.apply(this, arguments); 102 | }; 103 | 104 | app.listen(3000, function () { 105 | console.log('Example app listening on port 3000!'); 106 | }); -------------------------------------------------------------------------------- /Website/startMe.vbs: 
--------------------------------------------------------------------------------
1 | user = WScript.Arguments.Item(0)
2 | pw = WScript.Arguments.Item(1)
3 | 
4 | cmd = "node server.js " + user + " " + pw
5 | 
6 | CreateObject("Wscript.Shell").Run cmd, 0
-------------------------------------------------------------------------------- /Website/views/home.handlebars: --------------------------------------------------------------------------------
1 | 
2 | <!-- [home.handlebars: login dialog (#loginDlg) with account selector (#selAccount / #claimantID); add-to-cart product panels (heart, earrings, bracelet, ring); "Your Cart" table (#myTable); "Total Purchase: $0" display (#result); Purchase (#submitBtn) and Reset (#resetBtn) buttons; "Processing your order..." status area (#status); result alert area (#resultArea); two inline script blocks] -->
-------------------------------------------------------------------------------- /Website/views/layouts/main.handlebars: --------------------------------------------------------------------------------
1 | <!-- [main.handlebars: page head titled "Joseph's Mart" with CSS/JS includes; top navbar; {{{body}}} content placeholder; footer reading "Demo website for a fake company · Privacy · Terms"] -->
-------------------------------------------------------------------------------- /onlinefraud.pbix: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/r-server-fraud-detection/4ff1bc6740a9384d80dbf3cc2c97e374a47248af/onlinefraud.pbix
-------------------------------------------------------------------------------- /onlinefraudHDI.pbix: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/r-server-fraud-detection/4ff1bc6740a9384d80dbf3cc2c97e374a47248af/onlinefraudHDI.pbix
--------------------------------------------------------------------------------