├── DESCRIPTION
├── MD5
├── NAMESPACE
├── R
    ├── acquireAuditData.R
    ├── ada.R
    ├── ada_gui.R
    ├── appendLibLog.R
    ├── asRules.rpart.R
    ├── associate.R
    ├── biclust.R
    ├── binning.R
    ├── clara.R
    ├── cluster.R
    ├── comcat.R
    ├── ctree.R
    ├── data.R
    ├── errorMatrix.R
    ├── evaluate.R
    ├── ewkm.R
    ├── execute.R
    ├── executeBoxPlot2.R
    ├── executeExploreGGRaptR.R
    ├── executeHistPlot2.R
    ├── executeLogTab.R
    ├── executeModelAda.R
    ├── executeModelGlm.R
    ├── executeModelRF.R
    ├── executeModelRxBTrees.r
    ├── executeModelRxDForest.R
    ├── executeModelRxDTree.R
    ├── executeModelRxGlm.r
    ├── executeModelXGB.R
    ├── executePairsPlotSelect2.R
    ├── explore.R
    ├── export.R
    ├── fancyRpartPlot.R
    ├── ggVarImp.R
    ├── hclust.R
    ├── help.R
    ├── kmeans.R
    ├── loadLibs.R
    ├── loadTooltips.R
    ├── log.R
    ├── model.R
    ├── nnet.R
    ├── normVarNames.R
    ├── projects.R
    ├── psfchart.R
    ├── random_forest.R
    ├── rattle.R
    ├── rattleInfo.R
    ├── report.R
    ├── riskchart.R
    ├── rocChart.R
    ├── rpart.R
    ├── survival.R
    ├── test.R
    ├── textminer.R
    ├── textview.R
    ├── transform.R
    ├── unloadLibs.R
    ├── xgb.R
    ├── xgboostFormula.R
    └── zzz.R
├── build
    └── vignette.rds
├── data
    ├── audit.RData
    ├── locationsAUS.RData
    ├── weather.RData
    ├── weatherAUS.RData
    └── wine.RData
├── inst
    ├── CITATION
    ├── NEWS
    ├── arff
    │   ├── audit.arff
    │   └── weather.arff
    ├── csv
    │   ├── audit.csv
    │   ├── dvdtrans.csv
    │   └── weather.csv
    ├── doc
    │   ├── rattle.R
    │   ├── rattle.Rnw
    │   └── rattle.pdf
    ├── etc
    │   ├── Rlogo.png
    │   ├── gpl-license
    │   ├── rattle.glade
    │   ├── rattle.ui
    │   ├── rattle_macosx.ui
    │   ├── textviews.xml
    │   └── tooltips.xml
    ├── extdata
    │   └── audit.xlsx
    ├── odt
    │   └── data_summary.odt
    └── po
    │   ├── de
    │       └── LC_MESSAGES
    │       │   └── R-rattle.mo
    │   ├── es
    │       └── LC_MESSAGES
    │       │   └── R-rattle.mo
    │   ├── fr
    │       └── LC_MESSAGES
    │       │   └── R-rattle.mo
    │   ├── id
    │       └── LC_MESSAGES
    │       │   └── R-rattle.mo
    │   ├── ja
    │       └── LC_MESSAGES
    │       │   └── R-rattle.mo
    │   ├── no
    │       └── LC_MESSAGES
    │       │   └── R-rattle.mo
    │   └── zh_CN
    │       └── LC_MESSAGES
    │           └── R-rattle.mo
├── man
    ├── acquireAuditData.Rd
    ├── asRules.Rd
    ├── asRules.rpart.Rd
    ├── audit.Rd
    ├── binning.Rd
    ├── calcInitialDigitDistr.Rd
    ├── calculateAUC.Rd
    ├── centers.hclust.Rd
    ├── comcat.Rd
    ├── drawTreeNodes.Rd
    ├── drawTreesAda.Rd
    ├── errorMatrix.Rd
    ├── evaluateRisk.Rd
    ├── fancyRpartPlot.Rd
    ├── genPlotTitleCmd.Rd
    ├── ggVarImp.Rd
    ├── grouper.Rd
    ├── internal.Rd
    ├── listAdaVarsUsed.Rd
    ├── listTreesAda.Rd
    ├── listVersions.Rd
    ├── modalvalue.Rd
    ├── plotOptimalLine.Rd
    ├── plotRisk.Rd
    ├── printRandomForests.Rd
    ├── randomForest2Rules.Rd
    ├── rattle.Rd
    ├── rattle.print.summary.multinom.Rd
    ├── rattleInfo.Rd
    ├── riskchart.Rd
    ├── savePlotToFile.Rd
    ├── setupDataset.Rd
    ├── treeset.randomForest.Rd
    ├── weather.Rd
    ├── weatherAUS.Rd
    ├── whichNumerics.Rd
    └── wine.Rd
└── vignettes
    └── rattle.Rnw


/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: rattle
 2 | Type: Package
 3 | Title: Graphical User Interface for Data Science in R
 4 | Version: 5.5.1
 5 | Date: 2022-03-20
 6 | Authors@R: c(person("Graham", "Williams", 
 7 | 	   	    role=c("aut", "cph", "cre"),
 8 | 	            email="Graham.Williams@togaware.com"),
 9 | 	     person("Mark", "Vere Culp", role="cph"),
10 |              person("Ed", "Cox", role="ctb"), 
11 | 	     person("Anthony", "Nolan", role="ctb"),
12 | 	     person("Denis", "White", role="cph"),
13 | 	     person("Daniele", "Medri", role="ctb"),
14 | 	     person("Akbar", "Waljee", role="ctb", 
15 | 	     	    comment="OOB AUC for Random Forest"),
16 |              person("Brian", "Ripley", role="cph",
17 | 	            comment="print.summary.nnet"),
18 | 	     person("Jose", "Magana", role="ctb",
19 |                     comment="ggpairs plots"),
20 |              person("Surendra", "Tipparaju", role="ctb",
21 | 	            comment="initial RevoScaleR/XDF"),
22 |              person("Durga", "Prasad Chappidi", role="ctb",
23 | 	            comment="initial RevoScaleR/XDF"),
24 |              person("Dinesh", "Manyam Venkata", role="ctb",
25 | 	            comment="initial RevoScaleR/XDF"),
26 |              person("Mrinal", "Chakraborty", role="ctb",
27 | 	            comment="initial RevoScaleR/XDF"),
28 |              person("Fang", "Zhou", role="ctb",
29 | 	            comment="initial xgboost"),
30 |              person("Cameron", "Chisholm", role="ctb",
31 | 	            comment="risk plot on risk chart"))
32 | Depends: R (>= 3.5.0), tibble, bitops
33 | Imports: stats, utils, ggplot2, grDevices, graphics, magrittr, methods,
34 |         stringi, stringr, tidyr, dplyr, XML, rpart.plot
35 | Suggests: pmml (>= 1.2.13), colorspace, ada, amap, arules, arulesViz,
36 |         biclust, cairoDevice, cba, cluster, corrplot, descr, doBy,
37 |         e1071, ellipse, fBasics, foreign, fpc, gdata, ggdendro, gplots,
38 |         grid, gridExtra, gtools, Hmisc, janitor, kernlab, Matrix, mice,
39 |         nnet, party, plyr, psych, RGtk2, randomForest, RColorBrewer,
40 |         readxl, reshape, ROCR, RODBC, rpart, scales, SnowballC,
41 |         survival, timeDate, tm, xgboost
42 | Description: The R Analytic Tool To Learn Easily (Rattle) provides a 
43 |   collection of utility functions for the data scientist. A
44 |   Gnome (RGtk2) based graphical interface is included with 
45 |   the aim to provide a simple and intuitive introduction to R 
46 |   for data science, allowing a user to quickly load data from a CSV file 
47 |   (or via ODBC), transform and explore the data, 
48 |   build and evaluate models, and export models as PMML (predictive
49 |   modelling markup language) or as scores. A key aspect of the GUI
50 |   is that all R commands are logged and commented through the log tab.
51 |   This can be saved as a standalone R script file and as
52 |   an aid for the user to 
53 |   learn R or to copy-and-paste directly into R itself.
54 |   Note that RGtk2 and cairoDevice have been archived on CRAN.
55 |   See <https://rattle.togaware.com> for installation instructions.
56 | License: GPL (>= 2)
57 | LazyLoad: yes
58 | LazyData: yes
59 | URL: https://rattle.togaware.com/
60 | NeedsCompilation: no
61 | Packaged: 2022-03-20 00:54:54 UTC; gjw
62 | Author: Graham Williams [aut, cph, cre],
63 |   Mark Vere Culp [cph],
64 |   Ed Cox [ctb],
65 |   Anthony Nolan [ctb],
66 |   Denis White [cph],
67 |   Daniele Medri [ctb],
68 |   Akbar Waljee [ctb] (OOB AUC for Random Forest),
69 |   Brian Ripley [cph] (print.summary.nnet),
70 |   Jose Magana [ctb] (ggpairs plots),
71 |   Surendra Tipparaju [ctb] (initial RevoScaleR/XDF),
72 |   Durga Prasad Chappidi [ctb] (initial RevoScaleR/XDF),
73 |   Dinesh Manyam Venkata [ctb] (initial RevoScaleR/XDF),
74 |   Mrinal Chakraborty [ctb] (initial RevoScaleR/XDF),
75 |   Fang Zhou [ctb] (initial xgboost),
76 |   Cameron Chisholm [ctb] (risk plot on risk chart)
77 | Maintainer: Graham Williams <Graham.Williams@togaware.com>
78 | Repository: CRAN
79 | Date/Publication: 2022-03-21 13:10:02 UTC
80 | 


--------------------------------------------------------------------------------
/MD5:
--------------------------------------------------------------------------------
  1 | dfde49a87bd8d229ac4b67c30ed5cb83 *DESCRIPTION
  2 | c65d174f5ca6dd0ff225e5fe1d189228 *NAMESPACE
  3 | 7150923073a603c1114d5f79c4c833bc *R/acquireAuditData.R
  4 | af4de58c7fee7bd5e3d4f5b359683d0a *R/ada.R
  5 | a563bb6edf034934e75f2885223f9663 *R/ada_gui.R
  6 | 6250a0cc8caa2346890b9871aeb5258b *R/appendLibLog.R
  7 | f13cbeb275e4a56c93eee9b96f14eaab *R/asRules.rpart.R
  8 | 00622b1bf950a37914a6761790eca6d7 *R/associate.R
  9 | 4ca37efa0619973496b4a89425d129dd *R/biclust.R
 10 | 35b51e36a0bff1343963391c9e4d69b0 *R/binning.R
 11 | 027d5c4a56fc74c6989fba15417e44a9 *R/clara.R
 12 | 69aded1b1021fac79a1114be2513fffa *R/cluster.R
 13 | d5310cdbd25ce45b7e723ae79a375277 *R/comcat.R
 14 | 4e90fe79e27d3b193b94461b4f9a01a7 *R/ctree.R
 15 | 405e9db3188bffa143b782eabea02c14 *R/data.R
 16 | f56a37bc43d7fc103d8805d9e60266c6 *R/errorMatrix.R
 17 | 032dd5633630e1862c5e3dc58e037a8f *R/evaluate.R
 18 | b22d75b6e5b04d59de03eaa79f8a2124 *R/ewkm.R
 19 | 45bf5b579f45c2214dc1596d8006765e *R/execute.R
 20 | 4f8b94bc38a249628cfa73aa55c0e4d6 *R/executeBoxPlot2.R
 21 | 40d678b83004a4e8439f79bf09271a90 *R/executeExploreGGRaptR.R
 22 | 7bb54b05b9081d5b7fa0e17a721db7fc *R/executeHistPlot2.R
 23 | ab1d71b812c5b403fd91d0259373ba9e *R/executeLogTab.R
 24 | 5f0572134fd0b9086acafed51477caf3 *R/executeModelAda.R
 25 | 9c66f88ae6ce325842942937dfcdaf41 *R/executeModelGlm.R
 26 | 6576b5d8f639e570ed5d76c13350e3a7 *R/executeModelRF.R
 27 | 7554b8864fbbbb5478f1c7591c87091f *R/executeModelRxBTrees.r
 28 | c25a6a6e2008d3bb96d072171ab86906 *R/executeModelRxDForest.R
 29 | 751dd36be37e49c314e94245dac18b79 *R/executeModelRxDTree.R
 30 | b04967b2339bc546d47b1e427e91d031 *R/executeModelRxGlm.r
 31 | 9d6a4f8979894a32ebc7cb1a181e9b05 *R/executeModelXGB.R
 32 | 0715f6946819c12b80888bf7bce83d41 *R/executePairsPlotSelect2.R
 33 | 7d9d7b0fc05c868182e09b0f4a31928b *R/explore.R
 34 | eb49e240027d0576ed599c5d87c515b8 *R/export.R
 35 | 887c99b05df4aa3af4fb34224b893537 *R/fancyRpartPlot.R
 36 | d75adcc7b03ec298fd468c409a6f3b44 *R/ggVarImp.R
 37 | e67b4ad415dc83bf9c94dcd68ebf5570 *R/hclust.R
 38 | f9059408ed78651ca318db13174c23c0 *R/help.R
 39 | 88fadcb1dc8d16d1b58c48f676d23648 *R/kmeans.R
 40 | f84fce3659bd1b80082c7ad3353f87b5 *R/loadLibs.R
 41 | ab71d282d13a6cf9078a306b63c6318f *R/loadTooltips.R
 42 | cc707ff9d9c132f04d6b444b9cdf5300 *R/log.R
 43 | 2aa8a9465bdbcfed998821e04005ede9 *R/model.R
 44 | 2fdd806efa77d6985bdf8883e1481d17 *R/nnet.R
 45 | ceafaa22c93f40fdbdf494c71bfaba22 *R/normVarNames.R
 46 | 12e4d56b8d2a89e568a5583980581afe *R/projects.R
 47 | 6b48c4bcc350deca1732d5eafb5302eb *R/psfchart.R
 48 | f3dbc0f3e5a897c94663210714aec205 *R/random_forest.R
 49 | d81b242b056dfd868ad361a1bf9d77de *R/rattle.R
 50 | 73e575345a55eb688e4ef3e4f1a76a75 *R/rattleInfo.R
 51 | 1287a0651ea622a1dba2c1ee85cfc502 *R/report.R
 52 | 00a8b93dcd5f84ef91528eac1aef53e5 *R/riskchart.R
 53 | b905bcf42c7d0d01b679913b3435b54b *R/rocChart.R
 54 | 6f3de3d34845405536a55cc850ecd70a *R/rpart.R
 55 | dc7925f6991d9bb6c7ae1545e4fad9ed *R/survival.R
 56 | eae67386ff40a92e679a370f4c0645ab *R/test.R
 57 | bbaccdfb16e5be19680e3b909a11dc70 *R/textminer.R
 58 | 168d61cb2e51eb392b58e469b3474705 *R/textview.R
 59 | e723b068b7aaf38721788132cc1a996d *R/transform.R
 60 | 637bc5dc741a58b20266569883f429dd *R/unloadLibs.R
 61 | 34b5ee0498869b617d68b994aef46797 *R/xgb.R
 62 | b908ee73a14897a69baa2568c1602bc7 *R/xgboostFormula.R
 63 | 70e0b3f9bffd8d1426d36fc9f5867e10 *R/zzz.R
 64 | 82e7b4e845baa408ac394a2612f0f446 *build/vignette.rds
 65 | 7d4cd53924a0de7e823fcab1f79eb9f3 *data/audit.RData
 66 | e61807f15364f42b919b7a7aac8bf50e *data/locationsAUS.RData
 67 | f4ee9d0d3a0e5cd54e7b247e6b293a67 *data/weather.RData
 68 | 223e0030ea8656c55213a3c4958f81cf *data/weatherAUS.RData
 69 | b238b94883a795bdb34beca7a3200109 *data/wine.RData
 70 | 07b206873b335f3b59e1b339946a101b *inst/CITATION
 71 | 9b0c1eea2aa96fa37b6936311ea39ba8 *inst/NEWS
 72 | 7bb0f7665aff9ba1cd4615a7b3130f04 *inst/arff/audit.arff
 73 | 8fcf1db9883917a9682d315200d75d3b *inst/arff/weather.arff
 74 | 074d01593d19414e3b04ed5e5540b697 *inst/csv/audit.csv
 75 | a2f3aac92bd6389bf4f6404fab1f25f9 *inst/csv/dvdtrans.csv
 76 | 42eb6df45078d0da4a619279f93e6cfb *inst/csv/weather.csv
 77 | eb51a60ca3f95d06477c88e4a9fee7f2 *inst/doc/rattle.R
 78 | 097dc50ec6cd37e023ded2a67d2e650a *inst/doc/rattle.Rnw
 79 | a3de9948a0b4e025a8e6a2aa246a9e92 *inst/doc/rattle.pdf
 80 | 7381224c65138a2acdf3a8346f8275c4 *inst/etc/Rlogo.png
 81 | 7d7c232d655fd1c91af00d34b00de5df *inst/etc/gpl-license
 82 | 7633af88abaa7b6df08c0895f08bd4bb *inst/etc/rattle.glade
 83 | 957f06081b8c8ed9840f9a52ab88ef09 *inst/etc/rattle.ui
 84 | 77d262eb3e1f817d9a1e97f9d644c9a9 *inst/etc/rattle_macosx.ui
 85 | 8cdab95c921b90ea2d12042fce84bc89 *inst/etc/textviews.xml
 86 | 4d22c4dd38e4afd82f3edcc697deb669 *inst/etc/tooltips.xml
 87 | 556dd7c7897ebe95cdf24c642639f9c9 *inst/extdata/audit.xlsx
 88 | 6a187fbea9822787879c33c899b4a679 *inst/odt/data_summary.odt
 89 | f7f970509860caafd2cd8af603186ecd *inst/po/de/LC_MESSAGES/R-rattle.mo
 90 | a9e5b6844d8ed5c9139c553a865f8572 *inst/po/es/LC_MESSAGES/R-rattle.mo
 91 | 755e570e0af45f1203f0846976dc2f2e *inst/po/fr/LC_MESSAGES/R-rattle.mo
 92 | 313b8ddd254a47c8b37b21fdcedb5b0a *inst/po/id/LC_MESSAGES/R-rattle.mo
 93 | 334e3682ee7587c54d737bda46151722 *inst/po/ja/LC_MESSAGES/R-rattle.mo
 94 | c02e1f16560a877a4d138c7f8ae60fe0 *inst/po/no/LC_MESSAGES/R-rattle.mo
 95 | 8898f72e15a7589f2828758bfb36f231 *inst/po/zh_CN/LC_MESSAGES/R-rattle.mo
 96 | 9438af1222ef0076dfe7747e33fb4996 *man/acquireAuditData.Rd
 97 | f98da230d559a00d983e1c5c3f1a2dfd *man/asRules.Rd
 98 | f4ccc84132a7ff6ade803da6d6f744d7 *man/asRules.rpart.Rd
 99 | 480efc0108c5ee421792bba2a19d12a4 *man/audit.Rd
100 | cc1f637becd94aa98175655ef7ca3500 *man/binning.Rd
101 | 2da47f56b82c0828936e9d92f28f7a92 *man/calcInitialDigitDistr.Rd
102 | 6ba7d6537415ff497777e4ff1e7360e0 *man/calculateAUC.Rd
103 | fd670c34ad0561b3b0108dfc9f0803b1 *man/centers.hclust.Rd
104 | 1a1f6e6d51246367a8ad79e775d9fb3f *man/comcat.Rd
105 | 9fb42f8c02fff5ae04f3ca38bc13e7fc *man/drawTreeNodes.Rd
106 | 4c7bb92a1f56761f2cfaf4e96def953c *man/drawTreesAda.Rd
107 | 6d2ec516d9a4fbd28218da1bfe97e94c *man/errorMatrix.Rd
108 | 38290cd7f879eceb2819a042dd156b8c *man/evaluateRisk.Rd
109 | 98a3f1e88289c663442312eb7666df51 *man/fancyRpartPlot.Rd
110 | 705ba3e6c5adf0311e8c51047bbaa786 *man/genPlotTitleCmd.Rd
111 | 53ca4d75c3cfaf9fd2d28d8d256f2921 *man/ggVarImp.Rd
112 | 2d1ce0ede5cd159ccbba46fa048ca62c *man/grouper.Rd
113 | 0001b09aaa6b511106242d26bb274132 *man/internal.Rd
114 | 63016b3c880bc270ab2ee753ea46bbfe *man/listAdaVarsUsed.Rd
115 | 40ea64e53023df0338c5581e3d8fe6b5 *man/listTreesAda.Rd
116 | 22fd24014699705d8183ddeb4d435f45 *man/listVersions.Rd
117 | eb1992d89f735861c40024bbb35efb63 *man/modalvalue.Rd
118 | 8338e98d812f479868a014df53f07460 *man/plotOptimalLine.Rd
119 | c4523d09988e2b0224e37c10f1b910a2 *man/plotRisk.Rd
120 | bf0c085ccb878f91d150d16399b258dc *man/printRandomForests.Rd
121 | 6c9885484c1360a411772d1b076a02f1 *man/randomForest2Rules.Rd
122 | eac079318b7a129830ad7e854489a4a7 *man/rattle.Rd
123 | 311fb9a850423fb53b797d85df0ae196 *man/rattle.print.summary.multinom.Rd
124 | 28f9ebcf0637ab7e101e72631db50571 *man/rattleInfo.Rd
125 | 956b40566e48f36850c8424bed8cd2a1 *man/riskchart.Rd
126 | 5d7020f0d56c4afea3cf7beb3335afe9 *man/savePlotToFile.Rd
127 | 57d7c001abceeeabffd0686e7763b282 *man/setupDataset.Rd
128 | 909e484a0497278ee205266ddee4c1bd *man/treeset.randomForest.Rd
129 | 2b539c44caa6c21d76c4c2243ffaf3f2 *man/weather.Rd
130 | 30d907ae3dbb53cfe6734f719c759214 *man/weatherAUS.Rd
131 | 881626c19b22ed6589d33ab010c82560 *man/whichNumerics.Rd
132 | be3dda89cea8d00f6502bbaff814778c *man/wine.Rd
133 | 097dc50ec6cd37e023ded2a67d2e650a *vignettes/rattle.Rnw
134 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | export(
 2 |     acquireAuditData,
 3 |     asRules,
 4 |     benfordDistr,
 5 |     binning,
 6 |     calcInitialDigitDistr,
 7 |     calculateAUC,
 8 |     centers.hclust,
 9 |     comcat,
10 |     copyPlotToClipboard,
11 |     digitDistr,
12 |     drawTreesAda,
13 |     drawTreeNodes,
14 |     errorMatrix,
15 |     evaluateRisk,
16 |     fancyRpartPlot,
17 |     generateAprioriSummary,
18 |     genPlotTitleCmd,
19 |     ggVarImp,
20 |     importance,
21 |     listAdaVarsUsed,
22 |     listTreesAda,
23 |     listVersions,
24 |     modalvalue,
25 |     normVarNames,
26 |     plotDigitFreq,
27 |     plotOptimalLine,
28 |     plotRisk,
29 |     predict.kmeans,
30 |     predict.hclust,
31 |     printPlot,
32 |     printRandomForests,
33 |     print.summary.nnet,
34 |     psfchart,
35 |     randomForest2Rules,
36 |     rattle,
37 |     rattleInfo,
38 |     rattle.print.summary.multinom,
39 |     rescale.by.group,
40 |     riskchart,
41 |     rocChart,
42 |     # 120117 Remove for now - could be harmful.
43 |     # overwritePackageFunction,
44 |     savePlotToFile,
45 |     setupDataset,
46 |     toga,
47 |     treeset.randomForest,
48 |     whichNumerics,
49 |     xgboost,
50 |     crs,crv
51 |     )
52 | 
53 | exportPattern("_") # Needed for the Glade interface
54 | 
55 | S3method(asRules, rpart)
56 | S3method(ggVarImp, randomForest)
57 | S3method(ggVarImp, rpart)
58 | S3method(ggVarImp, rxDForest)
59 | S3method(ggVarImp, xgb.Booster)
60 | S3method(ggVarImp, xgb.formula)
61 | S3method(predict, xgb.formula)
62 | S3method(print, xgb.formula)
63 | S3method(importance, xgb.formula)
64 | S3method(xgboost, formula)
65 | S3method(predict, hclust)
66 | S3method(predict, kmeans)
67 | S3method(print.summary, nnet)
68 | 
69 | #import(RGtk2) # Not required but will be used if available.
70 | import(stats)
71 | import(utils)
72 | import(grDevices)
73 | import(graphics)
74 | import(methods)	  # For possibleExtends() formal classes
75 | import(tibble)
76 | 
77 | importFrom(magrittr, "%>%")
78 | importFrom(magrittr, "%<>%")
79 | importFrom(stringi, "%s+%")
80 | 
81 | importFrom(bitops, cksum)
82 | 


--------------------------------------------------------------------------------
/R/acquireAuditData.R:
--------------------------------------------------------------------------------
  1 | # Rattle: A GUI for Data Mining in R
  2 | #
  3 | # AUDIT DATASET
  4 | #
  5 | # Time-stamp: <Wednesday 2021-04-21 13:17:11 AEST Graham Williams>
  6 | #
  7 | # Copyright (c) 2009-2014 Togaware Pty Ltd
  8 | #
  9 | # This file is part of Rattle.
 10 | #
 11 | # Rattle is free software: you can redistribute it and/or modify it
 12 | # under the terms of the GNU General Public License as published by
 13 | # the Free Software Foundation, either version 2 of the License, or
 14 | # (at your option) any later version.
 15 | #
 16 | # Rattle is distributed in the hope that it will be useful, but
 17 | # WITHOUT ANY WARRANTY; without even the implied warranty of
 18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 19 | # General Public License for more details.
 20 | #
 21 | # You should have received a copy of the GNU General Public License
 22 | # along with Rattle. If not, see <https://www.gnu.org/licenses/>.
 23 | #
 24 | ########################################################################
 25 | #
 26 | # Generate an audit dataset that is fictional but illustrates the typical
 27 | # financial audit. The UCI "adult" survey data is downloaded (and cached
 28 | # in the working directory as survey.csv), then reshaped and renamed.
 29 | #
 30 | # write.to.file: if TRUE, also write audit.RData, audit.csv, audit.arff
 31 | #   and audit_missing.csv to the working directory.
 32 | # Returns the 2000 row audit data frame (invisibly if write.to.file).
 33 | 
 34 | acquireAuditData <- function(write.to.file=FALSE)
 35 | {
 36 |   if (!file.exists('survey.csv'))
 37 |   {
 38 |     UCI <- "https://archive.ics.uci.edu/ml"
 39 |     REPOS <- "machine-learning-databases"
 40 |     survey.url <- sprintf("%s/%s/adult/adult.data", UCI, REPOS)
 41 |     download.file(survey.url, "survey.data")
 42 |     survey <- read.csv("survey.data", header=FALSE, strip.white=TRUE,
 43 |                        na.strings="?",
 44 |                        col.names=c("Age", "Workclass", "fnlwgt", 
 45 |                          "Education", "Education.Num", "Marital.Status", 
 46 |                          "Occupation", "Relationship", "Race", "Gender", 
 47 |                          "Capital.Gain", "Capital.Loss", 
 48 |                          "Hours.Per.Week", "Native.Country", 
 49 |                          "Salary.Group"))
 50 |     write.table(survey, "survey.csv", sep=",", row.names=FALSE)
 51 |   }
 52 | 
 53 |   # The level renaming and as.integer() recoding below rely on factors;
 54 |   # since R 4.0.0 read.csv() only returns them when asked to.
 55 |   survey <- read.csv("survey.csv", stringsAsFactors=TRUE)
 56 | 
 57 |   audit <- survey[,c(1,2,4,6,7,8,10,12,13,14,11,15)]
 58 | 
 59 |   colnames(audit)[2] <- "Employment"
 60 |   colnames(audit)[4] <- "Marital"
 61 |   colnames(audit)[6] <- "Income"
 62 |   colnames(audit)[8] <- "Deductions"
 63 |   colnames(audit)[9] <- "Hours"
 64 |   colnames(audit)[10] <- "Accounts"
 65 |   colnames(audit)[11] <- "Adjustment"
 66 |   colnames(audit)[12] <- "Adjusted"
 67 | 
 68 |   audit$Adjusted <- as.integer(audit$Adjusted)-1
 69 | 
 70 |   # Make sure most productive cases have an adjustment
 71 |   adj <- audit[audit$Adjusted==0 & audit$Adjustment != 0, 'Adjustment']
 72 |   a <- length(adj)
 73 |   m <- length(audit[audit$Adjusted==1 & audit$Adjustment==0,'Adjusted'])
 74 |   r <- m%/%a*a
 75 | 
 76 |   set.seed(12345)
 77 |   audit[audit$Adjusted==1 & audit$Adjustment==0, 'Adjustment'][sample(m, r)] <-
 78 |     as.integer(adj*(rnorm(r) + 2))
 79 | 
 80 |   # Make sure no nonproductive case has an adjustment
 81 |   audit[audit$Adjusted==0 & audit$Adjustment!=0,'Adjustment'] <- 0
 82 | 
 83 |   # Tidyup ForeignAccounts
 84 |   levels(audit$Accounts)[6] <- "NewZealand"
 85 |   levels(audit$Accounts)[8] <- "Singapore"
 86 |   levels(audit$Accounts)[15] <- "Holand"
 87 |   levels(audit$Accounts)[28] <- "Fiji"
 88 |   levels(audit$Accounts)[33] <- "Malaysia"
 89 |   levels(audit$Accounts)[35] <- "Vietnam"
 90 |   levels(audit$Accounts)[38] <- "Indonesia"
 91 |   levels(audit$Accounts)[39] <- "UnitedStates"
 92 | 
 93 |   # Tidyup Employment
 94 |   levels(audit$Employment)[1] <- "PSFederal"
 95 |   levels(audit$Employment)[2] <- "PSLocal"
 96 |   levels(audit$Employment)[3] <- "Unemployed"
 97 |   levels(audit$Employment)[5] <- "SelfEmp"
 98 |   levels(audit$Employment)[6] <- "Consultant"
 99 |   levels(audit$Employment)[7] <- "PSState"
100 |   levels(audit$Employment)[8] <- "Volunteer"
101 | 
102 |   # Tidyup Marital
103 |   levels(audit$Marital)[2] <- "Married"
104 |   levels(audit$Marital)[3] <- "Married"
105 |   levels(audit$Marital)[4] <- "Absent"
106 |   levels(audit$Marital)[5] <- "Unmarried"
107 | 
108 |   # Tidyup Occupation
109 |   levels(audit$Occupation)[1] <- "Clerical"
110 |   levels(audit$Occupation)[2] <- "Military"
111 |   levels(audit$Occupation)[3] <- "Repair"
112 |   levels(audit$Occupation)[4] <- "Executive"
113 |   levels(audit$Occupation)[5] <- "Farming"
114 |   levels(audit$Occupation)[6] <- "Cleaner"
115 |   levels(audit$Occupation)[7] <- "Machinist"
116 |   levels(audit$Occupation)[8] <- "Service"
117 |   levels(audit$Occupation)[9] <- "Home"
118 |   levels(audit$Occupation)[10] <- "Professional"
119 |   levels(audit$Occupation)[11] <- "Protective"
120 |   levels(audit$Occupation)[12] <- "Sales"
121 |   levels(audit$Occupation)[13] <- "Support"
122 |   levels(audit$Occupation)[14] <- "Transport"
123 | 
124 |   # Tidyup Education
125 |   levels(audit$Education)[1] <- "Yr10"
126 |   levels(audit$Education)[2] <- "Yr11"
127 |   levels(audit$Education)[3] <- "Yr12"
128 |   levels(audit$Education)[4] <- "Yr1t4"
129 |   levels(audit$Education)[5] <- "Yr5t6"
130 |   levels(audit$Education)[6] <- "Yr7t8"
131 |   levels(audit$Education)[7] <- "Yr9"
132 |   levels(audit$Education)[8] <- "Associate"
133 |   levels(audit$Education)[9] <- "Vocational"
134 |   levels(audit$Education)[10] <- "Bachelor"
135 |   levels(audit$Education)[11] <- "Doctorate"
136 |   levels(audit$Education)[12] <- "HSgrad"
137 |   levels(audit$Education)[13] <- "Master"
138 |   levels(audit$Education)[14] <- "Preschool"
139 |   levels(audit$Education)[15] <- "Professional"
140 |   levels(audit$Education)[16] <- "College"
141 | 
142 |   # Turn Relationship into Income
143 |   set.seed(12345)
144 |   audit$Income <- round(abs(as.numeric(audit$Income)*rnorm(length(audit$Income),
145 |                                                            35000, 15000)), 2)
146 | 
147 |   # Make deductions look more 0 for the non-productive cases!
148 |   audit[audit$Adjusted==0,'Deductions'] <-
149 |     audit[audit$Adjusted==0,'Deductions']/1.5
150 | 
151 |   # Sample just 2000 cases and add an Identifier - always the same
152 |   set.seed(12345)
153 |   cases <- sample(nrow(audit), 2000)
154 |   set.seed(12345)
155 |   idents <- as.integer(sort(runif(2000, 1000000, 9999999)))
156 |   audit <- cbind(ID=idents, audit[cases,])
157 | 
158 |   # Use standard prefixes
159 |   colnames(audit)[11] <- "IGNORE_Accounts" # randomForest can't handle
160 |   colnames(audit)[12] <- "RISK_Adjustment" 
161 |   colnames(audit)[13] <- "TARGET_Adjusted"
162 | 
163 |   audit.orig <- audit
164 | 
165 |   # Write out the data
166 |   if (write.to.file) 
167 |   {
168 |     # NOTE(review): the freshly generated data is replaced here by the
169 |     # contents of an existing audit.csv (an error if there is none) -
170 |     # presumably to keep the published dataset stable; confirm.
171 |     audit <- read.csv("audit.csv", stringsAsFactors=TRUE)
172 |     save(audit, file="audit.RData", compress=TRUE)
173 |     write.table(audit, "audit.csv", sep=",", row.names=FALSE)
174 | 
175 |     arff <- audit
176 |     arff$TARGET_Adjusted <- as.factor(arff$TARGET_Adjusted)
177 |     foreign::write.arff(arff, "audit.arff")
178 | 
179 |     # Create a dataset with special variable names.
180 |     # 080709 I now do this as default.
181 | 
182 |     # colnames(audit)[11] <- "IGNORE_Accounts"
183 |     # colnames(audit)[12] <- "RISK_Adjustment"
184 |     # write.table(audit, "audit_auto.csv", sep=",", row.names=FALSE)
185 | 
186 |     # Create a dataset with many more missing values.
187 |     mr <- sample(1:nrow(audit), nrow(audit)/4, replace=TRUE)
188 |     mc <- sample(2:(ncol(audit)-1), nrow(audit)/4, replace=TRUE)
189 | 
190 |     for (i in 1:(nrow(audit)/4))
191 |     {
192 |       audit[mr[i], mc[i]] <- NA
193 |     }
194 |     write.table(audit, "audit_missing.csv", sep=",", row.names=FALSE)
195 |   }
196 | 
197 |   if (write.to.file)
198 |     invisible(audit.orig)
199 |   else
200 |     return(audit.orig)
201 | }
202 | 
203 | 
204 | 


--------------------------------------------------------------------------------
/R/appendLibLog.R:
--------------------------------------------------------------------------------
 1 | #' Append a command to the Log tab dealing with namespaces
 2 | #'
 3 | #' Time-stamp: <2016-09-19 11:30:05 Graham Williams>
 4 | #'
 5 | #' Since 160722 this simply passes comment and ... on to appendLog()
 6 | #' and returns NULL; everything after the early return() is disabled.
 7 | #' The disabled 150828 code checks the commands for any namespace
 8 | #' usage, includes an appropriate library() call for each, removes the
 9 | #' namespaces from the commands themselves and joins the commands
10 | #' with newlines before inserting them at the end of the Log textview.
11 | #'
12 | #' @param comment      A message to include as a comment.
13 | #' @param ...          The command(s) to report in the log.
14 | #' @param include.libs Include any required library() calls. Only
15 | #'   read by the disabled code.
16 | appendLibLog <- function(comment, ..., include.libs=TRUE)
17 | {
18 |   ## 160722 PLEASE NOTE
19 |   #
20 |   # I decided to revert to using appendLog() and exposing the ::
21 |   # operator since I do so in my book and users can get familiar with
22 |   # it and choose, but it is also more succinct.
23 |   #
24 |   appendLog(comment, ...)
25 |   return()
26 |   ## 150828 UNREACHABLE after the return() above; kept for reference.
27 |   # This started as the old appendLog but with a simplified parameter
28 |   # list and added in the extraction of namespaces and then rewrite
29 |   # the commands to not include the namespace.
30 | 
31 |   # Only continue if this is called from inside Rattle.
32 |   
33 |   if (is.null(crv$rattleGUI)) return()
34 | 
35 |   # Identify namespace string and namespace string with function.
36 |   
37 |   ns  <- '([a-zA-Z0-9_\\.]+)::'
38 |   nsf <- stringr::str_c(ns, '([a-zA-Z0-9_\\.]+)')
39 |   
40 |   cmds <-
41 |     list(...) %>%
42 |     unlist() %>%
43 |     stringr::str_c(collapse="\n")
44 | 
45 |   libs <-
46 |     cmds %>%
47 |     stringr::str_extract_all(nsf) %>%
48 |     unlist() %>%
49 |     unique() %>%
50 |     stringr::str_split('::') %>%
51 |     unlist()
52 | 
53 |   # 150917 Keep make check quiet....
54 |   pkg <- fun <- funs <- "." <- NULL
55 |   
56 |   if (is.null(libs))
57 |     include.libs <- FALSE
58 |   else
59 |     libs %<>%
60 |       matrix(, ncol=2, byrow=TRUE) %>%
61 |       data.frame(stringsAsFactors=FALSE) %>%
62 |       magrittr::set_names(c("pkg", "fun")) %>%
63 |       dplyr::group_by(pkg) %>%
64 |       dplyr::summarise(funs=paste(fun, collapse="(), ")) %>%
65 |       dplyr::group_by(pkg) %>%
66 |       dplyr::summarise(cmd=sprintf("library(%s) # Provides %s().", pkg, funs)) %>%
67 |       magrittr::extract2(2) %>%
68 |       stringr::str_c(collapse="\n")
69 | 
70 |   cmds %<>%
71 |     stringr::str_replace_all(ns, "")
72 | 
73 |   msg <-
74 |     (if (include.libs) libs %s+% "\n\n" else "") %s+%
75 |     cmds %>%
76 |     paste(sep="", crv$start.log.comment, comment, crv$end.log.comment, .)
77 | 
78 |   # Always place the text at the end of the Log tab textview
79 |   # irrespective of where the cursor is.
80 | 
81 |   log.buf <-
82 |     theWidget("log_textview") $
83 |     getBuffer()
84 |   
85 |   location <-
86 |     log.buf $
87 |     getEndIter() $
88 |     iter
89 |   
90 |   log.buf $ insert(location, msg)
91 | }
92 | 
93 | 


--------------------------------------------------------------------------------
/R/asRules.rpart.R:
--------------------------------------------------------------------------------
 1 | # Rattle: A GUI for Data Mining in R
 2 | #
 3 | # RPART RULES
 4 | #
 5 | # Time-stamp: <2020-05-13 11:42:26 Graham Williams>
 6 | #
 7 | # Copyright (c) 2009-2014 Togaware Pty Ltd
 8 | #
 9 | # This file is part of Rattle.
10 | #
11 | # Rattle is free software: you can redistribute it and/or modify it
12 | # under the terms of the GNU General Public License as published by
13 | # the Free Software Foundation, either version 2 of the License, or
14 | # (at your option) any later version.
15 | #
16 | # Rattle is distributed in the hope that it will be useful, but
17 | # WITHOUT ANY WARRANTY; without even the implied warranty of
18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19 | # General Public License for more details.
20 | #
21 | # You should have received a copy of the GNU General Public License
22 | # along with Rattle. If not, see <https://www.gnu.org/licenses/>.
23 | 
24 | # S3 generic: print a fitted model as a set of rules.
25 | asRules <- function(model, compact=FALSE, ...) UseMethod("asRules")
26 | 
27 | # asRules.rpart: print each leaf of an rpart tree as a rule.
28 | #   model   - an rpart object (anything else is an error).
29 | #   compact - TRUE for one line per rule, FALSE for a verbose listing.
30 | #   classes - if not NULL, only report leaves predicting these values.
31 | #   ...     - unused.
32 | # Invisibly returns the model$frame row indices in the order visited.
33 | asRules.rpart <- function(model, compact=FALSE, classes=NULL, ...)
34 | {
35 |   if (!inherits(model, "rpart")) stop(Rtxt("Not a legitimate rpart tree"))
36 |   # if (model$method != "class")) stop("Model method needs to be class")
37 |   ylevels <- attr(model, "ylevels")
38 |   rtree <- length(ylevels) == 0   # No class levels: regression tree.
39 |   target <- as.character(attr(model$terms, "variables")[2])
40 |   frm <- model$frame
41 |   node.ids <- row.names(frm)
42 |   ds.size <- frm[1,]$n            # n of the first frame row (the root).
43 |   if (rtree)
44 |     # Sort rules by coverage
45 |     ordered <- rev(sort(frm$n, index.return=TRUE)$ix)
46 |   else
47 |     # Sort rules by probability of second class (usually the last in binary class)
48 |     # NOTE(review): yval2 column 5 is hardwired - confirm for >2 classes.
49 |     ordered <- rev(sort(frm$yval2[,5], index.return=TRUE)$ix)
50 |   # Print each leaf node as a rule.
51 |   for (i in ordered)
52 |   {
53 |     if (frm[i,1] != "<leaf>") next
54 |     if (rtree)
55 |       yval <- frm[i,]$yval
56 |     else
57 |       yval <- ylevels[frm[i,]$yval]
58 |     if (!is.null(classes) && !(yval %in% classes)) next
59 |     cover <- frm[i,]$n
60 |     pcover <- round(100*cover/ds.size)
61 |     if (! rtree) prob <- frm[i,]$yval2[,5]
62 |     cat("\n")
63 |     pth <- rpart::path.rpart(model, nodes=as.numeric(node.ids[i]), print.it=FALSE)
64 |     pth <- unlist(pth)[-1]
65 |     if (! length(pth)) pth <- "True"
66 |     if (compact)
67 |     {
68 |       cat(sprintf("R%03s ", node.ids[i]))
69 |       if (rtree)
70 |         # No class probability exists for a regression tree (prob is
71 |         # never assigned), so report the predicted value instead.
72 |         cat(sprintf("[%2.0f%%,%0.2f]", pcover, yval))
73 |       else
74 |         cat(sprintf("[%2.0f%%,%0.2f]", pcover, prob))
75 |       cat(sprintf(" %s", pth), sep="")
76 |     }
77 |     else
78 |     {
79 |       cat(sprintf(Rtxt(" Rule number: %s "), node.ids[i]))
80 |       if (rtree)
81 |         cat(sprintf("[%s=%s cover=%d (%.0f%%)]\n",
82 |                     target, yval, cover, pcover))
83 |       else
84 |         cat(sprintf("[%s=%s cover=%d (%.0f%%) prob=%0.2f]\n",
85 |                     target, yval, cover, pcover, prob))
86 |       cat(sprintf("   %s\n", pth), sep="")
87 |     }
88 |   }
89 |   cat("\n")
90 |   invisible(ordered)
91 | }
92 | 


--------------------------------------------------------------------------------
/R/biclust.R:
--------------------------------------------------------------------------------
  1 | # Gnome R Data Miner: GNOME interface to R for Data Mining
  2 | #
  3 | # Time-stamp: <2017-09-10 10:08:08 Graham Williams>
  4 | #
  5 | # Implement biclust functionality.
  6 | #
  7 | # Copyright (c) 2010 Togaware Pty Ltd
  8 | #
# This file is part of Rattle.
 10 | #
 11 | # Rattle is free software: you can redistribute it and/or modify it
 12 | # under the terms of the GNU General Public License as published by
 13 | # the Free Software Foundation, either version 2 of the License, or
 14 | # (at your option) any later version.
 15 | #
 16 | # Rattle is distributed in the hope that it will be useful, but
 17 | # WITHOUT ANY WARRANTY; without even the implied warranty of
 18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 19 | # General Public License for more details.
 20 | #
 21 | # You should have received a copy of the GNU General Public License
 22 | # along with Rattle. If not, see <https://www.gnu.org/licenses/>.
 23 | 
 24 | ########################################################################
 25 | # ToDo 100121
 26 | #
 27 | # Graphical display of output.
 28 | # Allow choice of methods and options
 29 | 
 30 | ########################################################################
 31 | # Callbacks
 32 | 
 33 | # When a radio button is selected, display the appropriate tab page.
 34 | 
 35 | on_biclust_radiobutton_toggled <- function(button)
 36 | {
 37 |   if (button$getActive())
 38 |     crv$CLUSTER$setCurrentPage(crv$CLUSTER.BICLUST.TAB)
 39 |   setStatusBar()
 40 | }
 41 | 
 42 | ########################################################################
 43 | # Execution
 44 | 
 45 | executeClusterBiclust <- function(include)
 46 | {
 47 |   TV <- "biclust_textview"
 48 |   sampling  <- not.null(crs$train)
 49 | 
 50 |   # Obtain interface information.
 51 | 
 52 |   method <- "BCCC"
 53 |   seed <- "crv$seed"
 54 | 
 55 |   # Start the log.
 56 |   
 57 |   startLog(commonName(crv$BICLUST))
 58 | 
 59 |   # Load the required package.
 60 |   
 61 |   lib.cmd <- "library(biclust, quietly=TRUE)"
 62 |   if (! packageIsAvailable("biclust", Rtxt("perform bicluster analysis"))) return(FALSE)
 63 |   appendLog(packageProvides('biclust', 'biclust'), lib.cmd)
 64 |   eval(parse(text=lib.cmd))
 65 | 
 66 |   # Set the seed so we can repeat.
 67 | 
 68 |   seed.cmd <- sprintf('set.seed(%s)', seed)
 69 |   appendLog(Rtxt("Reset the random number seed to obtain the same results each time."),
 70 |             seed.cmd)
 71 |   eval(parse(text=seed.cmd))
 72 | 
 73 |   # Build the model.
 74 |   
 75 |   biclust.cmd <- sprintf(paste('crs$biclust <- biclust(',
 76 |                                'as.matrix(na.omit(crs$dataset[%s, %s])),',
 77 |                                'method=%s)', sep=""),
 78 |                          ifelse(sampling, "crs$train", ""),
 79 |                          include, method)
 80 | 
 81 |   appendLog(sprintf(Rtxt("Generate %s using method '%s'."),
 82 |                     commonName(crv$BICLUST), method),
 83 |             biclust.cmd)
 84 | 
 85 |   start.time <- Sys.time()
 86 | 
 87 |   result <- try(eval(parse(text=biclust.cmd)), TRUE)
 88 |   time.taken <- Sys.time()-start.time
 89 | 
 90 |   # Check for errors.
 91 |   
 92 |   if (inherits(result, "try-error"))
 93 |   {
 94 |     errorDialog(errorMessageFun("biclust", result))
 95 |     return(FALSE)
 96 |   }
 97 | 
 98 |   # Show the results.
 99 | 
100 |   print.cmd <- "print(crs$biclust)"
101 |   
102 |   appendLog(sprintf(Rtxt("Generate a textual view of the %s model."),
103 |                     commonName(crv$BICLUST)),
104 |             print.cmd)
105 |   
106 |   resetTextview(TV)
107 |   setTextview(TV,
108 |               sprintf(Rtxt("Summary of the %s model (built using '%s'):"),
109 |                       commonName(crv$BICLUST), "biclust"),
110 |               "\n",
111 |               collectOutput(print.cmd))
112 | 
113 |   reportTimeTaken(TV, time.taken, model=commonName(crv$BICLUST))
114 | 
115 |   return(TRUE)
116 | }
117 | 
118 | 


--------------------------------------------------------------------------------
/R/binning.R:
--------------------------------------------------------------------------------
  1 | # Rattle: A GUI for Data Mining in R
  2 | #
  3 | # BIN DATA
  4 | #
  5 | # Gnome R Data Miner: GNOME interface to R for Data Mining
  6 | #
  7 | # Time-stamp: <2014-09-05 21:27:32 gjw>
  8 | #
  9 | # Copyright (c) 2009-2014 Togaware Pty Ltd
 10 | #
# This file is part of Rattle.
 12 | #
 13 | # Rattle is free software: you can redistribute it and/or modify it
 14 | # under the terms of the GNU General Public License as published by
 15 | # the Free Software Foundation, either version 2 of the License, or
 16 | # (at your option) any later version.
 17 | #
 18 | # Rattle is distributed in the hope that it will be useful, but
 19 | # WITHOUT ANY WARRANTY; without even the implied warranty of
 20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 21 | # General Public License for more details.
 22 | #
 23 | # You should have received a copy of the GNU General Public License
 24 | # along with Rattle. If not, see <https://www.gnu.org/licenses/>.
 25 | #
 26 | #-----------------------------------------------------------------------
 27 | #
 28 | # 070131 From Daniele Medri.
 29 | # 111025 Support wtd.quantiles suggested by Brenton R. Stone.
 30 | 
 31 | binning <- function (x, bins=4,
 32 |                      method=c("quantile", "wtd.quantile", "kmeans"),
 33 |                      labels=NULL, ordered=TRUE,
 34 |                      weights=NULL)
 35 | {
 36 |   # Set ordered to FALSE in Rattle since randomForests don't work when
 37 |   # the factor is ordered, for some reason (080406).
 38 |   
 39 |   # Best k for natural breaks
 40 | 
 41 |   varkmeans <- function (x, centers, iter.max=10, num.seeds=bins)
 42 |   {
 43 |     if (mode(x) == "numeric")
 44 |     {
 45 |       x <- data.frame(new.x=x)
 46 |     }
 47 |     KM <- kmeans(x=x, centers=centers, iter.max=iter.max)
 48 |     for (i in seq_len(num.seeds))
 49 |     {
 50 |       newKM <- kmeans(x=x, centers=centers, iter.max=iter.max)
 51 |       if (sum(newKM$withinss) < sum(KM$withinss))
 52 |       {
 53 |         KM <- newKM
 54 |       }
 55 |     }
 56 |     KM$tot.withinss <- sum(KM$withinss)
 57 |     xmean <- apply(x, 2, mean)
 58 |     centers <- rbind(KM$centers, xmean)
 59 |     bss1 <- as.matrix(dist(centers)^2)
 60 |     KM$betweenss <- sum(as.vector(bss1[nrow(bss1), ]) * c(KM$size, 0))
 61 |     return(KM)
 62 |   }
 63 | 
 64 |   method <- match.arg(method)
 65 |   if(is.factor(x)) stop(Rtxt("This variable is already a factor."))
 66 |   if (is.data.frame(x)) stop(Rtxt("An object of class data.frame is required."))
 67 |   if (length(x) < bins) stop(Rtxt("There are more bins than observations."))
 68 |   if (method == "wtd.quantile" &&
 69 |       ! packageIsAvailable("Hmisc", Rtxt("weighted quantile binning")))
 70 |     stop(Rtxt("wtd.quantile requires the Hmisc package."))
 71 |   
 72 |   # Binning
 73 | 
 74 |   x <- if (method == "quantile")
 75 |   {
 76 |     breaks <- c(quantile(x, probs = seq(0, 1, 1/bins), na.rm = TRUE, type=8))
 77 |     breaks <- unique(breaks)
 78 |     breaks[1] <- min(x, na.rm=TRUE)
 79 |     breaks[length(breaks)] <- max(x, na.rm=TRUE)
 80 |     # quantiles from quantile() can be non-unique, which cut() doesn't
 81 |     # like. This is handled above through unique(). The function
 82 |     # cut2() in Hmisc handles this situation gracefully and it could
 83 |     # be used, but it is not necessary.
 84 |     if(length(breaks) >= 2)
 85 |     {
 86 |       cut(x, breaks, include.lowest = TRUE, labels = labels)
 87 |     }
 88 |     else
 89 |     {
 90 |       cat(Rtxt("Warning: the variable is not considered.\n"))
 91 |       return(NULL)
 92 |     }
 93 |   }
 94 |   else if (method == "wtd.quantile")
 95 |   {
 96 |     breaks <- c(Hmisc::wtd.quantile(x, weights=weights, probs=seq(0, 1, 1/bins),
 97 |                                     na.rm=TRUE, type="quantile"))
 98 |     breaks <- unique(breaks)
 99 |     breaks[1] <- min(x, na.rm=TRUE)
100 |     breaks[length(breaks)] <- max(x, na.rm=TRUE)
101 |     # quantiles from quantile() can be non-unique, which cut() doesn't
102 |     # like. This is handled above through unique(). The function
103 |     # cut2() in Hmisc handles this situation gracefully and it could
104 |     # be used, but it is not necessary.
105 |     if(length(breaks) >= 2)
106 |     {
107 |       cut(x, breaks, include.lowest = TRUE, labels = labels)
108 |     }
109 |     else
110 |     {
111 |       cat(Rtxt("Warning: the variable is not considered.\n"))
112 |       return(NULL)
113 |     }
114 |   }
115 |   else if (method == "kmeans")
116 |   {
117 |     xx <- na.omit(x)
118 |     maxbins <-nlevels(as.factor(xx))
119 |     if(maxbins < bins)
120 |     { 
121 |       bins <-maxbins
122 |     }
123 |     breaks <- c(min(xx), tapply(xx, varkmeans(xx, bins)$cluster, max))
124 |     if (length(unique(breaks)) >= 2)
125 |     {
126 |       cut(x, unique(breaks), include.lowest = TRUE, labels = labels)	
127 |     }
128 |     else
129 |     {
130 |       cat(Rtxt("Warning: the variable is not considered.\n"))
131 |       return(NULL)	
132 |     }
133 |   }
134 | 
135 |   if(ordered == TRUE)
136 |     result <- ordered(factor(x))
137 |   else
138 |     result <- factor(x)
139 | 
140 |   attr(result, "breaks") <- breaks
141 |   return(result)
142 | }
143 | 


--------------------------------------------------------------------------------
/R/clara.R:
--------------------------------------------------------------------------------
 1 | # Gnome R Data Miner: GNOME interface to R for Data Mining
 2 | #
 3 | # Time-stamp: <2011-06-23 21:17:19 Graham Williams>
 4 | #
# Implement clara functionality.
 6 | #
 7 | # Copyright (c) 2010 Togaware Pty Ltd
 8 | #
# This file is part of Rattle.
10 | #
11 | # Rattle is free software: you can redistribute it and/or modify it
12 | # under the terms of the GNU General Public License as published by
13 | # the Free Software Foundation, either version 2 of the License, or
14 | # (at your option) any later version.
15 | #
16 | # Rattle is distributed in the hope that it will be useful, but
17 | # WITHOUT ANY WARRANTY; without even the implied warranty of
18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19 | # General Public License for more details.
20 | #
21 | # You should have received a copy of the GNU General Public License
22 | # along with Rattle. If not, see <https://www.gnu.org/licenses/>.
23 | 
24 | ########################################################################
25 | # ToDo 100121
26 | #
27 | # Execute.
28 | # Graphical display of output.
29 | # Allow choice of methods.
30 | 
31 | ########################################################################
32 | # Callbacks
33 | 
34 | # When a radio button is selected, display the appropriate tab page.
35 | 
# Callback for the clara radio button: when the button becomes active
# switch the cluster notebook to the clara page, then refresh the
# status bar.
on_clara_radiobutton_toggled <- function(button)
{
  selected <- button$getActive()
  if (selected)
  {
    crv$CLUSTER$setCurrentPage(crv$CLUSTER.CLARA.TAB)
  }
  setStatusBar()
}
42 | 
43 | 


--------------------------------------------------------------------------------
/R/cluster.R:
--------------------------------------------------------------------------------
  1 | # Gnome R Data Miner: GNOME interface to R for Data Mining
  2 | #
  3 | # Time-stamp: <2014-07-18 15:08:01 gjw>
  4 | #
  5 | # Implement cluster functionality.
  6 | #
  7 | # Copyright (c) 2009 Togaware Pty Ltd
  8 | #
# This file is part of Rattle.
 10 | #
 11 | # Rattle is free software: you can redistribute it and/or modify it
 12 | # under the terms of the GNU General Public License as published by
 13 | # the Free Software Foundation, either version 2 of the License, or
 14 | # (at your option) any later version.
 15 | #
 16 | # Rattle is distributed in the hope that it will be useful, but
 17 | # WITHOUT ANY WARRANTY; without even the implied warranty of
 18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 19 | # General Public License for more details.
 20 | #
 21 | # You should have received a copy of the GNU General Public License
 22 | # along with Rattle. If not, see <https://www.gnu.org/licenses/>.
 23 | 
 24 | ########################################################################
 25 | # EXECUTION
 26 | 
 27 | executeClusterTab <- function()
 28 | {
 29 |   # Can not cluster without a dataset.
 30 | 
 31 |   if (noDatasetLoaded()) return()
 32 | 
 33 |   # If it looks like the VARIABLES page has not been executed, complain..
 34 | 
 35 |   if (variablesHaveChanged(Rtxt("building clusters"))) return()
 36 | 
 37 |   # Check if sampling needs executing.
 38 | 
 39 |   if (sampleNeedsExecute()) return()
 40 | 
 41 |   # 091216 Automatically handle any selected categorics by converting
 42 |   # them to numeric, so they then become included variables. This
 43 |   # works, but it risks suprising the user with hte addition of new
 44 |   # variables outside their control. So let's leave it to the user to
 45 |   # do the transforms, or use clara.
 46 | 
 47 |   # factors <- crs$input[sapply(crs$input, function(x)
 48 |   #                             is.factor(crs$dataset[[x]]))]
 49 |   # sapply(factors, executeTransformRemapPerform, action="indicator",
 50 |   #        remap.prefix="TIN")
 51 |   
 52 |   # Kmeans and hclust only work for numeric data, so identify
 53 |   # variables to include.  Only work with the INPUT/TARGET/RISK
 54 |   # variables. That is, only exclude the IGNORE and IDENT variables.
 55 | 
 56 |   include <- "crs$numeric" # 20110102 getNumericVariables()
 57 |   if (! length(include))
 58 |   {
 59 |     errorDialog(Rtxt("Clusters are currently calculated only for numeric data.",
 60 |                      "No numeric variables were found in the dataset",
 61 |                      "from amongst those having an input/target/risk role."))
 62 |     return()
 63 |   }
 64 | 
 65 |   # Dispatch.
 66 | 
 67 |   if (theWidget("kmeans_radiobutton")$getActive())
 68 |   {
 69 |     if (executeClusterKMeans(include))
 70 |       theWidget("evaluate_kmeans_checkbutton")$setActive(TRUE)
 71 |   }
 72 |   else if (theWidget("ewkm_radiobutton")$getActive())
 73 |   {
 74 |     if (executeClusterEwkm(include))
 75 |       theWidget("evaluate_kmeans_checkbutton")$setActive(TRUE)
 76 |   }
 77 |   ## else if (theWidget("clara_radiobutton")$getActive())
 78 |   ## {
 79 |   ##   infoDialog(Rtxt("Not yet implemented."))
 80 |   ##   if (executeClusterClara(include))
 81 |   ##     theWidget("evaluate_clara_checkbutton")$setActive(TRUE)
 82 |   ## }
 83 |   ## else if (theWidget("pam_radiobutton")$getActive())
 84 |   ## {
 85 |   ##   infoDialog(Rtxt("Not yet implemented."))
 86 |   ##   if (executeClusterPam(include))
 87 |   ##     theWidget("evaluate_pam_checkbutton")$setActive(TRUE)
 88 |   ## }
 89 |   else if (theWidget("hclust_radiobutton")$getActive())
 90 |   {
 91 |     if (executeClusterHClust(include))
 92 |       theWidget("evaluate_hclust_checkbutton")$setActive(TRUE)
 93 |   }
 94 |   else if (theWidget("biclust_radiobutton")$getActive())
 95 |   {
 96 |     executeClusterBiclust(include)
 97 | #      theWidget("evaluate_biclust_checkbutton")$setActive(TRUE)
 98 |   }
 99 | }
100 | 
101 | ########################################################################
102 | # EXPORT
103 | 
# Export the cluster model selected by the radio buttons on the Cluster
# tab. Only kmeans, ewkm and hclust models can be exported; for any
# other choice an error dialog is shown.
exportClusterTab <- function()
{
  # Nothing to export without a dataset.

  if (noDatasetLoaded()) return()

  # Hand over to the exporter of the first active radio button.

  if (theWidget("kmeans_radiobutton")$getActive()) return(exportKMeansTab())

  if (theWidget("ewkm_radiobutton")$getActive()) return(exportEwkmTab())

  if (theWidget("hclust_radiobutton")$getActive()) return(exportHClustTab())

  # No exporter is available for the selected model.

  errorDialog(Rtxt("PMML export for this model is not yet implemented."))
  return()
}
127 | 


--------------------------------------------------------------------------------
/R/comcat.R:
--------------------------------------------------------------------------------
# 20170129 Convenience function combining format() (using a comma as
# the thousands separator and no scientific notation) with cat() so
# that the formatted value is written out, followed by a newline,
# rather than being shown through print().

comcat <- function(x, ...)
{
  formatted <- format(x, ..., big.mark=",", scientific=FALSE, trim=TRUE)
  cat(formatted, "\n")
}
8 | 
9 | 


--------------------------------------------------------------------------------
/R/ctree.R:
--------------------------------------------------------------------------------
  1 | # Gnome R Data Miner: GNOME interface to R for Data Mining
  2 | #
  3 | # Time-stamp: <2017-09-10 10:08:18 Graham Williams>
  4 | #
  5 | # CTREE OPTION OF THE TREE TAB
  6 | #
  7 | # Copyright (c) 2009 Togaware Pty Ltd
  8 | #
# This file is part of Rattle.
 10 | #
 11 | # Rattle is free software: you can redistribute it and/or modify it
 12 | # under the terms of the GNU General Public License as published by
 13 | # the Free Software Foundation, either version 2 of the License, or
 14 | # (at your option) any later version.
 15 | #
 16 | # Rattle is distributed in the hope that it will be useful, but
 17 | # WITHOUT ANY WARRANTY; without even the implied warranty of
 18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 19 | # General Public License for more details.
 20 | #
 21 | # You should have received a copy of the GNU General Public License
 22 | # along with Rattle. If not, see <https://www.gnu.org/licenses/>.
 23 | 
 24 | ########################################################################
 25 | #
 26 | # Model -> Tree -> Conditional
 27 | #
 28 | 
 29 | # 100815 TODO The "partykit" package from R-Forge (only for now)
 30 | # includes .list.rules.party() to convert tree into rules:
 31 | #
 32 | # install.packages("partykit", repos = "https://R-Forge.R-project.org")
 33 | # library("partykit")
 34 | # Rebuild the ctree as partykit provides new ctree.
 35 | # partykit:::.list.rules.party(crs$rpart)
 36 | 
 37 | executeModelCTree <- function()
 38 | {
 39 |   # 080815 This is currently just copied from rpart.R, and slowly
 40 |   # being tuned for ctree specifically.
 41 |   
 42 |   # Initial setup 
 43 | 
 44 |   TV <- "rpart_textview"
 45 | 
 46 |   num.classes <- length(levels(as.factor(crs$dataset[[crs$target]])))
 47 |   control <- NULL
 48 |   parms <- NULL
 49 | 
 50 |   # Scrape the value of the tuning controls
 51 | 
 52 |   tune.controls <- theWidget("rpart_tune_entry")$getText()
 53 |   
 54 |   # Retrieve the Priors, and check there is the right number and that
 55 |   # they add up to 1.
 56 |   
 57 |   priors <- theWidget("model_tree_priors_entry")$getText()
 58 |   if (nchar(priors) > 0)
 59 |   {
 60 |     pr <- as.numeric(unlist(strsplit(priors, ",")))
 61 |     if (length(pr) != num.classes)
 62 |       {
 63 |         errorDialog(sprintf(Rtxt("The supplied priors (%s)",
 64 |                                  "need to correspond to the number of classes",
 65 |                                  "found in the target variable '%s'.",
 66 |                                  "Please supply exactly %d priors."),
 67 |                             priors, crs$target, num.classes))
 68 |         return(FALSE)
 69 |       }
 70 |     if (sum(pr) != 1)
 71 |       {
 72 |         errorDialog(sprintf(Rtxt("The supplied priors (%s)",
 73 |                                  "add up to %0.2f whereas",
 74 |                                  "they need to add up 1.00.",
 75 |                                  "Please provide appropriate priors."),
 76 |                             priors, sum(pr)))
 77 |         return(FALSE)
 78 |       }
 79 |     if (is.null(parms))
 80 |       parms <- sprintf(", parms=list(prior=c(%s))", priors)
 81 |     else
 82 |       parms <- gsub(")$", sprintf(", prior=c(%s)", priors), parms)
 83 |   }
 84 | 
 85 |   # Retrieve the Min Split and check if it is different from the
 86 |   # default, and if so then use it.
 87 | 
 88 |   minsplit <- theWidget("rpart_minsplit_spinbutton")$getValue()
 89 |   if (minsplit != crv$rpart.minsplit.default)
 90 |   {
 91 |     if (is.null(control))
 92 |       control <- sprintf(", control=ctree_control(minsplit=%d)", minsplit)
 93 |     else
 94 |       control <- gsub(")$", sprintf(", minsplit=%d)", minsplit), control)
 95 |   }
 96 | 
 97 |   # Retrieve the Min Bucket and check if it is different from the
 98 |   # default, and if so then use it.
 99 | 
100 |   minbucket <- theWidget("rpart_minbucket_spinbutton")$getValue()
101 |   if (minbucket != crv$rpart.minbucket.default)
102 |   {
103 |     if (is.null(control))
104 |       control <- sprintf(", control=ctree_control(minbucket=%d)", minbucket)
105 |     else
106 |       control <- gsub(")$", sprintf(", minbucket=%d)", minbucket), control)
107 |   }
108 | 
109 |   # Retrieve the Max Depth and check if it is different from the
110 |   # default, and if so then use it.
111 | 
112 |   maxdepth <- theWidget("rpart_maxdepth_spinbutton")$getValue()
113 |   if (maxdepth != crv$rpart.maxdepth.default)
114 |   {
115 |     if (is.null(control))
116 |       control <- sprintf(", control=ctree_control(maxdepth=%d)", maxdepth)
117 |     else
118 |       control <- gsub(")$", sprintf(", maxdepth=%d)", maxdepth), control)
119 |   }
120 | 
121 |   # Build the formula for the model.
122 | 
123 |   frml <- paste(crs$target, "~ .")
124 | 
125 |   # Variables to be included --- a string of indicies.
126 |   
127 |   # included <- getIncludedVariables()
128 |   included <- "c(crs$input, crs$target)" # 20110102
129 |   
130 |   # Some convenience booleans
131 | 
132 |   sampling  <- not.null(crs$train)
133 |   including <- not.null(included)
134 |   subsetting <- sampling || including
135 |   
136 |   # Commands.
137 |   
138 |   lib.cmd <- "library(party, quietly=TRUE)"
139 |   if (! packageIsAvailable("party", Rtxt("build conditional trees"))) return(FALSE)
140 | 
141 |   fit.cmd <- paste("crs$rpart <- ctree(", frml, ", data=crs$dataset",
142 |                    if (subsetting) "[",
143 |                    if (sampling) "crs$train",
144 |                    if (subsetting) ",",
145 |                    if (including) included,
146 |                    if (subsetting) "]",
147 |                    if (! is.null(crs$weights))
148 |                    sprintf(",\n    weights=as.integer(%s)%s",
149 |                            crs$weights,
150 |                            ifelse(sampling, "[crs$train]", "")),
151 |                    ifelse(is.null(control), "", control),
152 |                    ")", sep="")
153 | 
154 |   print.cmd <- "print(crs$rpart)"
155 |                                
156 |   # Load the required library.
157 | 
158 |   startLog(Rtxt("Conditional inference tree."))
159 |   appendLog(Rtxt("Build a conditional tree using the party package."), lib.cmd)
160 | 
161 |   eval(parse(text=lib.cmd))
162 | 
163 |   # Build the model.
164 | 
165 |   appendLog(Rtxt("Build a ctree model."), fit.cmd)
166 |   start.time <- Sys.time()
167 |   result <- try(eval(parse(text=fit.cmd)), silent=TRUE)
168 |   time.taken <- Sys.time()-start.time
169 |   if (inherits(result, "try-error"))
170 |   {
171 |     errorDialog(errorMessageFun("ctree", result))
172 |     return(FALSE)
173 |   }
174 | 
175 |   # Display the resulting model.
176 | 
177 |   appendLog(Rtxt("Generate summary of the ctree model."), print.cmd)
178 | 
179 |   resetTextview(TV)
180 |   setTextview(TV,
181 |               sprintf(Rtxt("Summary of the %s model for %s (built using '%s'):\n"),
182 |                       commonName("ctree"),
183 |                       Rtxt("Classification"), # 080604 TODO put the right type
184 |                       "ctree"),
185 |               collectOutput(print.cmd), "\n")
186 | 
187 |   if (sampling) crs$smodel <- union(crs$smodel, crv$RPART)
188 | 
189 |   # Now that we have a model, make sure the rules and plot buttons are
190 |   # not visible.
191 |   
192 |   showModelRPartExists()
193 | 
194 |   # Finish up.
195 | 
196 |   reportTimeTaken(TV, time.taken, model=commonName(crv$RPART))
197 | 
198 |   return(TRUE)
199 | }
200 | 


--------------------------------------------------------------------------------
/R/errorMatrix.R:
--------------------------------------------------------------------------------
 1 | #' @title Generate an error (confusion) matrix.
 2 | #'
 3 | #' @param actual a vector of true values.
 4 | #' @param predicted a vector of predicted values.
 5 | #' @param percentage return percentages.
 6 | #' @param digits the number of digits to round results.
 7 | #' @param count return counts.
 8 | #'
 9 | #' @value An error matrix (also known as a confusion matrix) is
10 | #'   generated based on the comparison of the actual and predicted
11 | #'   values. One of three forms is returned: percentages (pc), counts,
12 | #'   or proportions (if both percentage and counts are FALSE).
13 | 
14 | errorMatrix <- function(actual,
15 |                         predicted,
16 |                         percentage=TRUE,
17 |                         digits=ifelse(percentage,1,3),
18 |                         count=FALSE)
19 | {
20 |   # Preconditions.
21 |   
22 |   if (!missing(percentage) & percentage & count)
23 |     stop("percentages not possible as counts were specified")
24 | 
25 |   # Data quality checks.
26 |   #
27 |   # If both actual and predicted are factors they must
28 |   # have the same levels in the same order else the table will have
29 |   # rearranged column or row orders - the table is expeted to have the
30 |   # labels in the same order column and row wise.
31 |   #
32 |   # If either is a factor and the other a character then convert the
33 |   # character to a factor with the levels of the factor used.
34 |   #
35 |   # If both are character or numeric leave it to table() to sort out.
36 |   
37 |   if (is.factor(actual) & is.factor(predicted))
38 |   {
39 |     if (! all(levels(actual) == levels(predicted)))
40 |       stop("The supplied actual and predicted must have the same levels.")
41 |   } else if (is.factor(actual))
42 |   {
43 |     predicted <- factor(predicted, levels=levels(actual))
44 |   } else if (is.factor(predicted))
45 |   {
46 |     actual <- factor(actual, levels=levels(predicted))
47 |   }
48 |   
49 |   # Initial table.
50 |   
51 |   x   <- table(actual, predicted)
52 | 
53 |   # Number of classes.
54 |   
55 |   nc  <- nrow(x)
56 | 
57 |   # Number of values.
58 |   
59 |   nv  <- length(actual) - sum(is.na(actual) | is.na(predicted))
60 | 
61 |   # Calculate proportions.
62 |   
63 |   if (!count) x <- x/nv
64 | 
65 |   # Calculate class error. For row r this is the sum of all values in
66 |   # the row minus the r'th value, divided by the sum of all values in
67 |   # the row. If count then the error is returned as a percentage rather
68 |   # than a proportion.
69 |   
70 |   tbl <- cbind(x,
71 |                Error=sapply(1:nc,
72 |                             function(r)
73 |                             {
74 |                               y <- sum(x[r,-r])/sum(x[r,])
75 |                               if (count) y <- round(100*y, digits)
76 |                               return(y)
77 |                             }))
78 | 
79 |   names(attr(tbl, "dimnames")) <- c("Actual", "Predicted")
80 | 
81 |   # Round the resulting percentages or proportions unless we are
82 |   # returning count.
83 |   
84 |   if (!count) tbl <- if (percentage) round(100*tbl, digits) else round(tbl, digits)
85 | 
86 |   return(tbl)
87 | }
88 | 


--------------------------------------------------------------------------------
/R/execute.R:
--------------------------------------------------------------------------------
  1 | # Gnome R Data Miner: GNOME interface to R for Data Mining
  2 | #
  3 | # Time-stamp: <2015-05-17 08:55:47 gjw>
  4 | #
  5 | # Implement functionality associated with the Execute button and Menu.
  6 | #
  7 | # Copyright (c) 2009-2013 Togaware Pty Ltd
  8 | #
# This file is part of Rattle.
 10 | #
 11 | # Rattle is free software: you can redistribute it and/or modify it
 12 | # under the terms of the GNU General Public License as published by
 13 | # the Free Software Foundation, either version 2 of the License, or
 14 | # (at your option) any later version.
 15 | #
 16 | # Rattle is distributed in the hope that it will be useful, but
 17 | # WITHOUT ANY WARRANTY; without even the implied warranty of
 18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 19 | # General Public License for more details.
 20 | #
 21 | # You should have received a copy of the GNU General Public License
 22 | # along with Rattle. If not, see <https://www.gnu.org/licenses/>.
 23 | 
 24 | on_execute_button_clicked <- function(action, window)
 25 | {
 26 |   # 100402 Allow Execute to be running just once, irrespective of the
 27 |   # number of times the Execute button is clicked. Otherwise we get a
 28 |   # second load of a CSV dataset whilst still loading the first.
 29 |   
 30 |   if (! is.null(crv$executing) && crv$executing) return()
 31 |   crv$executing <- TRUE
 32 |   on.exit(crv$executing <- FALSE)
 33 | 
 34 |   # Wrap up the actual call with a "try" so that the watch cursor
 35 |   # turns off even on error.
 36 | 
 37 |   setStatusBar()
 38 | 
 39 |   # 081117 This ensures spinbuttons, for example, lose focus and hence
 40 |   # their current value is properly noted. Otherwise I was finding the
 41 |   # user had to either press Enter or click somewhere else to ensure
 42 |   # the value is noted.
 43 | 
 44 |   theWidget("rattle_window")$setFocus()
 45 | 
 46 |   # 090102 Set the cursor to busy, and make sure on failure or
 47 |   # interrupt we set it back. TODO Currently, I can interrupt with
 48 |   # Ctrl-C in the console, and that does interrupt the Rattle process,
 49 |   # but I can't work out how to get a Ctrl-C (or perhaps an ESC) in
 50 |   # the Rattle GUI to cause an interrupt.
 51 | 
 52 |   set.cursor("watch")
 53 |   tryCatch(dispatchExecuteButton(),
 54 |            interrupt=function(m) setStatusBar(Rtxt("Processing interrupted by user.")),
 55 |            finally=set.cursor())
 56 | 
 57 | #  library(multicore)
 58 | #  set.cursor("watch")
 59 | #  crs$process <- parallel(dispatchExecuteButton())
 60 | #                                   interrupt=function(m)
 61 | #                                   setStatusBar("Processing interrupted by user."),
 62 | #                                   finally=set.cursor()))
 63 | #  collect()
 64 | 
 65 |   # 090103 Return nothing, otherwise we get the results from the
 66 |   # tryCatch above.
 67 | 
 68 |   return()
 69 | }
 70 | 
 71 | dispatchExecuteButton <- function()
 72 | {
 73 |   # Check which tab of notebook and dispatch to appropriate execute action
 74 | 
 75 |   ct <- getCurrentPageLabel(crv$NOTEBOOK)
 76 |   # REMOVE 100424 No longer required here - this is done earlier now.
 77 |   # Encoding(ct) <- "UTF-8" # 100408 For French, but see if it's okay always!
 78 | 
 79 |   if (ct == crv$NOTEBOOK.DATA.NAME)
 80 |   {
 81 |     executeDataTab()
 82 |   }
 83 |   else if (ct == crv$NOTEBOOK.EXPLORE.NAME)
 84 |   {
 85 |     executeExploreTab()
 86 |   }
 87 |   else if (ct == crv$NOTEBOOK.TEST.NAME)
 88 |   {
 89 |     executeTestTab()
 90 |   }
 91 |   else if (ct == crv$NOTEBOOK.TRANSFORM.NAME)
 92 |   {
 93 |     executeTransformTab()
 94 |   }
 95 |   else if (ct == crv$NOTEBOOK.CLUSTER.NAME)
 96 |   {
 97 |     executeClusterTab()
 98 |   }
 99 |   else if (ct == crv$NOTEBOOK.ASSOCIATE.NAME)
100 |   {
101 |     executeAssociateTab()
102 |   }
103 |   else if (ct == crv$NOTEBOOK.MODEL.NAME)
104 |   {
105 |     executeModelTab()
106 |   }
107 |   else if (ct == crv$NOTEBOOK.EVALUATE.NAME)
108 |   {
109 | 
110 |     # The wrap mode of the confusion_textview may have been set to
111 |     # word wrap when a model was Executed if it had more than 2
112 |     # classes, since a message is printed about ROCR etc not handling
113 |     # any more than 2 classes.
114 | 
115 |     theWidget("confusion_textview")$setWrapMode("none")
116 |     executeEvaluateTab()
117 |   }
118 |   else if (ct == crv$NOTEBOOK.LOG.NAME)
119 |   {
120 |     executeLogTab()
121 |   }
122 |   else
123 |   {
124 |     errorDialog(Rtxt("'dispatchExecuteButton' has been called with an unknown tab."),
125 |                 "\n\n", ct,
126 |                 "\n\n", crv$support.msg)
127 |     return()
128 |   }
129 | }
130 | 


--------------------------------------------------------------------------------
/R/executeBoxPlot2.R:
--------------------------------------------------------------------------------
 1 | #' Display boxplots using ggplot2.
 2 | #' 
 3 | #' Time-stamp: <2016-09-19 19:28:44 Graham Williams>
 4 | #' 
 5 | executeBoxPlot2 <- function(dataset, vars, target, targets, stratify, sampling, pmax)
 6 | {
 7 |   # Check prerequisite packages.
 8 |   
 9 |   if (!packageIsAvailable("ggplot2", Rtxt("build plots using a grammar of graphics"))) return(FALSE)
10 |   if (!packageIsAvailable("gridExtra", Rtxt("arrange plots on a grid"))) return(FALSE)
11 |   if (!packageIsAvailable("dplyr", Rtxt("mutate the supplied dataset"))) return(FALSE)
12 | 
13 |   # Report to the Log script.
14 |   
15 |   startLog(Rtxt("Display box plots for the selected variables."))
16 | 
17 |   # Start a new plot as we could be drawing multiple types of plots.
18 |   
19 |   newPlot()
20 | 
21 |   for (i in seq_along(vars))
22 |   {
23 |     title.txt <- genPlotTitleCmd(generateTitleText(vars[i],
24 |                                                    target,
25 |                                                    sampling,
26 |                                                    stratify && length(targets)),
27 |                                  vector=TRUE)
28 | 
29 |     plot.cmd <- stringr::str_c('# Generate a box plot.\n\n',
30 |                                sprintf("p%02d", i), ' <- crs %>%\n',
31 |                                '  with(', dataset, ') %>%\n',
32 |                                if (length(target))
33 |                                  stringr::str_c('  dplyr::mutate(', target,
34 |                                                 '=as.factor(', target, ')) %>%\n'),
35 |                                '  ggplot2::ggplot(ggplot2::aes(y=', vars[i], ')) +\n',
36 |                                '  ggplot2::geom_boxplot(ggplot2::aes(x="All"), ',
37 |                                'notch=TRUE, fill="grey") +\n',
38 |                                '  ggplot2::stat_summary(ggplot2::aes(x="All"), ',
39 |                                'fun.y=mean, geom="point", shape=8) +\n',
40 |                                if (length(target))
41 |                                  stringr::str_c('  ggplot2::geom_boxplot(',
42 |                                                 'ggplot2::aes(x=', target, ', ',
43 |                                                 'fill=', target, '), notch=TRUE) +\n',
44 |                                                 '  ggplot2::stat_summary(',
45 |                                                 'ggplot2::aes(x=', target, '), ',
46 |                                                 'fun.y=mean, geom="point", ',
47 |                                                 'shape=8) +\n'),
48 |                                '  ggplot2::xlab("',
49 |                                if (length(target))
50 |                                  stringr::str_c(target, '\\n\\n'),
51 |                                title.txt[2], '") +\n',
52 |                                '  ggplot2::ggtitle("', title.txt[1], '") +\n',
53 |                                '  ggplot2::theme(legend.position="none")')
54 |   
55 |     comment <- paste(Rtxt("Use ggplot2 to generate box plot for"), vars[i])
56 |     appendLibLog(comment, plot.cmd, include.libs=(i==1))
57 |     eval(parse(text=plot.cmd))
58 |   }
59 | 
60 |   display.cmd <-
61 |     "gridExtra::grid.arrange(" %s+%
62 |     paste(sprintf("p%02d", seq_len(i)), collapse=", ") %s+%
63 |     ")"
64 | 
65 |   appendLibLog("Display the plots.", display.cmd)
66 |   eval(parse(text=display.cmd))
67 | 
68 | }
69 | 
70 | 


--------------------------------------------------------------------------------
/R/executeExploreGGRaptR.R:
--------------------------------------------------------------------------------
 1 | #' Perform the required operations for displaying interactive plot generator.
 2 | #' 
 3 | #' Time-stamp: <2017-08-10 17:07:41 Graham Williams>
 4 | #' 
 5 | executeExploreGGRaptR <- function(df_name, df)
 6 | {
 7 |   # Check prerequisite packages.
 8 | 
 9 |   if (!packageIsAvailable("ggraptR", 
10 |                           Rtxt("interactively generate ggplot2 graphics")))
11 |     return(FALSE)
12 |   
13 |   startLog(Rtxt("Display interactive plot builder."))
14 | 
15 |   df_file <- 'ggraptr_df.rds'
16 |   saveRDS(df, file=df_file)
17 |   r_expr <- sprintf(
18 |     '%s <- readRDS(\'%s\');file.remove(\'%s\');ggraptR::ggraptR(%s, port=5002)', 
19 |     df_name, df_file, df_file, df_name)
20 |    
21 |   appendLog("Initiate the ggraptR application in a browser", r_expr)
22 |   
23 |   system(sprintf('R -q --vanilla -e "%s"', r_expr), wait=F, intern=F) 
24 |   
25 |   return()
26 | }
27 | 


--------------------------------------------------------------------------------
/R/executeHistPlot2.R:
--------------------------------------------------------------------------------
 1 | #' Perform the required operations for displaying histograms using ggplot2.
 2 | #' 
 3 | #' Time-stamp: <2016-09-19 17:06:40 Graham Williams>
 4 | #' 
 5 | executeHistPlot2 <- function(dataset, vars, target, targets, stratify, sampling, pmax)
 6 | {
 7 |   # Check prerequisite packages.
 8 | 
 9 |   if (!packageIsAvailable("ggplot2", Rtxt("build plots using a grammar of graphics"))) return()
10 |   if (!packageIsAvailable("dplyr", Rtxt("mutate the supplied dataset"))) return()
11 |   if (!packageIsAvailable("gridExtra", Rtxt("arrange plots on a grid"))) return()
12 | 
13 |   startLog(Rtxt("Display histogram plots for the selected variables."))
14 | 
15 |   # We start a new plot since we could be drawing multiple types of
16 |   # plots.
17 |   
18 |   newPlot()
19 | 
20 |   for (i in seq_along(vars))
21 |   {
22 |     title.txt <- genPlotTitleCmd(generateTitleText(vars[i],
23 |                                                    target,
24 |                                                    sampling,
25 |                                                    stratify && length(targets)),
26 |                                  vector=TRUE)
27 | 
28 |     plot.cmd <- stringr::str_c('# Generate the plot.\n\n',
29 |                                sprintf("p%02d", i), ' <- crs %>%\n',
30 |                                '  with(', dataset, ') %>%\n',
31 |                                if (length(target))
32 |                                  stringr::str_c('  dplyr::mutate(', target,
33 |                                                 '=as.factor(', target, ')) %>%\n'),
34 |                                '  dplyr::select(', vars[i],
35 |                                ifelse(length(target), stringr::str_c(", ", target), ""),
36 |                                ') %>%\n',
37 |                                '  ggplot2::ggplot(ggplot2::aes(x=', vars[i], ')) +\n',
38 |                                '  ggplot2::geom_density(lty=3) +\n',
39 |                                ifelse(length(target),
40 |                                       stringr::str_c('  ggplot2::geom_density(ggplot2',
41 |                                                      sprintf("::aes(fill=%s, colour=%s)",
42 |                                                              target, target),
43 |                                                      ', alpha=0.55) +\n'),
44 |                                       ""),
45 |                                '  ggplot2::xlab("', vars[i],
46 |                                '\\n\\n', title.txt[2], '") +\n',
47 |                                '  ggplot2::ggtitle("', title.txt[1], '") +\n',
48 |                                '  ggplot2::labs(',
49 |                                ifelse(length(target),
50 |                                       stringr::str_c('fill="', target, '", '),
51 |                                       ""),
52 |                                'y="Density")')
53 | 
54 |     ## plot.cmd <- stringr::str_c('# Calculate the variable value range.\n\n',
55 |     ##                            'vrange <- crs %>%\n',
56 |     ##                            '  with(', dataset, ') %>%\n', # Need access to crs vars
57 |     ##                            '  dplyr::select(', vars[i], ') %>%\n',
58 |     ##                            '  range(na.rm=TRUE)\n\n',
59 |     ##                            '# Then detemine a good bin width for the bars.\n\n',
60 |     ##                            'bwidth <- crs %>%\n',
61 |     ##                            '  with(', dataset, '$', vars[i], ') %>%\n',
62 |     ##                            '  na.omit() %>%\n',
63 |     ##                            '  nclass.FD() %>%\n',
64 |     ##                            '  magrittr::divide_by(vrange[2]-vrange[1], .)\n\n',
65 |     ##                            '# Generate the plot.\n\n',
66 |     ##                            sprintf("p%02d", i), ' <- crs %>%\n',
67 |     ##                            '  with(', dataset, ') %>%\n',
68 |     ##                            '  dplyr::select(', vars[i],
69 |     ##                            ifelse(length(target), stringr::str_c(", ", target), ""),
70 |     ##                            ') %>%\n',
71 |     ##                            '  ggplot2::ggplot(ggplot2::aes(x=', vars[i], ')) +\n',
72 |     ##                            '  ggplot2::geom_histogram(ggplot2::aes(y=..density..), ',
73 |     ##                            'binwidth=bwidth, fill="grey", colour="black") +\n',
74 |     ##                            '  ggplot2::geom_density(', 
75 |     ##                            ifelse(length(target),
76 |     ##                                   sprintf("ggplot2::aes(colour=%s)", target), ""),
77 |     ##                            ') +\n',
78 |     ##                            '  ggplot2::xlab("', vars[i],
79 |     ##                            '\\n\\n', title.txt[2], '") +\n',
80 |     ##                            '  ggplot2::ggtitle("', title.txt[1], '") +\n',
81 |     ##                            '  ggplot2::labs(colour="", y="Density")')
82 | 
83 |     comment <- paste(Rtxt("Use ggplot2 to generate histogram plot for"), vars[i])
84 |     appendLibLog(comment, plot.cmd, include.libs=(i==1))
85 |     eval(parse(text=plot.cmd))
86 |   }
87 | 
88 |   display.cmd <-
89 |     "gridExtra::grid.arrange(" %s+%
90 |     paste(sprintf("p%02d", seq_len(i)), collapse=", ") %s+%
91 |     ")"
92 | 
93 |   appendLibLog("Display the plots.", display.cmd)
94 |   eval(parse(text=display.cmd))
95 |   
96 | }
97 | 


--------------------------------------------------------------------------------
/R/executeLogTab.R:
--------------------------------------------------------------------------------
 1 | # Gnome R Data Miner: GNOME interface to R for Data Mining
 2 | #
 3 | # Time-stamp: <2014-07-24 21:30:01 gjw>
 4 | #
 5 | # Execute Log Tab
 6 | #
 7 | # Copyright (c) 2014 Togaware Pty Ltd
 8 | #
 9 | # This file is part of Rattle.
10 | #
11 | # Rattle is free software: you can redistribute it and/or modify it
12 | # under the terms of the GNU General Public License as published by
13 | # the Free Software Foundation, either version 2 of the License, or
14 | # (at your option) any later version.
15 | #
16 | # Rattle is distributed in the hope that it will be useful, but
17 | # WITHOUT ANY WARRANTY; without even the implied warranty of
18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19 | # General Public License for more details.
20 | #
21 | # You should have received a copy of the GNU General Public License
22 | # along with Rattle. If not, see <https://www.gnu.org/licenses/>.
23 | 
executeLogTab <- function()
{
  # Run the contents of the log textview as R code. The text is
  # parsed as a whole first, then evaluated; eval() is called from
  # this function so the code runs in this function's frame.

  log.text <- getTextviewContent("log_textview")
  parsed <- parse(text=log.text)
  eval(parsed)
}
29 | 
30 | 


--------------------------------------------------------------------------------
/R/executeModelAda.R:
--------------------------------------------------------------------------------
 1 | #----------------------------------------------------------------------
 2 | #
 3 | # MODEL ADA
 4 | #
 5 | 
 6 | executeModelAda <- function(dataset, formula)
 7 | {
 8 |   # Initial setup. 
 9 |   
10 |   TV  <- "ada_textview"
11 |   VAR <- "crs$ada"
12 | 
13 |   # Build model.
14 |   
15 |   crs$ada <- buildModelAda(
16 |     formula,
17 |     dataset,
18 |     tv       = theWidget("ada_textview"),
19 |     maxdepth = theWidget("ada_maxdepth_spinbutton")$getValue(),
20 |     minsplit = theWidget("ada_minsplit_spinbutton")$getValue(),
21 |     cp       = theWidget("ada_cp_spinbutton")$getValue(),
22 |     xval     = theWidget("ada_xval_spinbutton")$getValue(),
23 |     ntree    = theWidget("ada_ntree_spinbutton")$getValue())
24 |   
25 |   return(TRUE)
26 | }
27 | 


--------------------------------------------------------------------------------
/R/executeModelRxBTrees.r:
--------------------------------------------------------------------------------
 1 | #' Build a Linear model.
 2 | #' 
 3 | #' Time-stamp: <2017-08-18 12:13:21 Graham Williams>
 4 | #'
 5 | executeModelRxBTrees <- function()
 6 | {
 7 |   # Initial setup. 
 8 |   
 9 |   TV <- "ada_textview"
10 |   VAR <- "crs$ada"    
11 |   NAME <- "Boosted Trees"
12 |   FUNC <- "rxBTrees"
13 |   
14 |   # Formula Creation for the model.
15 |   
16 |   crs$target %>%
17 |     paste("~", paste(crs$input, collapse=" + ")) %>%
18 |     strwrap(crv$log_width, 0, 4) %>%
19 |     paste(collapse="\n") ->
20 |   frml
21 | 
22 |   # Build the model build command.
23 | 
24 |   # TODO Need to allow parameters to be set from the GUI.
25 | 
26 |   build.cmd <- paste0(VAR, " <- ", FUNC, "(\n\n  ", frml, ",\n\n",
27 |                       "  data     = crs$xdf.split[[1]],\n",
28 |                       "  maxDepth = 30,\n",
29 |                       "  cp       = 0.01,\n",
30 |                       "  minSplit = 20",
31 |                       ")")
32 | 
33 |   # Build the model.
34 | 
35 |   appendLog(Rtxt("Build a rxBTrees model."),
36 |             build.cmd, sep="")
37 |   start.time <- Sys.time()
38 |   result <- try(eval(parse(text=build.cmd)), silent=TRUE)
39 |   summary.cmd <- "print(summary(crs$ada))"
40 |   
41 |   print.cmd <- paste0("print(", VAR, ")")
42 |   
43 |   # Text view
44 |   resetTextview(TV)
45 |   setTextview(TV,
46 |               sprintf(Rtxt("Boosted Trees built using %s"),
47 |                       FUNC),
48 |               "\n\n",
49 |               collectOutput(print.cmd))  
50 |   return(TRUE)
51 | }
52 | 


--------------------------------------------------------------------------------
/R/executeModelRxDForest.R:
--------------------------------------------------------------------------------
 1 | #' Build a random forest based from xdf dataset.
 2 | #' 
 3 | #' Time-stamp: <2017-08-18 12:13:50 Graham Williams>
 4 | #'
 5 | executeModelRxDForest <- function()
 6 | {
 7 |   # Identify the model specific constants.
 8 |   
 9 |   TV   <- "rf_textview"
10 |   NAME <- commonName(crv$RXDFOREST)
11 |   PKG  <- "RevoScaleR"
12 |   FUNC <- "rxDForest"
13 |   VAR  <- "crs$rf"
14 |   TYPE <- Rtxt("Classification")
15 |   DESC <- Rtxt("build an xdf based random forest model")
16 |   
17 |   # Check package prerequisites.
18 |   
19 |   if (! packageIsAvailable(PKG, DESC)) return(FALSE)
20 | 
21 |   # Construct the formula for the model build.
22 | 
23 |   crs$target %>%
24 |     paste("~", paste(crs$input, collapse=" + ")) %>%
25 |     strwrap(crv$log_width, 0, 4) %>%
26 |     paste(collapse="\n") ->
27 |   frml
28 | 
29 |   # Variables to be included --- a string of indicies.
30 | 
31 |   # included <- getIncludedVariables()
32 |   included <- "c(crs$input, crs$target)" # 20110102
33 | 
34 |   # Some convenience booleans
35 | 
36 |   sampling   <- not.null(crs$train)
37 |   including  <- not.null(included)
38 |   subsetting <- sampling || including
39 | 
40 |   # Commands.
41 | 
42 |   build.cmd <- paste0(VAR, " <- ", FUNC, "(\n\n  ", frml, ",\n\n",
43 |                       "  data       = crs$xdf.split[[1]],\n",
44 |                       "  importance = TRUE",
45 |                       ")")
46 | 
47 |   print.cmd <- paste0("print(", VAR, ")")
48 | 
49 |   startLog(NAME)
50 | 
51 |   # Build the model.
52 | 
53 |   appendLog(sprintf(Rtxt("Build the %s model."), NAME), build.cmd)
54 |   start.time <- Sys.time()
55 |   result <- try(eval(parse(text=build.cmd)), silent=TRUE)
56 |   time.taken <- Sys.time() - start.time
57 | 
58 |   # Show the results.
59 | 
60 |   resetTextview(TV)
61 |   setTextview(TV,
62 |               sprintf(Rtxt("Summary of the %s model for %s (built using '%s'):"),
63 |                       NAME, TYPE, FUNC),
64 |               "\n\n",
65 |               collectOutput(print.cmd))
66 | 
67 |   # Now that we have a model, make sure the buttons are sensitive.
68 | 
69 |   showModelRFExists(traditional=TRUE, conditional=FALSE)
70 | 
71 |   # Finish up.
72 | 
73 |   reportTimeTaken(TV, time.taken, NAME)
74 | 
75 |   return(TRUE)
76 | }
77 | 


--------------------------------------------------------------------------------
/R/executeModelXGB.R:
--------------------------------------------------------------------------------
 1 | 
 2 | #----------------------------------------------------------------------
 3 | #
 4 | # MODEL XGB
 5 | #
 6 | 
 7 | executeModelXGB <- function(dataset, formula)
 8 | {
 9 |   # Initial setup. 
10 |   
11 |   TV <- "ada_textview"
12 |   VAR <- "crs$ada"
13 |  
14 |    # Build model
15 |   
16 |   crs$ada <- buildModelXgb(formula,
17 |                     dataset,
18 |                     tv=theWidget("ada_textview"),
19 |                     max_depth=theWidget("ada_maxdepth_spinbutton")$getValue(),
20 |                     eta=theWidget("ada_learningrate_spinbutton")$getValue(),
21 |                     #num_parallel_tree=theWidget("ada_ntree_spinbutton")$getValue(),
22 |                     nthread=theWidget("ada_nthread_spinbutton")$getValue(),
23 |                     nround=theWidget("ada_niter_spinbutton")$getValue(),
24 |                     #metrics=theWidget("ada_metrics_combobox")$getActiveText(),
25 |                     objective=theWidget("ada_objective_combobox")$getActiveText()
26 |                     )
27 | 				
28 |   return(TRUE)
29 | }


--------------------------------------------------------------------------------
/R/executePairsPlotSelect2.R:
--------------------------------------------------------------------------------
 1 | #' Perform the required operations for displaying a pairs plot.
 2 | #' 
 3 | #' Time-stamp: <Wednesday 2020-08-19 19:53:03 AEST Graham Williams>
 4 | #' 
 5 | executePairsPlotSelect2 <- function(dataset, vars, target, targets, stratify, sampling, pmax)
 6 | {
 7 |   startLog(Rtxt("Display a pairs plot for the selected variables."))
 8 | 
 9 |   varsi <- getVariableIndicies(vars)
10 |   
11 |  # v1 <- theWidget("pairs_color_combobox")$getActiveText()
12 |   v1 <- target
13 |   if (is.null(v1) || v1 == " ")
14 |   {
15 |     colorStr<-'' # No color selected.
16 |   }
17 |   else
18 |   {
19 |     colorStr<-sprintf('mapping=ggplot2::aes(colour=%s, alpha=0.5, shape=%s),', v1, v1)
20 |   }
21 | 
22 |   plot.cmd <- paste0(dataset, ' %>%\n',
23 |                      '  dplyr::mutate(', v1, '=as.factor(', v1, ')) %>%\n',
24 |                      '  GGally::ggpairs(columns=c(',
25 |                      paste(varsi, collapse=','), '),\n', 
26 |                      if (colorStr!="") paste0('        ', colorStr, "\n"),
27 |                      '                diag=list(continuous="densityDiag",\n',
28 |                      '                          discrete="barDiag"),\n',
29 |                      '                upper=list(continuous="cor",\n',
30 |                      '                           combo="box",\n',
31 |                      '                           discrete="ratio"),\n',
32 |                      '                lower=list(continuous="points",\n',
33 |                      '                           combo="denstrip",\n',
34 |                      '                           discrete="facetbar"),\n',
35 |                      '                legend=3)',
36 |                      ' +\n  ggplot2::theme(panel.grid.major=ggplot2::element_blank(), ',
37 |                      'legend.position="bottom")',
38 |                      ' +\n  ggplot2::scale_alpha_continuous(guide=FALSE)',
39 |                      ' +\n  ggplot2::scale_fill_brewer(palette=rattlePalette)',
40 |                      ' +\n  ggplot2::scale_colour_brewer(palette=rattlePalette)')
41 |   # When this next blank theme is included we get bad plots???? Some
42 |   # problem with colour.
43 |   #
44 |   #                         '         panel.grid.minor=ggplot2::element_blank())')
45 |       
46 |   appendLog(Rtxt("Use GGally's ggpairs() to do the hard work."), plot.cmd)
47 |   newPlot()
48 |   eval(parse(text=sprintf("suppressMessages(print(%s))", plot.cmd)))
49 | }
50 | 


--------------------------------------------------------------------------------
/R/fancyRpartPlot.R:
--------------------------------------------------------------------------------
  1 | # Rattle: A GUI for Data Mining in R
  2 | #
  3 | # Time-stamp: <Saturday 2020-08-15 14:58:31 AEST Graham Williams>
  4 | #
  5 | # Copyright (c) 2009-2014 Togaware Pty Ltd
  6 | #
  7 | #' Plot rpart decision trees nicely.
  8 | #'
  9 | #' @param model an rpart object
 10 | #' @param main title for the plot
 11 | #' @param sub sub title for the plot (default is a Rattle string with
 12 | #' date, time and username)
 13 | #' @param palettes a list of sequential palettes names as supported by
 14 | #' RColorBrewer::brewer.pal including Blues BuGn BuPu
 15 | #' GnBu Greens Greys Oranges OrRd PuBu PuBuGn PuRd Purples RdPu Reds
 16 | #' YlGn YlGnBu YlOrBr YlOrRd.
 17 | #' @param ... additional arguments passed on to rpart.plot::prp
 18 | #
 19 | # This files is part of Rattle.
 20 | #
 21 | # Rattle is free software: you can redistribute it and/or modify it
 22 | # under the terms of the GNU General Public License as published by
 23 | # the Free Software Foundation, either version 2 of the License, or
 24 | # (at your option) any later version.
 25 | #
 26 | # Rattle is distributed in the hope that it will be useful, but
 27 | # WITHOUT ANY WARRANTY; without even the implied warranty of
 28 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 29 | # General Public License for more details.
 30 | #
 31 | # You should have received a copy of the GNU General Public License
 32 | # along with Rattle. If not, see <https://www.gnu.org/licenses/>.
 33 | 
 34 | fancyRpartPlot <- function(model,
 35 |                            main="",
 36 |                            sub,
 37 |                            caption,
 38 |                            palettes,
 39 |                            type=2,
 40 |                            ...)
 41 | {
 42 |   if (!inherits(model, "rpart")) 
 43 |     stop("The model object must be an rpart object. ",
 44 |          "Instead we found: ", paste(class(model), collapse=", "), ".")
 45 | 
 46 |   # For new version of rpart.plot (20180710 v3.0.0).
 47 | 
 48 |   roundint <- ! is.null(model$model)
 49 |   
 50 |   # Migrate to replacing sub with caption in line with ggplot.
 51 |   
 52 |   if (missing(sub) & missing(caption))
 53 |   {
 54 |     sub <- paste("Rattle",
 55 |                  format(Sys.time(), "%Y-%b-%d %H:%M:%S"), 
 56 |                  Sys.info()["user"])
 57 |   } else
 58 |   {
 59 |     if (missing(sub)) sub <- caption
 60 |   } 
 61 |   
 62 |   num.classes <- length(attr(model, "ylevels"))
 63 | 
 64 |   # Generate a colour palette, with a range of 5 (palsize) colours for
 65 |   # each of the 6 (numpals) palettes. The palette is collapsed into
 66 |   # one list. We index it according to the class. Keep to the lighter
 67 |   # end of the palette to ensure printing is okay otherwise the black
 68 |   # text is hard to read.
 69 | 
 70 |   default.palettes <- c("Greens", "Blues", "Oranges", "Purples", "Reds", "Greys")
 71 |   if (missing(palettes))
 72 |     palettes <- default.palettes
 73 |   missed <- setdiff(1:6, seq(length(palettes)))
 74 |   palettes <- c(palettes, default.palettes[missed])
 75 | 
 76 |   numpals <- 6
 77 |   palsize <- 5
 78 |   pals <- c(RColorBrewer::brewer.pal(9, palettes[1])[1:5],
 79 |             RColorBrewer::brewer.pal(9, palettes[2])[1:5],
 80 |             RColorBrewer::brewer.pal(9, palettes[3])[1:5],
 81 |             RColorBrewer::brewer.pal(9, palettes[4])[1:5],
 82 |             RColorBrewer::brewer.pal(9, palettes[5])[1:5],
 83 |             RColorBrewer::brewer.pal(9, palettes[6])[1:5])
 84 |   
 85 |   # Extract the scores/percentages for each of the nodes for the
 86 |   # majority decision.  The decisions are in column 1 of yval2 and the
 87 |   # percentages are in the final num.classes columns.
 88 | 
 89 |   # 121106 Need to handle regression as pointed out by Yana
 90 |   # Kane-Esrig, 26 October 2012.
 91 | 
 92 |   if (model$method == "class")
 93 |   {
 94 |     yval2per <- -(1:num.classes)-1
 95 |     per <- apply(model$frame$yval2[,yval2per], 1, function(x) x[1+x[1]])
 96 |   }
 97 |   else
 98 |   {
 99 |     # 130329 This is the deviance relative the the total deviance measured at
100 |     # the root node. We use this to colour the strength of the node -
101 |     # so more intense colour means less relative deviance.
102 |     
103 |     #per <- 1 - (model$frame$dev/model$frame$dev[1])
104 | 
105 |     # 130329 Perhaps instead we want to use the yval as the intensity
106 |     # of the predicted value. Currently not handling negative values.
107 | 
108 |     per <- model$frame$yval/max(model$frame$yval)
109 |     
110 |   }
111 |   
112 |   # The conversion of a tree in CORElearn to an rpart tree results in these
113 |   # being character, so ensure we have numerics.
114 |   
115 |   per <- as.numeric(per)
116 |   
117 |   # Calculate an index into the combined colour sequence. Once we go
118 |   # above numpals * palsize (30) start over.
119 | 
120 |   if (model$method == "class")
121 |     col.index <- ((palsize*(model$frame$yval-1) +
122 |                    trunc(pmin(1 + (per * palsize), palsize))) %%
123 |                   (numpals * palsize))
124 |   else
125 |     col.index <- round(per * (palsize-1)) + 1
126 | 
127 |   # Ensure the index is positive. Thanks to John Vorwald, 8 Dec
128 |   # 2014. The bug can arise when model$frame$yval are all
129 |   # negative. The error is:
130 |   #
131 |   #  fancyRpartPlot(rtreeFit,main=paste('RPART:',cName))
132 |   #  Error in pals[col.index] : only 0's may be mixed with negative subscripts
133 |   
134 |   col.index <- abs(col.index)
135 | 
136 |   # Determine the amount of extra information added to the nodes.
137 | 
138 |   if (model$method == "class")
139 |     extra <- 104
140 |   else
141 |     extra <- 101
142 |   
143 |   # Generate the plot and title.
144 |  
145 |   rpart.plot::prp(model, type=type, extra=extra,
146 |                   box.col=pals[col.index],
147 |                   nn=TRUE,
148 |                   varlen=0, faclen=0,
149 |                   shadow.col="grey",
150 |                   fallen.leaves=TRUE,
151 |                   branch.lty=3,
152 |                   roundint=roundint,
153 |                   main=main,
154 |                   sub=sub,
155 |                   ...)
156 | }
157 | 


--------------------------------------------------------------------------------
/R/ggVarImp.R:
--------------------------------------------------------------------------------
  1 | ggVarImp <- function(model, ...) UseMethod("ggVarImp")
  2 | 
  3 | ggVarImpPlot <- function(ds,
  4 |                          n=NULL,
  5 |                          title="Variable Importance",
  6 |                          label="Relative Importance",
  7 |                          caption=genPlotTitleCmd(vector=TRUE),
  8 |                          log=FALSE)
  9 | {
 10 |   # Expect ds to contain at least the columns Variable and Importance.
 11 |   
 12 |   if (length(n) == 1L) ds <- head(ds, n)
 13 | 
 14 |   ds %>%
 15 |     dplyr::arrange(desc(Importance)) %>%
 16 |     dplyr::mutate(Variable=factor(Variable, levels=rev(unique(Variable)))) %>%
 17 |     ggplot2::ggplot(ggplot2::aes(x    = Variable,
 18 |                                  y    = Importance,
 19 |                                  fill = Variable)) +
 20 |     ggplot2::geom_bar(stat     = "identity",
 21 |                       position = "identity",
 22 |                       width    = 0.1) +
 23 |     ggplot2::labs(title   = title,
 24 |                   y       = label,
 25 |                   x       = "",
 26 |                   caption = caption) +
 27 |     ggplot2::coord_flip() +
 28 |     ggplot2::theme(axis.ticks.x = ggplot2::element_blank(),
 29 |                    axis.text.x  = ggplot2::element_blank(),
 30 |                    axis.title.x = ggplot2::element_blank(),
 31 |                    legend.position = "none") ->
 32 |   p
 33 | 
 34 |   if (log)
 35 |     p <- p + ggplot2::scale_y_continuous(trans="log10")
 36 |   else
 37 |     p <- p + ggplot2::scale_y_continuous(labels=scales::comma)
 38 | 
 39 |   return(p)
 40 | }
 41 | 
 42 | ggVarImp.randomForest <- function(model, 
 43 |                                   title="Random Forest Variable Importance",
 44 |                                   ...)
 45 | {
 46 |   # By default randomForest() only returns the MeanDecreaseGini. With
 47 |   # importance=TRUE at model build time we also get
 48 |   # MeanDecreaseAccuracy and importance relative to the target levels.
 49 |   
 50 |   randomForest::importance(model) %>%
 51 |     data.frame() %>%
 52 |     dplyr::mutate(Variable=row.names(.)) %>%
 53 |     tidyr::gather(Measure, Importance, -Variable) %>%
 54 |     dplyr::group_by(Measure) %>%
 55 |     dplyr::mutate(Importance=(max(Importance)-Importance)/(max(Importance)-min(Importance))) %>%
 56 |     ggVarImpPlot(title, ...) +
 57 |     ggplot2::facet_wrap(~ Measure)
 58 | }
 59 | 
 60 | ggVarImp.rpart <- function(model,
 61 |                            title="Decision Tree Variable Importance",
 62 |                            ...)
 63 | {
 64 |   model$variable.importance %>%
 65 |     data.frame() %>%
 66 |     magrittr::set_names("Importance") %>%
 67 |     dplyr::mutate(Variable=row.names(.)) %>%
 68 | #    dplyr::arrange(desc(Importance)) %>%
 69 | #    dplyr::mutate(Variable=factor(Variable, levels=rev(unique(Variable)))) %>%
 70 |     ggVarImpPlot(title, ...)
 71 | }
 72 | 
 73 | ggVarImp.rxDForest <- function(model,
 74 |                                title="Big Data Random Forest Variable Importance",
 75 |                                ...)
 76 | {
 77 |   model$importance %>%
 78 |     data.frame() %>%
 79 |     dplyr::mutate(Variable=row.names(.)) %>%
 80 | #    dplyr::arrange(desc(IncNodePurity)) %>%
 81 | #    dplyr::mutate(Variable=factor(Variable, levels=rev(unique(Variable)))) %>%
 82 |     dplyr::rename(Importance=IncNodePurity) %>%
 83 |     ggVarImpPlot(title, ...)
 84 | }
 85 | 
 86 | ggVarImp.xgb.Booster <- function(model, 
 87 |                                  feature_names=NULL,
 88 |                                  title="Extreme Gradient Boost Variable Importance",
 89 |                                   ...)
 90 | {
 91 |   # The model does not include the feature/colnames, so we need to
 92 |   # have an option to pass it in.
 93 |   
 94 |   xgboost::xgb.importance(feature_names=feature_names, model=model) %>%
 95 |     dplyr::rename(Variable=Feature, Importance=Gain) %>%
 96 |     dplyr::select(Variable, Importance) %>%
 97 |     ggVarImpPlot(title, ...)
 98 | }
 99 | 
ggVarImp.xgb.formula <- function(model, 
                                 feature_names=NULL,
                                 title="Extreme Gradient Boost Variable Importance",
                                  ...)
{
  # Plot variable importance for a model carrying the "xgb.formula"
  # class by dispatching on its remaining classes.
  #
  # Arguments:
  #   model          Model with "xgb.formula" among its classes.
  #   feature_names  Feature names to report. When NULL (the default)
  #                  model$dimnames is used.
  #   title          Plot title, forwarded to the next method.
  #   ...            Further arguments forwarded to the next method.
  #
  # Value: whatever ggVarImp() returns for the underlying model.

  if (is.null(feature_names)) feature_names <- model$dimnames

  # Drop our own class so the ggVarImp() call below does not dispatch
  # back to this method.

  class(model) <- setdiff(class(model), "xgb.formula")

  # Forward title and ... so that caller customisation reaches the
  # plot rather than being silently discarded.

  ggVarImp(model, feature_names=feature_names, title=title, ...)
}
108 | 


--------------------------------------------------------------------------------
/R/loadLibs.R:
--------------------------------------------------------------------------------
 1 | #' Load a list of libraries, reporting to the Rattle Log
 2 | #'
 3 | #' Only load the package if not already loaded. If already loaded then
 4 | #' we don't return the name from the function.
 5 | #'
 6 | #' @param l Vector of pairs, "package name" "used function".
 7 | #' @return returns list of packages that get loaded.
 8 | #' @rdname loadLibs
 9 | loadLibs <- function(l)
10 | {
11 |   odd   <- seq(1, length(l), 2)
12 |   lname <- l[odd]
13 |   even  <- seq(2, length(l), 2)
14 |   lfun  <- l[even]
15 |   libs  <- NULL
16 |   for (i in 1:length(odd))
17 |   {
18 |     appendLog(packageProvides(lname[i], lfun[i]), sprintf("library(%s)", lname[i]))
19 |     if (!sprintf("package:%s", lname[i]) %in% search())
20 |     {
21 |       suppressPackageStartupMessages(library(lname[i], character.only=TRUE, warn.conflicts=FALSE, quietly=TRUE))
22 |       libs <- c(libs, lname[i])
23 |     }
24 |   }
25 |   return(libs)
26 | }
27 | 
28 | 


--------------------------------------------------------------------------------
/R/loadTooltips.R:
--------------------------------------------------------------------------------
 1 | loadTooltips <- function()
 2 | {
 3 |   if (! packageIsAvailable("XML", "load GUI tooltips"))
 4 |   {
 5 |     warning("The XML package is not available. Tooltips will not be available.")
 6 |     return(FALSE)
 7 |   }
 8 | 
 9 |   result <- try(etc <- file.path(path.package(package="rattle")[1], "etc"),
10 |                 silent=TRUE)
11 |   if (inherits(result, "try-error"))
12 |     doc <- XML::xmlTreeParse("tooltips.xml", useInternalNodes=TRUE)
13 |   else
14 |     doc <- XML::xmlTreeParse(file.path(etc, "tooltips.xml"), useInternalNodes=TRUE)
15 | 
16 |   for (tt in XML::getNodeSet(doc, "//tooltip"))
17 |   {
18 |     # 100110 format the tooltip. blank lines are retained, but other
19 |     # line breaks are ignored.
20 | 
21 |     tip <- gsub("XoX", "\\\n\\\n",
22 |                 gsub("\n *", " ",
23 |                      gsub("\n *\n *", "XoX", XML::xmlValue(tt))))
24 |     wd <- theWidget(XML::xmlGetAttr(tt, 'widget'))
25 |     wd["tooltip-text"] <- Rtxt (tip) # 100408 Space after Rtxt is intentional.
26 | 
27 |   }
28 | }
29 | 


--------------------------------------------------------------------------------
/R/log.R:
--------------------------------------------------------------------------------
  1 | # Gnome R Data Miner: GNOME interface to R for Data Mining
  2 | #
  3 | # Time-stamp: <2017-09-10 09:32:39 Graham Williams>
  4 | #
  5 | # Implement LOG functionality.
  6 | #
  7 | # Copyright (c) 2009 Togaware Pty Ltd
  8 | #
  9 | # This file is part of Rattle.
 10 | #
 11 | # Rattle is free software: you can redistribute it and/or modify it
 12 | # under the terms of the GNU General Public License as published by
 13 | # the Free Software Foundation, either version 2 of the License, or
 14 | # (at your option) any later version.
 15 | #
 16 | # Rattle is distributed in the hope that it will be useful, but
 17 | # WITHOUT ANY WARRANTY; without even the implied warranty of
 18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 19 | # General Public License for more details.
 20 | #
 21 | # You should have received a copy of the GNU General Public License
 22 | # along with Rattle. If not, see <https://www.gnu.org/licenses/>.
 23 | 
 24 | ########################################################################
 25 | # CALLBACKS
 26 | 
 27 | on_log_export_rename_checkbutton_toggled <- function(button)
 28 | {
 29 |   theWidget("log_export_rename_entry")$setSensitive(button$getActive())
 30 | }
 31 | 
 32 | initiateLog <- function()
 33 | {
 34 |   # 100407 Change the font to monospace, like all other textviews.
 35 |   
 36 |   if (! isJapanese())
 37 |     theWidget("log_textview")$modifyFont(RGtk2::pangoFontDescriptionFromString(crv$textview.font))
 38 |   
 39 |   if (! is.null(crv$log.intro))
 40 |     appendTextview("log_textview",
 41 |                    paste0("#", paste0(rep("=", 71), collapse=""),
 42 |                           "\n\n", crv$log.intro),
 43 |                    tvsep=FALSE)
 44 | 
 45 |   startLog(paste(sprintf(Rtxt("%s version %s user '%s'"),
 46 |                          crv$appname, crv$version, Sys.info()["user"]),
 47 | #LOG_LICENSE
 48 |                  #sprintf("# Started %s by %s\n\n", Sys.time(), Sys.info()["user"]),
 49 |     "\n\n",
 50 |     Rtxt("# This log captures interactions with Rattle as an R script.",
 51 |          "\n\n# For repeatability, export this activity log to a",
 52 |          "\n# file, like 'model.R' using the Export button or",
 53 |          "\n# through the Tools menu. Th script can then serve as a",
 54 |          "\n# starting point for developing your own scripts.",
 55 |          "\n# After xporting to a file called 'model.R', for exmample,",
 56 |          "\n# you can type into a new R Console the command",
 57 |          "\n# \"source('model.R')\" and so repeat all actions. Generally,",
 58 |          "\n# you will want to edit the file to suit your own needs.",
 59 |          "\n# You can also edit this log in place to record additional",
 60 |          "\n# information before exporting the script.",
 61 |          "\n",
 62 |          "\n# Note that saving/loading projects retains this log."),
 63 |     "\n",
 64 |     '\n# We begin most scripts by loading the required packages.',
 65 |     '\n# Here are some initial packages to load and others will be',
 66 |     '\n# identified as we proceed through the script. When writing',
 67 |     '\n# our own scripts we often collect together the library',
 68 |     '\n# commands at the beginning of the script here.\n\n',
 69 |     crv$library.command,
 70 |     '   # Access the weather dataset and utilities.',
 71 |     '\nlibrary(magrittr) # Utilise %>% and %<>% pipeline operators.',
 72 |     "\n\n",
 73 |     Rtxt("# This log generally records the process of building a model.",
 74 |          "\n# However, with very little effort the log can also be used",
 75 |          "\n# to score a new dataset. The logical variable 'building'",
 76 |          "\n# is used to toggle between generating transformations,",
 77 |          "\n# when building a model and using the transformations,",
 78 |          "\n# when scoring a dataset."),
 79 |     "\n\nbuilding <- TRUE",
 80 |     "\nscoring  <- ! building",
 81 |     # Removed to avoid loading librarys or suggesting such
 82 |     # Moving to using namespace :: in the script.
 83 |     #ifelse(packageIsAvailable("colorspace"),
 84 |     #       paste("\n",
 85 |     #             Rtxt("# The colorspace package is used to generate",
 86 |     #                  "the colours used in plots,",
 87 |     #                  "if available."),
 88 |     #             "\n\n",
 89 |     #             "library(colorspace)", sep=""), ""),
 90 |     "\n\n",
 91 |     Rtxt("# A pre-defined value is used to reset the random seed",
 92 |          "\n# so that results are repeatable."),
 93 |     "\n\ncrv$seed <- ", crv$seed,
 94 |     sep=""))
 95 |   
 96 | }
 97 | 
 98 | startLog <- function(msg=NULL)
 99 | {
100 |   # Output a suitable separator to the log textview, and if there is
101 |   # an optional MSG, display that message, as an introduction to this
102 |   # section.
103 |   
104 |   if (is.null(crv$rattleGUI)) return()
105 | 
106 |   appendLog(paste("\n\n#",
107 |                   paste(rep("=", 71), collapse=""),
108 |                   if (not.null(crv$show.timestamp) && crv$show.timestamp)
109 |                   paste("\n# ", crv$appname, " ", Rtxt("timestamp:"), " ",
110 |                         Sys.time(), " ", version$platform, sep=""),
111 |                   sep=""),
112 |           no.start=TRUE)
113 |   if (not.null(msg))
114 |     appendLog(paste(sep="", crv$start.log.comment, msg), no.start=TRUE)
115 | }
116 | 
appendLog <- function(start, cont=NULL, ..., sep=" ", no.start=FALSE)
{
  # Append text to the end of the log textview. Nothing is done when
  # there is no Rattle GUI.
  #
  # Arguments:
  #   start     First piece of text. Unless no.start is TRUE it is
  #             prefixed with crv$start.log.comment.
  #   cont      Optional continuation, typically the command itself.
  #   ...       Further pieces pasted after cont.
  #   sep       Separator between the pieces, only used when no.start
  #             is TRUE.
  #   no.start  If TRUE paste the pieces as given, without the
  #             crv$start.log.comment/crv$end.log.comment decoration.
  #
  # 100330 cont is used to identify whether there is more than a
  # single string to print. If not, then don't include the
  # crv$end.log.comment otherwise there is too much white space in the
  # log.

  if (is.null(crv$rattleGUI)) return()

  if (no.start)
    msg <- paste(sep=sep, start, cont, ...)
  else if (is.null(cont))
    msg <- paste(sep="", crv$start.log.comment, start)
  else
    msg <- paste(sep="", crv$start.log.comment, start, crv$end.log.comment, cont, ...)
  if (length(msg) == 0) msg <- ""

  # 150712 Remove any Rtxt(...), leaving just the ... inside. Every
  # occurrence is unwrapped, not only the first one in each message.

  msg <- stringr::str_replace_all(msg, 'Rtxt\\(([^\\)]*)\\)', '\\1')

  # Always place text at the end, irrespective of where the cursor is.

  log.buf <- theWidget("log_textview")$getBuffer()
  location <- log.buf$getEndIter()$iter

  log.buf$insert(location, msg)
}
145 | 
exportLogTab <- function()
{
  # Ask for a filename and write the contents of the log textview to
  # it as an R script, optionally dropping the comments and renaming
  # the crs$ prefix.

  chooser <- RGtk2::gtkFileChooserDialog(Rtxt("Export Log"), NULL, "save",
                                         "gtk-cancel", RGtk2::GtkResponseType["cancel"],
                                         "gtk-save", RGtk2::GtkResponseType["accept"])
  chooser$setDoOverwriteConfirmation(TRUE)

  # Suggest a filename based on the dataset name, when there is one.

  if (not.null(crs$dataname))
    chooser$setCurrentName(sprintf("%s_script.R", get.stem(crs$dataname)))

  addFilter <- function(label, pattern)
  {
    filter <- RGtk2::gtkFileFilterNew()
    filter$setName(label)
    filter$addPattern(pattern)
    chooser$addFilter(filter)
  }
  addFilter(Rtxt("R Files"), "*.R")
  addFilter(Rtxt("All Files"), "*")

  # The dialog is destroyed whichever way it was answered; only an
  # accepted dialog supplies a filename.

  accepted <- chooser$run() == RGtk2::GtkResponseType["accept"]
  if (accepted) target <- chooser$getFilename()
  chooser$destroy()
  if (! accepted) return()

  if (get.extension(target) != "R")
    target <- sprintf("%s.R", target)

  script <- getTextviewContent("log_textview")

  # Without comments: drop everything from a "#" to the end of its
  # line, then collapse the runs of blank lines left behind.

  if (!theWidget("log_export_comments_checkbutton")$getActive())
  {
    script <- gsub("#[^\n]*\n", "", script)
    script <- gsub("\n\n+", "\n", script)
  }

  if (theWidget("log_export_rename_checkbutton")$getActive())
  {
    prefix <- theWidget("log_export_rename_entry")$getText()
    script <- gsub("crs\\$", prefix, script)
  }

  write(script, target)

  setStatusBar(sprintf(Rtxt("The log has been exported to '%s'."), target))
}
194 | 
packageProvides <- function(pkg, fun)
{
  # Build the log sentence stating that package PKG provides the
  # function FUN.

  template <- Rtxt("The '%s' package provides the '%s' function.")
  sprintf(template, pkg, fun)
}
199 | 
200 | 


--------------------------------------------------------------------------------
/R/normVarNames.R:
--------------------------------------------------------------------------------
normVarNames <- function(vars, sep="_")
{
  # Normalise variable names using janitor::make_clean_names() with
  # numerals="right".
  #
  # NOTE(review): sep is accepted but not used by this function -
  # presumably retained so that existing calls continue to work.

  cleaned <- janitor::make_clean_names(vars, numerals="right")
  cleaned
}
5 | 


--------------------------------------------------------------------------------
/R/psfchart.R:
--------------------------------------------------------------------------------
  1 | # Generate a PSF chart
  2 | 
  3 | # Gnome R Data Miner: GNOME interface to R for Data Mining
  4 | #
  5 | # Time-stamp: <2014-09-06 18:51:58 gjw>
  6 | #
  7 | # Implement evaluate functionality.
  8 | #
  9 | # Copyright (c) 2009-2013 Togaware Pty Ltd
 10 | #
 11 | # This file is part of Rattle.
 12 | #
 13 | # Rattle is free software: you can redistribute it and/or modify it
 14 | # under the terms of the GNU General Public License as published by
 15 | # the Free Software Foundation, either version 2 of the License, or
 16 | # (at your option) any later version.
 17 | #
 18 | # Rattle is distributed in the hope that it will be useful, but
 19 | # WITHOUT ANY WARRANTY; without even the implied warranty of
 20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 21 | # General Public License for more details.
 22 | #
 23 | # You should have received a copy of the GNU General Public License
 24 | # along with Rattle. If not, see <https://www.gnu.org/licenses/>.
 25 | 
 26 | psfchart <- function(predicted,
 27 |                      actual,
 28 |                      bins=100,          # Number of bins to use for the plot.
 29 |                      threshold=0.5,     # The decision threshold.
 30 |                      splits=NULL,	# E.g., c(0.2, 0.8)
 31 |                      split.lables=c("Low", "Medium", "High"),
 32 |                      tic.size=0.2,      # proportional gap between axis ticks.
 33 |                      gg=TRUE,
 34 |                      verbose=FALSE)
 35 | {
 36 |   dosplits <- ! is.null(splits)
 37 | 
 38 |   if (is.factor(actual)) actual <- as.numeric(actual)-1
 39 |     
 40 |   doAggregate <- function()
 41 |   {
 42 |           
 43 |     # Bin the scores into "bins" bins and store the bins into
 44 |     # variable "bin". If there are more bins specified than
 45 |     # scores, then simply rank the scores. In the end we have for
 46 |     # each score a "rank", which is an integer in 1:bins or
 47 |     # 1:length(actual).
 48 |       
 49 |     if (length(actual) >= bins)
 50 |     {
 51 |       bin <- as.numeric(binning(predicted, bins, method="quantile",
 52 |                                 ordered=FALSE, labels=FALSE))
 53 |     }
 54 |     else
 55 |     {
 56 |       bin <- reshape::rescaler(predicted, "rank")
 57 |     }
 58 | 
 59 |     # Check whether the pr and target agree.
 60 | 
 61 |     agree <- as.numeric(as.numeric(predicted > threshold) == actual)
 62 | 
 63 |     # Get the size of each bin (should all be the same +/-
 64 |     # 1). Also get a count of the positives in each bin, assumming
 65 |     # a 0/1 value for actual (so sum will work) and the accuracy
 66 |     # of each bin.
 67 | 
 68 |     agg <- aggregate(actual, list(bin), length)
 69 |     names(agg) <- c("bin", "size")
 70 |     agg$pos <- aggregate(actual, list(bin), sum)[[2]]
 71 |     agg$acc <- aggregate(agree, list(bin), sum)[[2]]
 72 |     agg$max <- aggregate(predicted, list(bin), max)[[2]]
 73 |     agg$tdiff <- agg$max - threshold
 74 |         
 75 |     # Rescale bins to be between 0 and 1 so AUC is more sensible.
 76 |   
 77 |     agg$rbin <- agg$bin/bins
 78 |   
 79 |     # Calulate the proportion accuracy
 80 |   
 81 |     agg$pacc <- agg$acc/agg$size
 82 | 
 83 |     return(agg)
 84 |   }
 85 |   
 86 |   # Determine the model's decision based on the score and threshold
 87 |   # and save that as pr.
 88 | 
 89 |   pr <- as.numeric(predicted > threshold)
 90 | 
 91 |   tp <- round(100 * sum(pr==1 & actual==1)/length(actual))
 92 |   fp <- round(100 * sum(pr==1 & actual==0)/length(actual))
 93 |   tn <- round(100 * sum(pr==0 & actual==0)/length(actual))
 94 |   fn <- round(100 * sum(pr==0 & actual==1)/length(actual))
 95 |     
 96 |   if (verbose)
 97 |     cat("\nData Summary:",
 98 |         sprintf("    Obs: %s\n",
 99 |                 format(length(actual), big.mark=",")),
100 |         sprintf("    Targets: %s; Rate: %0.2f%%\n",
101 |                 format(sum(actual==1), big.mark=","),
102 |                 100*sum(actual==1)/length(actual)),
103 |         sprintf("    Model TP: %9s FN: %9s\n",
104 |                 format(sum(pr==1 & actual==1), big.mark=","),
105 |                 format(sum(pr==0 & actual==1), big.mark=",")),
106 |         sprintf("          FP: %9s TN: %9s\n",
107 |                 format(sum(pr==1 & actual==0), big.mark=","),
108 |                 format(sum(pr==0 & actual==0), big.mark=",")))
109 |   
110 |   agg <- doAggregate()
111 | 
112 |     if (gg)
113 |     {
114 |         if (dosplits)
115 |             classes <- data.frame(x=c(splits[1]/2,
116 |                                       (splits[1]+splits[2])/2,
117 |                                       (1+splits[2])/2),
118 |                                   lbl=split.labels)
119 |         
120 |         quads <- data.frame(x=c(0, 1, 0, 1), hj=c(0, 1, 0, 1),
121 |                             y=c(0, 0, 1, 1), vj=c(0, 0, 1, 1),
122 |                             lbl=c(sprintf("True Negatives (%s%%)", tn),
123 |                                 sprintf("True Positives (%s%%)", tp),
124 |                                 sprintf("False Negatives (%s%%)", fn),
125 |                                 sprintf("False Positives (%s%%)", fp)))
126 | 
127 |         xthresh <- agg$rbin[which(abs(agg$tdiff) == min(abs(agg$tdiff)))][1]
128 | 
129 |         tics <- seq(0, 1, tic.size)
130 |         ord <- order(predicted)
131 |         scores <- data.frame(x=tics,
132 |                              score=round(predicted[ord][c(1,
133 |                                  round(tics*length(ord)))], 2))
134 | 
135 |         p <- ggplot2::ggplot(agg, ggplot2::aes(rbin, pacc))
136 |         p <- p + ggplot2::geom_line()
137 |         p <- p + ggplot2::ggtitle("Proportional Score Function (PSF) Curve")
138 |         p <- p + ggplot2::scale_y_continuous("% Accuracy", limits=c(0,1),
139 |                                     labels=100*tics, breaks=tics)
140 |         p <- p + ggplot2::scale_x_continuous(paste("Proportion of Cases",
141 |                                           "\nSorted by Increasing Risk Scores"),
142 |                                     breaks=tics)
143 |         p <- p + ggplot2::geom_text(data=quads,
144 |                            ggplot2::aes(x=x, y=y, label=lbl, hjust=hj, vjust=vj, size=5))
145 |         p <- p + ggplot2::geom_text(x=xthresh, ggplot2::aes(y=0, size=5),
146 |                            label=sprintf("Threshold (%s)", threshold),
147 |                            vjust=2, hjust=1.1)
148 |         p <- p + ggplot2::geom_vline(xintercept=xthresh)
149 |         p <- p + ggplot2::geom_text(data=scores, ggplot2::aes(x=x, y=1, label=score, size=5),
150 |                            vjust=-0.5)
151 |         p <- p + ggplot2::theme(legend.position="none")
152 |         if (dosplits)
153 |         {
154 |             p <- p + ggplot2::geom_text(data=classes, ggplot2::aes(x=x, y=0.2, label=lbl))
155 |             p <- p + ggplot2::geom_vline(xintercept=low, linetype="twodash", color="grey")
156 |             p <- p + ggplot2::geom_vline(xintercept=high, linetype="twodash", color="grey")
157 |         }
158 |         return(p)
159 |     }
160 |     else
161 |     {
162 |         plot(agg$rbin, agg$pacc, type="l", xlim=c(0,1), ylim=c(0,1),
163 |              xlab="Proportion of Cases\nSorted by Risk Score", ylab="% Accuracy")
164 |         title(main="PSF\n")
165 |   
166 |         abline(v=0.25, lty=3)
167 |         abline(v=0.75, lty=3)
168 |         text(0.08, 0.08, "Low")
169 |         text(0.5, 0.08, "Medium")
170 |         text(0.92, 0.08, "High")
171 |         
172 |         # Add annotations for the sinlge plot.
173 |         
174 |         abline(v=agg$rbin[which(abs(agg$tdiff) == min(abs(agg$tdiff)))], lty=1)
175 |         xthresh <- agg$rbin[which(abs(agg$tdiff) == min(abs(agg$tdiff)))]
176 |         text(xthresh, 0.15, "Threshold", pos=2)
177 |         text(xthresh, 0.1, threshold, pos=2)
178 |         
179 |         # TODO NEED TO PROGRAMMATICALLY DETERMINE THE LABELS FROM MIN SCORE TO MAX SCORE
180 |         ord <- order(predicted)
181 |         scores <- predicted[ord]
182 |         axis(3, at=seq(0, 1, 0.2), padj=1.5, lwd.ticks=0,
183 |              labels=round(scores[c(1, round(seq(0, 1, 0.2)*length(scores)))], 2))
184 |         
185 |         text(0.92, 1, "False Positives")
186 |         text(0.085, 1, "False Negatives")
187 |         text(0.92, 0, "True Positives")
188 |         text(0.08, 0, "True Negatives")
189 |         
190 |         opar <- par(xpd=TRUE)
191 |         text(0.5, 1.08,"Scores")
192 |         par(opar)
193 |     }
194 | }
195 | 


--------------------------------------------------------------------------------
/R/rattleInfo.R:
--------------------------------------------------------------------------------
  1 | rattleInfo <- function(all.dependencies=FALSE,
  2 |                        include.not.installed=FALSE,
  3 |                        include.not.available=FALSE,
  4 |                        include.libpath=FALSE)
  5 | {
  6 | 
  7 |   # TODO: Add in support for BIOC
  8 | 
  9 |   cran.repos <- "https://cran.rstudio.org"
 10 |   bioc.repos <- ""
 11 | 
 12 |   # Using installed.packages() can be a "very slow way to find
 13 |   # information on one or a small number of packages" (Brian Riply
 14 |   # 2012). This is stated in the man page and I am very aware of
 15 |   # it. Brian also note: "In addition, many of you are using it to
 16 |   # find out if a package is installed, when actually you want to know
 17 |   # if it is usable (it might for example be installed for a different
 18 |   # architecture or require a later version of R), for which you need
 19 |   # to use require()." This was particularly relevant within
 20 |   # packageIsAvailable() and there I use a better way of checking for
 21 |   # an installed package.  Here I think it might still remain
 22 |   # appropriate to use installed.packages().
 23 |   
 24 |   iv <- utils::installed.packages()
 25 |   av <- available.packages(contriburl=contrib.url(cran.repos))
 26 |   have.av <- nrow(av) != 0
 27 |   # not a cran repos bv <- available.packages(contriburl=contrib.url(cran.repos))
 28 | 
 29 |   riv <- iv["rattle", "Version"]
 30 |   if (have.av) rav <- av["rattle", "Version"]
 31 |   
 32 |   cat(sprintf("Rattle: version %s", riv))
 33 |   if (have.av && compareVersion(riv, rav) != 1) cat(sprintf(" CRAN %s", rav))
 34 |   cat("\n")
 35 | 
 36 |   # Record the packages that can be upgraded
 37 | 
 38 |   up <- if (have.av && compareVersion(rav, riv) == 1) "rattle" else NULL
 39 |     
 40 |   cat(sprintf("%s\n", sub(" version", ": version", version$version.string)))
 41 | 
 42 |   cat("\n")
 43 |   si <- Sys.info()
 44 |   for (i in seq_along(si))
 45 |     cat(sprintf("%s%s: %s\n", toupper(substr(names(si)[i], 1, 1)),
 46 |                 substring(names(si)[i], 2), si[i]))
 47 | 
 48 |   cat("\nInstalled Dependencies\n")
 49 | 
 50 |   deps2vec <- function(deps)
 51 |   {
 52 |     if (is.na(deps)) return(NULL)
 53 |     strsplit(gsub("\\n", " ", gsub(' ?\\([^\\)]+\\)', '', deps)), ", ?")[[1]]
 54 |   }
 55 |     
 56 |   if (all.dependencies)
 57 |   {
 58 |     if (! "pkgDepTools" %in% rownames(iv))
 59 |     {
 60 |       source("https://bioconductor.org/biocLite.R")
 61 |       pkg <- "pkgDepTools"
 62 |       biocLite("pkgDepTools")
 63 |     }
 64 |     if (! "Rgraphviz" %in% rownames(iv))
 65 |     {
 66 |       source("https://bioconductor.org/biocLite.R")
 67 |       biocLite("Rgraphviz")
 68 |     }
 69 | 
 70 |     # 150711 There does not seem to be a way to get both suggest and
 71 |     # depend links using pkgDepTools::makeDepGraph which I used to
 72 |     # deploy here. It's either one or the other. Rattle only has
 73 |     # suggests links. So I want to get what Rattle suggests and then
 74 |     # find all the depends in cran.deps as the packages that are
 75 |     # reported on. Instead of going through the repository and build a
 76 |     # dependency graph, we've already dounloaded the available package
 77 |     # information so use it here instead.
 78 | 
 79 |     pkg.deps <- function(pkg, pkgs, av)
 80 |     {
 81 |       if (pkg %in% pkgs) return(pkgs)
 82 | 
 83 |       if (! pkg %in% rownames(av)) return(c(pkg, pkgs))
 84 | 
 85 |       for (p in union(deps2vec(av[pkg, "Suggests"]), deps2vec(av[pkg, "Depends"])))
 86 |       {
 87 |         pkgs <- pkg.deps(p, union(pkg, pkgs), av)
 88 |       }
 89 |       return(union(pkg, pkgs))
 90 |     }
 91 |     
 92 |     if (have.av)
 93 |       deps <- pkg.deps("rattle", NULL, av)
 94 |     else
 95 |       deps <- pkg.deps("rattle", NULL, iv)
 96 |   }    
 97 |   else
 98 |     deps <- union(deps2vec(iv["rattle", "Depends"]), deps2vec(iv["rattle", "Suggests"]))
 99 | 
100 |   for (p in sort(setdiff(deps, 'rattle')))
101 |   {
102 |     if (have.av && ! p %in% rownames(av))
103 |     {
104 |       if (include.not.available) cat(sprintf("%s: not available\n", p))
105 |     }
106 |     else if (! p %in% rownames(iv))
107 |     {
108 |       if (include.not.installed) cat(sprintf("%s: not installed\n", p))
109 |     }
110 |     else
111 |       cat(sprintf("%s: version %s%s%s%s", p, iv[p,"Version"],
112 |                   ifelse(have.av && compareVersion(av[p,"Version"], iv[p,"Version"]) == 1,
113 |                          {
114 |                            up <- c(up, p);
115 |                            sprintf(" upgrade available %s", av[p,"Version"])
116 |                          },
117 |                          ""),
118 |                   ifelse(include.libpath, paste("\t", iv[p,"LibPath"]), ""),
119 |                   "\n"))
120 |   }
121 | 
122 |   cat("\nThat was",
123 |       if (include.not.available)
124 |         length(deps)
125 |       else
126 |         sum(sapply(deps, function(p) p %in%
127 |                      if (have.av && include.not.installed) rownames(av) else rownames(iv))),
128 |       "packages.\n")
129 |   
130 |   if (! is.null(up))
131 |   {
132 |     cat(sprintf(paste('\nUpdate the packages with either',
133 |                       'of the following commands:\n\n ',
134 |                       '> install.packages(c("%s"))\n\n ',
135 |                       '> install.packages(rattleInfo(%s%s%s%s%s%s%s))\n\n'),
136 |                 paste(strwrap(paste(up, collapse='", "'),
137 |                               width=crv$log_width, exdent=23), collapse="\n"),
138 |                 ifelse(all.dependencies, "all.dependencies=TRUE", ""),
139 |                 ifelse(all.dependencies &&
140 |                        (include.not.installed ||
141 |                         include.not.available ||
142 |                         include.libpath), ", ", ""),
143 |                 ifelse(include.not.installed, "include.not.installed=TRUE", ""),
144 |                 ifelse(include.not.installed &&
145 |                        (include.not.available ||
146 |                         include.libpath), ", ", ""),
147 |                 ifelse(include.not.available, "include.not.available=TRUE", ""),
148 |                 ifelse(include.not.available &&
149 |                        include.libpath, ", ", ""),
150 |                 ifelse(include.libpath, "include.libpath=TRUE", "")))
151 |     if (isWindows() && "rattle" %in% up)
152 |       cat("Detach rattle (and other attached packages) before updating:\n\n ",
153 |           '> detach("rattle")\n\n')
154 |     cat("Alternatively update all installed packages:\n\n ",
155 |         '> update.packages()\n\n')
156 | 
157 |   }
158 | 
159 |   invisible(up)
160 | 
161 | }
162 | 


--------------------------------------------------------------------------------
/R/report.R:
--------------------------------------------------------------------------------
  1 | # Gnome R Data Miner: GNOME interface to R for Data Mining
  2 | #
  3 | # Time-stamp: <2018-08-15 20:18:41 Graham.Williams@togaware.com>
  4 | #
  5 | # Reporting support
  6 | #
  7 | # Copyright (c) 2009 Togaware Pty Ltd
  8 | #
  9 | # This file is part of Rattle.
 10 | #
 11 | # Rattle is free software: you can redistribute it and/or modify it
 12 | # under the terms of the GNU General Public License as published by
 13 | # the Free Software Foundation, either version 2 of the License, or
 14 | # (at your option) any later version.
 15 | #
 16 | # Rattle is distributed in the hope that it will be useful, but
 17 | # WITHOUT ANY WARRANTY; without even the implied warranty of
 18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 19 | # General Public License for more details.
 20 | #
 21 | # You should have received a copy of the GNU General Public License
 22 | # along with Rattle. If not, see <https://www.gnu.org/licenses/>.
 23 | 
 24 | # TODO
 25 | # 100307 Consider moving to using reporttools and using Sweave instead
 26 | 
 27 | on_report_toolbutton_clicked <- function(action, window)
 28 | {
 29 |   # Wrap the actual call with a "try" so that the watch cursor turns
 30 |   # off even on error.
 31 |   
 32 |   setStatusBar("Generating report.")
 33 |   set.cursor("watch")
 34 |   try(dispatchReportButton())
 35 |   set.cursor()
 36 | }
 37 | 
 38 | dispatchReportButton <- function()
 39 | {
 40 |   # Prerequisites: Can not report on data if there is no dataset.
 41 | 
 42 |   if (noDatasetLoaded()) return(FALSE)
 43 | 
 44 |   if (! questionDialog("The Report button is very experimental.",
 45 |                        "Please report issues and updates to",
 46 |                        "support@togaware.com.",
 47 |                        "\n\nKnown issues:",
 48 |                        "\n\n\tAlways saves to the same fixed file",
 49 |                        "- need chooser.",
 50 |                        "\n\tA plot is displayed on screen - need to suppress.",
 51 |                        "\n\tToo much generated to the console - how remove?",
 52 |                        "\n\nOtherwise it is safe to use!",
 53 |                        "\n\nDo you wish to continue?"))
 54 |     return(FALSE)
 55 |   
 56 |   startLog("GENERATE A REPORT")
 57 |   
 58 |   if (! packageIsAvailable("odfWeave", "generate a report")) return(FALSE)
 59 |   lib.cmd <- "library(odfWeave, quietly=TRUE)"
 60 |   appendLog("The odfWeave package processes ODT document templates.", lib.cmd)
 61 |   eval(parse(text=lib.cmd))
 62 |   
 63 |   # Check which tab of the notebook is active and dispatch to the
 64 |   # appropriate execute action.
 65 | 
 66 |   ct <- getCurrentPageLabel(crv$NOTEBOOK)
 67 | 
 68 |   if (ct == crv$NOTEBOOK.DATA.NAME ||
 69 |       ct == crv$NOTEBOOK.EXPLORE.NAME)
 70 |   {
 71 |     # For the DATA or EXPLORE tabs generate a dataset summary.
 72 |     
 73 |     reportDataTab()
 74 |   }
 75 |   else if (ct == crv$NOTEBOOK.MODEL.NAME )
 76 |   {
 77 |     if (! is.null(crs$rpart))
 78 |       reportTreeModel(crs$rpart)
 79 |     else
 80 |     {
 81 |       infoDialog("Report functionality is only available for the Tree",
 82 |                  ct, " and no Tree model found.")
 83 |       return(FALSE)
 84 |     }
 85 |   }
 86 |   else
 87 |       
 88 |   {
 89 |     infoDialog("No report functionality is available for the",
 90 |                ct, "tab as yet. Nothing done.")
 91 |     return(FALSE)
 92 |   }
 93 | }
 94 | 
 95 | #-----------------------------------------------------------------------
 96 | 
 97 | reportDataTab <- function()
 98 | {
 99 |   if (file.exists("../odf/data_summary.odt"))
100 |   {
101 |     summary <- "../odf/data_summary.odt" # For Testing
102 |     warning(Rtxt("Rattle Report is using local template ../odf"), immediate.=TRUE)
103 |   } 
104 |   else
105 |     summary <- system.file("odt", "data_summary.odt", package="rattle")
106 | 
107 |   ofile <- paste(getwd(), "data_summary_rattle.odt", sep="/")
108 | 
109 |   odf.cmd <- sprintf(paste('odfWeave("%s",',
110 |                            '\n         "%s",',
111 |                            '\n         control = odfWeaveControl(verbose = FALSE))'),
112 |                      summary, ofile)
113 | 
114 |   appendLog(Rtxt("Generate a data report."), odf.cmd)
115 | 
116 |   eval(parse(text=odf.cmd))
117 | 
118 |   setStatusBar(sprintf("Report written to %s.", ofile))
119 | 
120 |   system(paste("oowriter", ofile), wait = FALSE)
121 | }
122 | 
123 |   
124 | #-----------------------------------------------------------------------
125 | 
reportTreeModel <- function(model)
{
  # Prepare a decision tree report for the supplied model.
  #
  # NOTE(review): the odfWeave call below is commented out, so at
  # present no file is actually produced even though the status bar
  # message says one was written.

  # Publish the model as a global variable. Presumably this is so the
  # code chunks inside the ODT template can refer to it - confirm
  # before removing.

  model <<- model

  # Prefer a template from a local ../odf folder (used for testing),
  # otherwise use the one installed with the package. The installed
  # file name now matches the local one (it was misspelt "mode_...").

  if (file.exists("../odf/model_rpart_summary.odt"))
  {
    summary <- "../odf/model_rpart_summary.odt" # For Testing
    warning("Rattle Report is using local template ../odf", immediate.=TRUE)
  }
  else
    summary <- system.file("odt", "model_rpart_summary.odt", package="rattle")

  ofile <- paste(getwd(), "model_rpart_summary_rattle.odt", sep="/")

  # odfWeave::odfWeave(summary, ofile, control=odfWeave::odfWeaveControl(verbose=FALSE))

  # Only touch the status bar when the GUI is running.

  if (! is.null(crv$rattleGUI)) setStatusBar(sprintf("Report written to %s.", ofile))
}
143 | 


--------------------------------------------------------------------------------
/R/rocChart.R:
--------------------------------------------------------------------------------
 1 | rocChart <- function(pr, target)
 2 | {
 3 |   # Calculate the true positive and the false 
 4 |   # positive rates.
 5 |   
 6 |   rates <- pr %>%
 7 |     ROCR::prediction(target) %>%
 8 |     ROCR::performance("tpr", "fpr")
 9 | 
10 |   # Calulcate the AUC.
11 | 
12 |   auc <- pr %>%
13 |     ROCR::prediction(target) %>%
14 |     ROCR::performance("auc") %>%
15 |     attr("y.values") %>%
16 |     magrittr::extract2(1)
17 |   
18 |   # Construct the plot.
19 |   
20 |   pl <- data.frame(tpr=attr(rates, "y.values")[[1]], 
21 |                    fpr=attr(rates, "x.values")[[1]]) %>%
22 |     ggplot2::ggplot(ggplot2::aes(fpr, tpr)) +
23 |     ggplot2::geom_line() +
24 |     ggplot2::annotate("text", x=0.875, y=0.125, vjust=0,
25 |                       label=paste("AUC =", round(100*auc, 2)), 
26 |                       family="xkcd") +
27 |     ggplot2::xlab("False Positive Rate (1-Specificity)") +
28 |     ggplot2::ylab("True Positive Rate (Sensitivity)")
29 |   
30 |   # Return the plot object.
31 | 
32 |   attr(pl, "auc") <- auc
33 |   return(pl)
34 | }
35 | 


--------------------------------------------------------------------------------
/R/textminer.R:
--------------------------------------------------------------------------------
  1 | # R Data Scientist: GNOME interface to R for Data Science
  2 | #
  3 | # Time-stamp: <2017-09-10 10:23:54 Graham Williams>
  4 | #
  5 | # 080921 TEXT MINING DATA
  6 | #
  7 | # Copyright (c) 2009-2017 Togaware Pty Ltd
  8 | #
  9 | # This file is part of Rattle.
 10 | #
 11 | # Rattle is free software: you can redistribute it and/or modify it
 12 | # under the terms of the GNU General Public License as published by
 13 | # the Free Software Foundation, either version 2 of the License, or
 14 | # (at your option) any later version.
 15 | #
 16 | # Rattle is distributed in the hope that it will be useful, but
 17 | # WITHOUT ANY WARRANTY; without even the implied warranty of
 18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 19 | # General Public License for more details.
 20 | #
 21 | # You should have received a copy of the GNU General Public License
 22 | # along with Rattle. If not, see <https://www.gnu.org/licenses/>.
 23 | #
 24 | ########################################################################
 25 | #
 26 | # First some notes:
 27 | #
 28 | #
 29 | 
 30 | ## > show(corpus)
 31 | ## A text document collection with 5 text documents
 32 | 
 33 | ## > summary(corpus)
 34 | ## A text document collection with 5 text documents
 35 | 
 36 | ## > inspect(corpus[1])
 37 | 
 38 | ## tdm <-  TermDocMatrix(corpus)
 39 | ## findFreqTerms(tdm, 5, Inf)
 40 | ## findAssocs(tdm, "ads", 0.97)
 41 | 
 42 | ## ##
 43 | ## ## Add in the target
 44 | ## ##
 45 | 
 46 | ## target <- c(1, 0, 0, 1, 0)
 47 | ## crs$dataset <- as.data.frame(cbind(tdm@.Data, target))
 48 | ## set.seed(123)
 49 | ## crs$train <- sample(nrow(crs$dataset), 4)
 50 | 
 51 | ## ##
 52 | ## ## Ignore 1 (15th), 61 (_is_), 238 (30%) or get error, probably
 53 | ## ## because of their names.
 54 | ## ##
 55 | 
 56 | ## crs$rpart <- rpart(target ~ .,
 57 | ##                    data=crs$dataset[crs$train,c(2:60,62:237,239:285)],
 58 | ##                    method="class")
 59 | 
 60 | ## crs$rf <- randomForest(as.factor(target) ~ .,
 61 | ##                        data=crs$dataset[crs$train,c(2:60,62:237,239:285)],
 62 | ##                        importance=TRUE, na.action=na.omit)
 63 | 
 64 | 
 65 | ## crs$glm <- glm(target ~ .,
 66 | ##                data=crs$dataset[crs$train,c(2:60,62:237,239:285)],
 67 | ##                family=binomial(logit))
 68 | 
 69 | ## ##
 70 | ## ## The others dont yet work:
 71 | ## ##
 72 | 
 73 | 
 74 | ## crs$ada <- ada(target ~ ., data=crs$dataset[crs$train,c(2:60,62:237,239:285)])
 75 | 
 76 | ## crs$ksvm <- ksvm(as.factor(target) ~ .,
 77 | ##                  data=crs$dataset[crs$train,c(2:60,62:237,239:285)],
 78 | ##                  prob.model=TRUE)
 79 | 
 80 | executeDataCorpus <- function()
 81 | {
 82 |   # 080921 Load all documents in the specified corpus as a document
 83 |   # corpus except target.csv, if there is one. Load .target.csv if
 84 |   # there is one as the target for each document in the corpus. The
 85 |   # .target.csv file must have two columns, comma separated. The first
 86 |   # row should name the columns, but we don't actually use the column
 87 |   # names here. The first column is the document id and must be the
 88 |   # filename without its extension. The second column is the
 89 |   # classification, for example 0 or 1. I use the name ".target.csv"
 90 |   # so that the corpus loader will ignore it as a hidden file.
 91 | 
 92 |   # 130310 For now, each time we Execute, reload the dataset. Effect
 93 |   # this with the following:
 94 | 
 95 |   crs$dataset <- NULL
 96 |   theWidget("select_treeview")$getModel()$clear()
 97 |   
 98 |   # Obtain interface information.
 99 | 
100 |   location <- theWidget("data_corpus_location_filechooserbutton")$getFilename()
101 |   strip <- theWidget("data_corpus_strip_checkbutton")$getActive()
102 |   lcase <- theWidget("data_corpus_lowercase_checkbutton")$getActive()
103 |   stopw <- theWidget("data_corpus_stopwords_checkbutton")$getActive()
104 |   stemw <- theWidget("data_corpus_stem_checkbutton")$getActive()
105 | 
106 |   # Start the log for this task.
107 |   
108 |   startLog("LOAD A CORPUS")
109 | 
110 |   # Ensure the package is available.
111 | 
112 |   lib.cmd <- "library(tm, quietly=TRUE)"
113 |   if (! packageIsAvailable("tm", "text mining")) return(FALSE)
114 |   appendLog("Use the tm package to support text mining.", lib.cmd)
115 |   eval(parse(text=lib.cmd))
116 | 
117 |   # This seems to be avaiable somewhere? library(RStem)
118 |   
119 |   # Load the document corpus.
120 | 
121 |   corpus.cmd <- sprintf('my.corpus <- Corpus(DirSource("%s"))',
122 |                         gsub("\\\\", "/", location))
123 |   appendLog("Load the document corpus.", corpus.cmd)
124 |   setStatusBar(Rtxt("Loading corpus from the documents found in"), location, "...")
125 |   eval(parse(text=corpus.cmd))
126 | 
127 |   # Process the documents.
128 | 
129 |   map.cmd <- ""
130 |   
131 |   if (strip)
132 |     map.cmd <- sprintf("%s\nmy.corpus <- tm_map(my.corpus, stripWhitespace)", map.cmd)
133 |   if (lcase) 
134 |     map.cmd <- sprintf("%s\nmy.corpus <- tm_map(my.corpus, content_transformer(tolower))", map.cmd)
135 |   if (stopw) 
136 |     map.cmd <- sprintf(paste("%s\nmy.corpus <- tm_map(my.corpus,",
137 |                              'removeWords, stopwords("english"))'), map.cmd)
138 |   if (stemw)
139 |   {
140 |     lib.cmd <- "library(SnowballC, quietly=TRUE)"
141 |     if (! packageIsAvailable("SnowballC", "word stemming")) return(FALSE)
142 |     appendLog(packageProvides("SnowballC", "stemDocument"), lib.cmd)
143 |     eval(parse(text=lib.cmd))
144 | 
145 |     map.cmd <- sprintf("%s\nmy.corpus <- tm_map(my.corpus, stemDocument)", map.cmd)
146 |   }
147 |   
148 | 
149 |   # 111020 For now, always remove punctuation and numbers.
150 |   
151 |   map.cmd <- sprintf("%s\nmy.corpus <- tm_map(my.corpus, removePunctuation)", map.cmd)
152 |   map.cmd <- sprintf("%s\nmy.corpus <- tm_map(my.corpus, removeNumbers)", map.cmd)
153 | 
154 |   # 111020 TODO Update and include some more information.
155 | 
156 | ##   Dictionary(TermDocumentMatrix(my.corpus))
157 | 
158 | ## tdm <- TermDocumentMatrix(my.corpus, 
159 | ##                           control = list(removePunctuation = TRUE, 
160 | ##                                          removeNumbers = TRUE, 
161 | ##                                          stopwords = TRUE))
162 | 
163 | ## plot(tdm, corThreshold = 0.8, weighting = TRUE, 
164 | ##      attrs = list(graph = list(rankdir = "BT"), 
165 | ##                   node = list(shape = "circle"))) 
166 |  
167 | 
168 | ## dissimilarity(my.corpus[[1]], my.corpus[[2]], method = "eJaccard") 
169 | ## dissimilarity(tdm, method = "cosine")
170 | 
171 | ## rownames(tdm) 
172 | ## colnames(tdm) 
173 | ## dimnames(tdm) 
174 | ## Docs(tdm) 
175 | ## nTerms(tdm) 
176 | ## Terms(tdm)
177 | 
178 | ## inspect(my.corpus[1:3]) 
179 | ## tdm <- TermDocumentMatrix(my.corpus)[1:10, 1:10] 
180 | ## inspect(tdm)
181 | 
182 | ## summary(my.corpus)
183 | 
184 | ## findFreqTerms(tdm, 2, 3 )
185 | 
186 | ## removeSparseTerms(tdm,0.4)
187 | 
188 | ## searchFullText(my.corpus[[3]], "accounts")
189 | 
190 | ## termFreq(my.corpus[[1]])
191 | 
192 | 
193 |   
194 |   appendLog("Transform the documents.", sub("^\n", "", map.cmd))
195 |   setStatusBar(Rtxt("Transforming the documents"), "...")
196 |   eval(parse(text=map.cmd))
197 | 
198 |   # Convert into a keyword count dataset.
199 | 
200 |   ds.cmd <- "crs$dataset <- as.data.frame(t(as.matrix(TermDocumentMatrix(my.corpus))))"
201 |   appendLog("Convert into a dataset.", ds.cmd)
202 |   eval(parse(text=ds.cmd))
203 | 
204 |   # Add in targets if they exist.
205 | 
206 |   target.fname <- paste(location, ".target.csv", sep="/")
207 |   if (file.exists(target.fname))
208 |   {
209 |     read.cmd <- sprintf('target <- read.csv("%s", encoding="%s")',
210 |                         target.fname, crv$csv_encoding)
211 |     appendLog("Read in the targets.", read.cmd)
212 |     eval(parse(text=read.cmd))
213 | 
214 |     if (nrow(crs$dataset) != nrow(target))
215 |     {
216 |       errorDialog(Rtxt("The number of targets is different to the",
217 |                        "number of documents:"),
218 |                   sprintf("%s %s %s.", nrow(target), Rtxt("versus"), nrow(crs$dataset)),
219 |                   Rtxt("You may need to update the file"),
220 |                   target.fname,
221 |                   Rtxt("to match the number of documents in the corpus."))
222 |       return(FALSE)
223 |     }
224 |     
225 |     target.cmd <- "crs$dataset <- cbind(crs$dataset, TARGET=target[[2]])"
226 |     appendLog("Add the targets to the dataset.", target.cmd)
227 |     eval(parse(text=target.cmd))
228 |   }
229 | 
230 |   # Set the title and dataname correctly.
231 | 
232 |   crs$dataname <- basename(location)
233 |   setMainTitle(crs$dataname)
234 | 
235 |   # For now, always succeed.
236 |   
237 |   setStatusBar(Rtxt("Corpus has been loaded from the documents in"),
238 |                location,
239 |                ifelse(file.exists(target.fname),
240 |                       paste(Rtxt("with targets from"), ".target.csv"),
241 |                       ""))
242 | 
243 |   return(TRUE)
244 | }
245 | 
246 |   
247 | 


--------------------------------------------------------------------------------
/R/unloadLibs.R:
--------------------------------------------------------------------------------
 1 | #' Unload pacakges
 2 | #'
 3 | #' Detach the list of pacakges, only detaching those that are on the
 4 | #' search path.
 5 | #'
 6 | #' @param l Vector of package names.
 7 | #' @return nothing.
 8 | #' @rdname unloadLibs
 9 | unloadLibs <- function(l)
10 | {
11 |   for (p in l)
12 |   {
13 |     pn <- sprintf("package:%s", p)
14 |     if (pn %in% search()) detach(pn, character.only=TRUE)
15 |   }
16 |   invisible()
17 | }
18 |   
19 | 


--------------------------------------------------------------------------------
/R/xgboostFormula.R:
--------------------------------------------------------------------------------
  1 | ###########################################################################
  2 | ## Title: Define functions to enable the formula form of xgboost model
  3 | ## Author: Zhou Fang, Data Scientist, Microsoft
  4 | ## Date: 11-05-2017
  5 | ## Rework the implementation: Graham Williams
  6 | ## Date: 20170710
  7 | ## Function names:
  8 | ## xgboost.formula
  9 | ## importance.xgb.formula
 10 | ## predict.xgb.formula
 11 | ###########################################################################
 12 | 
 13 | xgboost <- function(...) UseMethod("xgboost")
 14 | 
 15 | xgboost.formula <- function(form, data, nrounds=100, na.action=na.omit, ...)
 16 | {
 17 |   # FOR NOW ASSUME BINARY CLASSIFICATION TASK ONLY FIXME
 18 | 
 19 |   # Perform the NA action and note the missing observations.
 20 | 
 21 |   nads <- data %>% na.action()
 22 |   miss <- nads %>% attr("na.action") %>% as.vector() # Assume na.omit() FIXME
 23 | 
 24 |   # Create a sparse matrix from the supplied dataset. This will turn
 25 |   # categoricals into indictor variables.
 26 | 
 27 |   sds <- Matrix::sparse.model.matrix(form, data=nads)
 28 |  
 29 |   # Create the target vector.
 30 | 
 31 |   form %>%
 32 |     all.vars() %>%
 33 |     magrittr::extract(1) ->
 34 |   target
 35 | 
 36 |   # Make sure the target is a factor then convert to 0/1.
 37 |   
 38 |   data[[target]] %>%
 39 |     as.factor() %>%
 40 |     as.integer() %>%
 41 |     magrittr::subtract(1) ->
 42 |   label
 43 | 
 44 |   if (! is.null(miss)) label <- label[-miss]
 45 |   
 46 |   # Train xgboost model.  Note the use of print_every_n. I tried
 47 |   # verbose=0 but then there is no cb.evaluation.log produced and so
 48 |   # don't get the extra information we need. So use a big value for n
 49 |   # to aim for first and last iterations.
 50 |   
 51 |   model <- xgboost::xgboost(data          = sds,
 52 |                             label         = label,
 53 |                             nrounds       = nrounds,
 54 |                             print_every_n = 1000,
 55 |                             ...)
 56 | 
 57 |   # Record the actual formula and the final list of features for later
 58 |   # usage.
 59 |   
 60 |   model$formula  <- form
 61 |   model$dimnames <- sds@Dimnames[[2]]
 62 | 
 63 |   # Add extra class for the formula based model.
 64 | 
 65 |   class(model) <- c("xgb.formula", class(model))
 66 |   
 67 |   return(model)
 68 | }
 69 | 
 70 | importance <- function(...) UseMethod("importance")
 71 | 
 72 | importance.xgb.formula <- function(model, data, ...)
 73 | {
 74 |   # Remove the local class so xgboost is not confused.
 75 | 
 76 |   class(model) %<>% setdiff("xgb.formula")
 77 |   
 78 |   # Calculate the feature importance.
 79 | 
 80 |   imp <- xgboost::xgb.importance(feature_names=model$dimnames, model=model, ...) 
 81 |   
 82 |   return(imp)
 83 | }
 84 | 
 85 | predict.xgb.formula <- function(object, newdata, ...)
 86 | {
 87 |   # 20171029 FIXME needs to be able to run without providing a target
 88 |   # variable column in the dataset.
 89 |   
 90 |   # Transform to model matrix of just the variables required based on
 91 |   # the formula.
 92 |   
 93 |   mf <- model.frame(object$formula, data=newdata)
 94 |   vars <- attr(attr(mf, "terms"), "term.labels")
 95 |   x  <- model.matrix(attr(mf, "terms"), data=mf) 
 96 |   na <- attr(mf, "na.action") %>% as.vector()
 97 |   
 98 |   # Convert the data into a sparse matrix as required for
 99 |   # predict.xgb.Booster().
100 |   
101 |   x <- Matrix::Matrix(x, sparse=TRUE)
102 |   
103 |   # Remove our local xgb.formula class so that predict will use the
104 |   # appropriate xgboost:: method. Otherwise xgboost includes a test
105 |   # for == class() rather than %in% class() and fails.
106 |   
107 |   class(object) %<>% setdiff("xgb.formula")
108 |   
109 |   # Predict on the new data.
110 | 
111 |   pr <- predict(object, newdata=x, ...)
112 | 
113 |   # Splice the missing observations as NA predicitons into the
114 |   # result. Is there a splice function? Note the boundary conditions.
115 | 
116 |   for (i in na)
117 |     if (i > length(pr))
118 |       pr <- c(pr, NA)
119 |     else
120 |       pr <- c(pr[1:i-1], NA, pr[i:length(pr)])
121 |   
122 |   return(pr)
123 | }
124 | 
print.xgb.formula <- function(model, ...)
{
  # Print a model built by xgboost.formula() using whichever print
  # method applies once our own class has been removed.

  # Strip our own class first so xgboost is not confused.

  class(model) <- setdiff(class(model), "xgb.formula")

  print(model, ...)
}
133 | 


--------------------------------------------------------------------------------
/build/vignette.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cran/rattle/3875c10d0ae6c7a499d918bc501e121861067e06/build/vignette.rds


--------------------------------------------------------------------------------
/data/audit.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cran/rattle/3875c10d0ae6c7a499d918bc501e121861067e06/data/audit.RData


--------------------------------------------------------------------------------
/data/locationsAUS.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cran/rattle/3875c10d0ae6c7a499d918bc501e121861067e06/data/locationsAUS.RData


--------------------------------------------------------------------------------
/data/weather.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cran/rattle/3875c10d0ae6c7a499d918bc501e121861067e06/data/weather.RData


--------------------------------------------------------------------------------
/data/weatherAUS.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cran/rattle/3875c10d0ae6c7a499d918bc501e121861067e06/data/weatherAUS.RData


--------------------------------------------------------------------------------
/data/wine.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cran/rattle/3875c10d0ae6c7a499d918bc501e121861067e06/data/wine.RData


--------------------------------------------------------------------------------
/inst/CITATION:
--------------------------------------------------------------------------------
 1 | citHeader("Please cite the 'rattle' package in publications using:")
 2 | 
 3 | year <- sub(".*(2[[:digit:]]{3})-.*", "\\1", meta$Date)
 4 | vers <- paste("R package version", meta$Version)
 5 | 
 6 | citEntry(entry="Book",
 7 | 	title=paste("Data Mining with {Rattle} and {R}:",
 8 |                     "The art of excavating data for knowledge discovery"),
 9 | 	author=personList(as.person("Graham J. Williams")),
10 | 	publisher="Springer",
11 | 	series="Use R!",
12 | 	year="2011",
13 | 	url="https://rd.springer.com/book/10.1007/978-1-4419-9890-3",
14 | 	textVersion=
15 | 	paste("Williams, G. J. (2011), Data Mining with Rattle and R: ",
16 | 	      "The Art of Excavating Data for Knowledge Discovery, ",
17 | 	      "Use R!, Springer.",
18 | 	      sep=""))
19 | 
20 | 


--------------------------------------------------------------------------------
/inst/csv/dvdtrans.csv:
--------------------------------------------------------------------------------
 1 | ID,Item
 2 | 1,Sixth Sense
 3 | 1,LOTR1
 4 | 1,Harry Potter1
 5 | 1,Green Mile
 6 | 1,LOTR2
 7 | 2,Gladiator
 8 | 2,Patriot
 9 | 2,Braveheart
10 | 3,LOTR1
11 | 3,LOTR2
12 | 4,Gladiator
13 | 4,Patriot
14 | 4,Sixth Sense
15 | 5,Gladiator
16 | 5,Patriot
17 | 5,Sixth Sense
18 | 6,Gladiator
19 | 6,Patriot
20 | 6,Sixth Sense
21 | 7,Harry Potter1
22 | 7,Harry Potter2
23 | 8,Gladiator
24 | 8,Patriot
25 | 9,Gladiator
26 | 9,Patriot
27 | 9,Sixth Sense
28 | 10,Sixth Sense
29 | 10,LOTR
30 | 10,Gladiator
31 | 10,Green Mile
32 | 


--------------------------------------------------------------------------------
/inst/doc/rattle.R:
--------------------------------------------------------------------------------
 1 | ### R code from vignette source 'rattle.Rnw'
 2 | 
 3 | ###################################################
 4 | ### code chunk number 1: install (eval = FALSE)
 5 | ###################################################
 6 | ## install.packages("rattle", dependencies=c("Depends", "Suggests"))
 7 | 
 8 | 
 9 | ###################################################
10 | ### code chunk number 2: install_togaware (eval = FALSE)
11 | ###################################################
12 | ## install.packages("rattle", repos="https://rattle.togaware.com", type="source")
13 | 
14 | 
15 | ###################################################
16 | ### code chunk number 3: start_up (eval = FALSE)
17 | ###################################################
18 | ## library(rattle)
19 | ## rattle()
20 | 
21 | 
22 | ###################################################
23 | ### code chunk number 4: rattle.Rnw:149-150 (eval = FALSE)
24 | ###################################################
25 | ## source("~/weather_script.R")
26 | 
27 | 
28 | 


--------------------------------------------------------------------------------
/inst/doc/rattle.Rnw:
--------------------------------------------------------------------------------
  1 | % \VignetteIndexEntry{Rattle Quick Start Guide}
  2 | % \VignetteDepends{rattle}
  3 | % \VignetteKeywords{data mining}
  4 | % \VignettePackage{rattle}
  5 | \documentclass[12pt]{article}
  6 | \usepackage{amsmath}
  7 | \usepackage[pdftex]{graphicx}
  8 | \usepackage{color}
  9 | \usepackage{xspace}
 10 | \usepackage{fancyvrb}
 11 | \usepackage{fancyhdr}
 12 | \usepackage{lastpage}
 13 | \usepackage{algorithm2e}
 14 | \usepackage[
 15 |          colorlinks=true,
 16 |          linkcolor=blue,
 17 |          citecolor=blue,
 18 |          urlcolor=blue]
 19 |          {hyperref}
 20 | \usepackage{Sweave}         
 21 | 
 22 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 23 | 
 24 | % define new colors for use
 25 | \definecolor{darkgreen}{rgb}{0,0.6,0}
 26 | \definecolor{darkred}{rgb}{0.6,0.0,0}
 27 | \definecolor{lightbrown}{rgb}{1,0.9,0.8}
 28 | \definecolor{brown}{rgb}{0.6,0.3,0.3}
 29 | \definecolor{darkblue}{rgb}{0,0,0.8}
 30 | \definecolor{darkmagenta}{rgb}{0.5,0,0.5}
 31 | 
 32 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 33 | 
 34 | \newcommand{\Rattle}{\textsf{Rattle}\xspace}
 35 | \newcommand{\pkg}[1]{{\tt #1}\xspace}
 36 | 
 37 | \setlength{\oddsidemargin}{-.25 truein}
 38 | \setlength{\evensidemargin}{0truein}
 39 | \setlength{\topmargin}{-0.2truein}
 40 | \setlength{\textwidth}{7 truein}
 41 | \setlength{\textheight}{8.5 truein}
 42 | \setlength{\parindent}{0.20truein}
 43 | \setlength{\parskip}{0.10truein}
 44 | 
 45 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 46 | \pagestyle{fancy}
 47 | \lhead{}
 48 | \chead{Rattle}
 49 | \rhead{}
 50 | \lfoot{}
 51 | \cfoot{}
 52 | \rfoot{\thepage\ of \pageref{LastPage}}
 53 | \renewcommand{\headrulewidth}{1pt}
 54 | \renewcommand{\footrulewidth}{1pt}
 55 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 56 | 
 57 | \title{The Rattle Package: Quick Start Guide}
 58 | \author{Graham Williams \\ Graham.Williams@togaware.com}
 59 | 
 60 | \begin{document}
 61 | 
 62 | \maketitle
 63 | 
 64 | \thispagestyle{empty}
 65 | 	
 66 | \section{Introduction}
 67 | 
 68 | \Rattle (Williams, 2011) is a package written in R providing a
 69 | graphical user interface to very many other R packages that provide
 70 | functionality for data mining.
 71 | 
 72 | This quick start guide is under development. See
 73 | \url{https://rattle.togaware.com} for extensive documentation
 74 | 
 75 | \section{Requirements}
 76 | 
 77 | \Rattle depends on over 40 other R packages and a couple of other
 78 | software applications/libraries that are independent of R. The first
 79 | thing to ensure is that you have installed the GTK+ libraries and the
 80 | GGobi application. This is operating system dependent and full
 81 | installation instructions are available from \url{https://rattle.togaware.com/}.
 82 | 
 83 | Only a couple of R packages are dependencies for \Rattle. Most are
 84 | suggestions, but without them functionality is quite limited. At a
 85 | minimum it is useful to ensure you have the
 86 | \href{https://cran.r-project.org/package=RGtk2}{\pkg{RGtk2}} package
 87 | installed. Others that you might like to install include:
 88 | \href{https://cran.r-project.org/package=ada}{\pkg{ada}},
 89 | \href{https://cran.r-project.org/package=arules}{\pkg{arules}},
 90 | \href{https://cran.r-project.org/package=doBy}{\pkg{doBy}},
 91 | \href{https://cran.r-project.org/package=ellipse}{\pkg{ellipse}},
 92 | \href{https://cran.r-project.org/package=fBasics}{\pkg{fBasics}},
 93 | \href{https://cran.r-project.org/package=fpc}{\pkg{fpc}},
 94 | \href{https://cran.r-project.org/package=gplots}{\pkg{gplots}},
 95 | \href{https://cran.r-project.org/package=Hmisc}{\pkg{Hmisc}},
 96 | \href{https://cran.r-project.org/package=kernlab}{\pkg{kernlab}},
 97 | \href{https://cran.r-project.org/package=mice}{\pkg{mice}},
 98 | \href{https://cran.r-project.org/package=party}{\pkg{party}},
 99 | \href{https://cran.r-project.org/package=playwith}{\pkg{playwith}},
100 | \href{https://cran.r-project.org/package=pmml}{\pkg{pmml}},
101 | \href{https://cran.r-project.org/package=randomForest}{\pkg{randomForest}},
102 | \href{https://cran.r-project.org/package=reshape}{\pkg{reshape}},
103 | \href{https://cran.r-project.org/package=rggobi}{\pkg{rggobi}},
104 | \href{https://cran.r-project.org/package=RGtk2}{\pkg{RGtk2}},
105 | \href{https://cran.r-project.org/package=ROCR}{\pkg{ROCR}},
106 | \href{https://cran.r-project.org/package=RODBC}{\pkg{RODBC}}, and
107 | \href{https://cran.r-project.org/package=rpart}{\pkg{rpart}}.
108 | 
109 | The packages will usually be installed with the following command:
110 | 
111 | <<install, eval=FALSE>>=
112 | install.packages("rattle", dependencies=c("Depends", "Suggests"))
113 | @ 
114 | 
115 | The latest beta version of rattle is available from
116 | \url{https://rattle.togaware.com/}:
117 | 
118 | <<install_togaware, eval=FALSE>>=
119 | install.packages("rattle", repos="https://rattle.togaware.com", type="source")
120 | @ 
121 | 
122 | \section{First Steps}
123 | 
124 | Start up rattle:
125 | <<start_up, eval=FALSE>>=
126 | library(rattle)
127 | rattle()
128 | @ 
129 | 
130 | \section{Simple Scenario: Build a Couple of Models}
131 | 
132 | \begin{enumerate}
133 | \item Click Execute
134 | \item Click Yes (load the sample weather dataset)
135 | \item Click the Model tab
136 | \item Click Execute (to build a decision tree)
137 | \item Click Draw to display the decision tree (loads other packages as required)
138 | \item Click the Forest radio button
139 | \item Click Execute (to build a random forest - loads packages as required)
140 | \item Click the Evaluate tab
141 | \item Click the Risk radio button (installs packages as required)
142 | \item Click Execute to display two Risk (Cumulative) performance plots
143 | \item Click the Log tab
144 | \item Click the Export button to save script to file weather\_script.R to home folder
145 | \end{enumerate}
146 | 
147 | Now exit from R (and rattle) and start R up again.
148 | 
149 | <<eval=FALSE>>=
150 | source("~/weather_script.R")
151 | @ 
152 | 
153 | This will rerun everything that was done in the GUI session but purely as a script.
154 | 
155 | \section{References}
156 | 
157 | \begin{description}
158 | \item Williams, G. J. (2009). {\em Rattle: A Data Mining GUI for R}.
159 |   The R Journal, 1(2), 45-55. URL:
160 |   \href{https://journal.r-project.org/archive/2009-2/RJournal_2009-2_Williams.pdf}
161 |   {https://journal.r-project.org/archive/2009-2/RJournal\_2009-2\_Williams.pdf}.
162 | \item Williams, G. J. (2011). {\em Data Mining with Rattle and R: The
163 |     Art of Excavating Data for Knowledge Discovery}. Use R!
164 |   series. Springer. \href{https://bit.ly/rattle_data_mining}{https://bit.ly/rattle\_data\_mining}.
165 | \end{description}
166 | 
167 | \end{document}
168 | 


--------------------------------------------------------------------------------
/inst/doc/rattle.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cran/rattle/3875c10d0ae6c7a499d918bc501e121861067e06/inst/doc/rattle.pdf


--------------------------------------------------------------------------------
/inst/etc/Rlogo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cran/rattle/3875c10d0ae6c7a499d918bc501e121861067e06/inst/etc/Rlogo.png


--------------------------------------------------------------------------------
/inst/extdata/audit.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cran/rattle/3875c10d0ae6c7a499d918bc501e121861067e06/inst/extdata/audit.xlsx


--------------------------------------------------------------------------------
/inst/odt/data_summary.odt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cran/rattle/3875c10d0ae6c7a499d918bc501e121861067e06/inst/odt/data_summary.odt


--------------------------------------------------------------------------------
/inst/po/de/LC_MESSAGES/R-rattle.mo:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cran/rattle/3875c10d0ae6c7a499d918bc501e121861067e06/inst/po/de/LC_MESSAGES/R-rattle.mo


--------------------------------------------------------------------------------
/inst/po/es/LC_MESSAGES/R-rattle.mo:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cran/rattle/3875c10d0ae6c7a499d918bc501e121861067e06/inst/po/es/LC_MESSAGES/R-rattle.mo


--------------------------------------------------------------------------------
/inst/po/fr/LC_MESSAGES/R-rattle.mo:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cran/rattle/3875c10d0ae6c7a499d918bc501e121861067e06/inst/po/fr/LC_MESSAGES/R-rattle.mo


--------------------------------------------------------------------------------
/inst/po/id/LC_MESSAGES/R-rattle.mo:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cran/rattle/3875c10d0ae6c7a499d918bc501e121861067e06/inst/po/id/LC_MESSAGES/R-rattle.mo


--------------------------------------------------------------------------------
/inst/po/ja/LC_MESSAGES/R-rattle.mo:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cran/rattle/3875c10d0ae6c7a499d918bc501e121861067e06/inst/po/ja/LC_MESSAGES/R-rattle.mo


--------------------------------------------------------------------------------
/inst/po/no/LC_MESSAGES/R-rattle.mo:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cran/rattle/3875c10d0ae6c7a499d918bc501e121861067e06/inst/po/no/LC_MESSAGES/R-rattle.mo


--------------------------------------------------------------------------------
/inst/po/zh_CN/LC_MESSAGES/R-rattle.mo:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cran/rattle/3875c10d0ae6c7a499d918bc501e121861067e06/inst/po/zh_CN/LC_MESSAGES/R-rattle.mo


--------------------------------------------------------------------------------
/man/acquireAuditData.Rd:
--------------------------------------------------------------------------------
 1 | \name{acquireAuditData}
 2 | 
 3 | \alias{acquireAuditData}
 4 | 
 5 | \title{Generate the audit dataset.}
 6 | 
 7 | \description{
 8 | 
 9 |   Rattle uses an artificial dataset for demonstration purposes. This
10 |   function retrieves the source data
11 |   \url{https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data}
12 |   and then transforms the data in a variety of ways.
13 |   
14 | }
15 | 
16 | \usage{
17 | acquireAuditData(write.to.file=FALSE)
18 | }
19 | 
20 | \arguments{
21 | 
22 |   \item{write.to.file}{Whether to generate a collection of files based
23 |     on the data. The files generated include: audit.csv, audit.Rdata,
24 |     audit.arf, and audit\_missing.csv}
25 | 
26 | }
27 | 
28 | \details{
29 | 
30 |   See the function definition for details of the processing done on the
31 |   data downloaded from the UCI repository.
32 | 
33 | }
34 | 
35 | \value{
36 | 
37 |   By default the function returns a data frame containing the audit
38 |   dataset. If write.to.file is TRUE then the data frame is returned
39 |   invisibly.
40 | 
41 | }
42 | 
43 | \references{Package home page: \url{https://rattle.togaware.com}}
44 | 
45 | \author{\email{Graham.Williams@togaware.com}}
46 | 
47 | \seealso{
48 | 
49 |   \code{\link{audit}}, \code{\link{rattle}}.
50 | 
51 | }
52 | 


--------------------------------------------------------------------------------
/man/asRules.Rd:
--------------------------------------------------------------------------------
 1 | \name{asRules}
 2 | \alias{asRules}
 3 | \title{
 4 |   List the rules corresponding to the rpart decision tree
 5 | }
 6 | \description{
 7 |   
 8 |   Display a list of rules for an rpart decision tree.
 9 |   
10 | }
11 | \usage{
12 | asRules(model, compact=FALSE, \dots)
13 | }
14 | \arguments{
15 |   
16 |   \item{model}{an rpart model.}
17 | 
18 |   \item{compact}{whether to list categoricals compactly.}
19 | 
20 |   \item{\dots}{further arguments passed to or from other methods.}
21 |   
22 | }
23 | \details{
24 | 
25 |   Traverse a decision tree to generate the equivalent set of rules, one
26 |   rule for each path from the root node to a leaf node.
27 | 
28 | }
29 | \references{Package home page: \url{https://rattle.togaware.com}}
30 | \author{\email{Graham.Williams@togaware.com}}
31 | \examples{
32 | \dontrun{asRules.rpart(my.rpart)}
33 | }
34 | \keyword{tree}
35 | 


--------------------------------------------------------------------------------
/man/asRules.rpart.Rd:
--------------------------------------------------------------------------------
 1 | \name{asRules.rpart}
 2 | \alias{asRules.rpart}
 3 | \title{
 4 |   List the rules corresponding to the rpart decision tree
 5 | }
 6 | \description{
 7 |   
 8 |   Display a list of rules for an rpart decision tree.
 9 |   
10 | }
11 | \usage{
12 | \method{asRules}{rpart}(model, compact=FALSE, classes=NULL, \dots)
13 | }
14 | \arguments{
15 |   
16 |   \item{model}{an rpart model.}
17 | 
18 |   \item{compact}{whether to list categoricals compactly (default FALSE).}
19 | 
20 |   \item{classes}{which target classes should be listed (default all).}
21 | 
22 |   \item{\dots}{further arguments passed to or from other methods.}
23 |   
24 | }
25 | \details{
26 | 
27 |   Traverse a decision tree to generate the equivalent set of rules, one
28 |   rule for each path from the root node to a leaf node.
29 | 
30 | }
31 | \references{Package home page: \url{https://rattle.togaware.com}}
32 | \author{\email{Graham.Williams@togaware.com}}
33 | \examples{
34 | \dontrun{asRules.rpart(my.rpart)}
35 | }
36 | \keyword{tree}
37 | 


--------------------------------------------------------------------------------
/man/audit.Rd:
--------------------------------------------------------------------------------
 1 | \name{audit}
 2 | \docType{data}
 3 | \alias{audit}
 4 | \title{Sample dataset to illustrate Rattle functionality.}
 5 | \description{
 6 |   
 7 |   The audit dataset is an artificially constructed dataset that has some
 8 |   of the characteristics of a true financial audit dataset for modelling
 9 |   productive and non-productive audits of a person's financial
10 |   statement. A productive audit is one which identifies errors or
11 |   inaccuracies in the information provided by a client. A non-productive
12 |   audit is usually an audit which found all supplied information to be
13 |   in order.
14 | 
15 |   The audit dataset is used to illustrate binary classification.  The
16 |   target variable is identified as \code{TARGET\_Adjusted}.
17 | 
18 |   The dataset is quite small, consisting of just 2000 entities. Its
19 |   primary purpose is to illustrate modelling in Rattle, so a minimally
20 |   sized dataset is suitable.
21 | 
22 |   The dataset itself is derived from publicly available data (which has
23 |   nothing to do with audits).
24 | 
25 | }
26 | 
27 | \format{
28 | 
29 |   A data frame. In line with data mining terminology we refer to the
30 |   rows of the data frame (or the observations) as entities. The columns
31 |   are referred to as variables.  The entities represent people in this
32 |   case. We describe the variables here:
33 | 
34 |   \describe{
35 |     
36 |     \item{\code{ID}}{This is a unique identifier for each person.}
37 | 
38 |     \item{\code{Age}}{The age.}
39 |     
40 |     \item{\code{Employment}}{The type of employment.}
41 | 
42 |     \item{\code{Education}}{The highest level of education.}
43 |     
44 |     \item{\code{Marital}}{Current marital status.}
45 |     
46 |     \item{\code{Occupation}}{The type of occupation.}
47 | 
48 |     \item{\code{Income}}{The amount of income declared.}
49 | 
50 |     \item{\code{Gender}}{The person's gender.}
51 | 
52 |     \item{\code{Deductions}}{Total amount of expenses that a person
53 |       claims in their financial statement.}
54 | 
55 |     \item{\code{Hours}}{The average hours worked on a weekly basis.}
56 | 
57 |     \item{\code{IGNORE_Accounts}}{The main country in which the person
58 |       has most of their money banked. Note that the variable name is
59 |       prefixed with IGNORE. This is recognised by Rattle as the default
60 |       role for this variable.}
61 | 
62 |     \item{\code{RISK_Adjustment}}{This variable records the monetary
63 |       amount of any adjustment to the person's financial claims as a
64 |       result of a productive audit. This variable, which should not be
65 |       treated as an input variable, is thus a measure of the size of the
66 |       risk associated with the person.}
67 | 
68 |     \item{\code{TARGET_Adjusted}}{The target variable for modelling
69 |       (generally for classification modelling). This is a numeric field
70 |       of class integer, but limited to 0 and 1, indicating
71 |       non-productive and productive audits, respectively. Productive
72 |       audits are those that result in an adjustment being made to a
73 |       client's financial statement.}
74 | 
75 |   }
76 | 
77 | }
78 | \keyword{datasets}
79 | 


--------------------------------------------------------------------------------
/man/binning.Rd:
--------------------------------------------------------------------------------
 1 | \name{binning}
 2 | \alias{binning}
 3 | \title{
 4 |   Perform binning over numeric data
 5 | }
 6 | \description{
 7 |   
 8 |   Perform binning.
 9 |   
10 | }
11 | \usage{
12 | binning(x, bins=4, method=c("quantile", "wtd.quantile", "kmeans"),
13 |                      labels=NULL, ordered=TRUE, weights=NULL)
14 | }
15 | \arguments{
16 |   
17 |   \item{x}{the numeric data to bin.}
18 | 
19 |   \item{bins}{the number of bins to use.}
20 | 
21 |   \item{method}{whether to use "quantile", weighted quantile
22 |   "wtd.quantile" or "kmeans" binning.}
23 | 
24 |   \item{labels}{the labels or names to use for each of the bins.}
25 | 
26 |   \item{ordered}{whether to build an ordered factor or not.}
27 | 
28 |   \item{weights}{vector of numeric weights for each observation for
29 |   weighted quantile binning.}
30 |   
31 | }
32 | \details{
33 | 
34 |   Bin the provided numeric data into the specified number of bins using
35 |   one of the supported methods. The bins will have the names specified
36 |   by labels, if supplied. The result can optionally be an ordered
37 |   factor.
38 |   
39 | }
40 | \value{A factor is returned.}
41 | \references{Package home page: \url{https://rattle.togaware.com}}
42 | \author{Daniele Medri and Graham Williams}
43 | 


--------------------------------------------------------------------------------
/man/calcInitialDigitDistr.Rd:
--------------------------------------------------------------------------------
 1 | \name{calcInitialDigitDistr}
 2 | \alias{calcInitialDigitDistr}
 3 | \title{
 4 |   Generate a frequency count of the initial digits
 5 | }
 6 | \description{
 7 |   
 8 |   In the context of Benford's Law calculate the distribution of the
 9 |   frequencies of the first digit of the numbers supplied as the
10 |   argument.
11 | 
12 | }
13 | \usage{
14 | calcInitialDigitDistr(l, digit=1, len=1,
15 |  sp=c("none", "positive", "negative"))
16 | }
17 | \arguments{
18 |   
19 |   \item{l}{a vector of numbers.}
20 |   \item{digit}{the digit to generate frequencies for.}
21 |   \item{len}{The number of digits.}
22 |   \item{sp}{whether and how to split the digits.}
23 | 
24 | }
25 | \references{Package home page: \url{https://rattle.togaware.com}}
26 | \author{\email{Graham.Williams@togaware.com}}
27 | \keyword{hplot}
28 | 


--------------------------------------------------------------------------------
/man/calculateAUC.Rd:
--------------------------------------------------------------------------------
 1 | \name{calculateAUC}
 2 | \alias{calculateAUC}
 3 | \title{
 4 |   Determine area under a curve (e.g. a risk or recall curve) of a risk chart
 5 | }
 6 | \description{
 7 |   
 8 |   Given the evaluation returned by evaluateRisk, for example, calculate
 9 |   the area under the risk or recall curves, to use as a metric to
10 |   compare the performance of a model.
11 |   
12 | }
13 | \usage{
14 | calculateAUC(x, y)
15 | }
16 | \arguments{
17 |   
18 |   \item{x}{a vector of values for the x points.}
19 |   \item{y}{a vector of values for the y points.}
20 | 
21 | }
22 | \details{
23 | 
24 |   The area is returned.
25 | 
26 | }
27 | \references{Package home page: \url{https://rattle.togaware.com}}
28 | \author{\email{Graham.Williams@togaware.com}}
29 | \seealso{\code{\link{evaluateRisk}}.}
30 | \examples{
31 | ## this is usually used in the context of the evaluateRisk function
32 | \dontrun{ev <- evaluateRisk(predicted, actual, risk)}
33 | 
34 | ## imitate this output here
35 | ev <- data.frame(Caseload=c(1.0, 0.8, 0.6, 0.4, 0.2, 0),
36 |                  Precision=c(0.15, 0.18, 0.21, 0.25, 0.28, 0.30),
37 |                  Recall=c(1.0, 0.95, 0.80, 0.75, 0.5, 0.0),
38 |                  Risk=c(1.0, 0.98, 0.90, 0.77, 0.30, 0.0))
39 | 
40 | ## Calculate the areas under the Risk and the Recall curves.
41 | calculateAUC(ev$Caseload, ev$Risk)
42 | calculateAUC(ev$Caseload, ev$Recall)
43 | 
44 | }
45 | \keyword{hplot}
46 | 


--------------------------------------------------------------------------------
/man/centers.hclust.Rd:
--------------------------------------------------------------------------------
 1 | \name{centers.hclust}
 2 | \alias{centers.hclust}
 3 | \title{
 4 |   List Cluster Centers for a Hierarchical Cluster
 5 | }
 6 | \description{
 7 |   
 8 |   Generate a matrix of centers from a hierarchical cluster.
 9 |   
10 | }
11 | \usage{
12 | centers.hclust(x, object, nclust=10, use.median=FALSE)
13 | }
14 | \arguments{
15 |   
16 |   \item{x}{The data used to build the cluster.}
17 | 
18 |   \item{object}{A hclust object.}
19 | 
20 |   \item{nclust}{Number of clusters.}
21 | 
22 |   \item{use.median}{Use median instead of mean.}
23 | 
24 | }
25 | \details{
26 | 
27 |   For the specified number of clusters, cut the hierarchical cluster
28 |   appropriately to that number of clusters, and return the mean (or
29 |   median) of each resulting cluster.
30 | 
31 | }
32 | \references{Package home page: \url{https://rattle.togaware.com}}
33 | \author{Daniele Medri and \email{Graham.Williams@togaware.com}}
34 | \keyword{cluster}
35 | 


--------------------------------------------------------------------------------
/man/comcat.Rd:
--------------------------------------------------------------------------------
 1 | \name{comcat}
 2 | \alias{comcat}
 3 | \title{
 4 |   Echo data in a human readable form.
 5 | }
 6 | \description{
 7 |   
 8 |   Format data in the most appropriate human readable form.
 9 |   
10 | }
11 | \usage{
12 | comcat(x, ...)
13 | }
14 | \arguments{
15 |   
16 |   \item{x}{object.}
17 | 
18 |   \item{...}{additional arguments passed on to format.}
19 | 
20 | }
21 | \references{Package home page: \url{https://rattle.togaware.com}}
22 | \author{\email{Graham.Williams@togaware.com}}
23 | 
24 | \examples{
25 |   comcat(dim(iris))
26 | }
27 | 


--------------------------------------------------------------------------------
/man/drawTreeNodes.Rd:
--------------------------------------------------------------------------------
 1 | \name{drawTreeNodes}
 2 | \alias{drawTreeNodes}
 3 | \title{
 4 |   Draw nodes of a decision tree
 5 | }
 6 | \description{
 7 |   
 8 |   Draw the nodes of a decision tree
 9 |   
10 | }
11 | \usage{
12 | drawTreeNodes(tree, cex = par("cex"), pch = par("pch"),
13 |                            size = 4 * cex, col = NULL, nodeinfo = FALSE,
14 |                            units = "", cases = "obs", 
15 |                            digits = getOption("digits"),
16 |                            decimals = 2,
17 |                            print.levels = TRUE, new = TRUE) 
18 | }
19 | \arguments{
20 |   
21 |   \item{tree}{an rpart decision tree.}
22 | 
23 |   \item{cex}{.}
24 | 
25 |   \item{pch}{.}
26 | 
27 |   \item{size}{.}
28 | 
29 |   \item{col}{.}
30 | 
31 |   \item{nodeinfo}{.}
32 | 
33 |   \item{units}{.}
34 | 
35 |   \item{cases}{.}
36 | 
37 |   \item{digits}{.}
38 | 
39 |   \item{decimals}{the number of decimal digits to include in numeric
40 |     split nodes.}
41 | 
42 |   \item{print.levels}{.}
43 | 
44 |   \item{new}{.}
45 | 
46 | }
47 | \details{
48 | 
49 |   A variation of draw.tree() from the maptree package.
50 | 
51 | }
52 | \references{Package home page: \url{https://rattle.togaware.com}}
53 | \author{\email{Graham.Williams@togaware.com}, Denis White}
54 | \examples{
55 | ## this is usually used in the context of the plotRisk function
56 | \dontrun{drawTreeNodes(rpart(Species ~ ., iris))}
57 | }
58 | \keyword{hplot}
59 | 


--------------------------------------------------------------------------------
/man/drawTreesAda.Rd:
--------------------------------------------------------------------------------
 1 | \name{drawTreesAda}
 2 | \alias{drawTreesAda}
 3 | \title{
 4 |   Draw trees from an Ada model
 5 | }
 6 | \description{
 7 |   
 8 |   Using the Rattle drawTreeNodes, draw a selection of Ada trees.
 9 |   
10 | }
11 | \usage{
12 | drawTreesAda(model, trees=0, title="")
13 | }
14 | \arguments{
15 |   
16 |   \item{model}{an ada model.}
17 | 
18 |   \item{trees}{The list of trees to draw. Use 0 to draw all trees.}
19 | 
20 |   \item{title}{An optional title to add.}
21 | 
22 | }
23 | \details{
24 | 
25 |   Using Rattle's drawTreeNodes underneath, a plot for each of the
26 |   specified trees from an Ada model will be displayed.
27 | 
28 | }
29 | \references{Package home page: \url{https://rattle.togaware.com}}
30 | \author{\email{Graham.Williams@togaware.com}}
31 | \examples{
32 | \dontrun{drawTreesAda(ds.ada)}
33 | }
34 | \keyword{hplot}
35 | 


--------------------------------------------------------------------------------
/man/errorMatrix.Rd:
--------------------------------------------------------------------------------
 1 | \name{errorMatrix}
 2 | \alias{errorMatrix}
 3 | \title{
 4 |   Generate an error matrix from actual and predicted data.
 5 | }
 6 | \description{
 7 |   An error matrix reports the true/false positive/negative rates.
 8 | }
 9 | \usage{
10 | errorMatrix(actual,
11 |                         predicted,
12 |                         percentage=TRUE,
13 |                         digits=ifelse(percentage,1,3),
14 |                         count=FALSE)
15 | }
16 | \arguments{
17 |   
18 |   \item{actual}{a vector of true values.}
19 |   \item{predicted}{a vector of predicted values.}
20 |   \item{percentage}{return percentages.}
21 |   \item{digits}{the number of digits to round results.}
22 |   \item{count}{return counts.}
23 | 
24 | }
25 | \references{Package home page: \url{https://rattle.togaware.com}}
26 | \author{\email{Graham.Williams@togaware.com}}
27 | 
28 | \examples{
29 |   \dontrun{errorMatrix(model)}
30 | }
31 | 


--------------------------------------------------------------------------------
/man/evaluateRisk.Rd:
--------------------------------------------------------------------------------
 1 | \name{evaluateRisk}
 2 | \alias{evaluateRisk}
 3 | \title{
 4 |   Summarise the performance of a data mining model
 5 | }
 6 | \description{
 7 |   
 8 |   By taking predicted values, actual values, and measures of the risk
 9 |   associated with each case, generate a summary that groups the distinct
10 |   predicted values, calculating the accumulative percentage Caseload,
11 |   Recall, Risk, Precision, and Measure.
12 | 
13 | }
14 | \usage{
15 | evaluateRisk(predicted, actual, risks)
16 | }
17 | \arguments{
18 | 
19 |   \item{predicted}{a numeric vector of probabilities (between 0 and 1)
20 |   representing the probability of each entity being a 1.}
21 | 
22 |   \item{actual}{a numeric vector of classes (0 or 1).}
23 | 
24 |   \item{risks}{a numeric vector of risk (e.g., dollar amounts)
25 |   associated with each entity that has an actual of 1.
26 | 
27 | }
28 | \references{Package home page: \url{https://rattle.togaware.com}}
29 | \author{\email{Graham.Williams@togaware.com}}
30 | \seealso{\code{\link{plotRisk}}.}
31 | \examples{
32 | 
33 | ## simulate the data that is typical in data mining
34 | 
35 | ## we often have only a small number of known positive cases
36 | cases <- 1000
37 | actual <- as.integer(rnorm(cases) > 1)
38 | adjusted <- sum(actual)
39 | nfa <- cases - adjusted
40 | 
41 | ## risks might be dollar values associated with adjusted cases
42 | risks <- rep(0, cases)
43 | risks[actual==1] <- round(abs(rnorm(adjusted, 10000, 5000)), 2)
44 | 
45 | ## our models will generate a probability of a case being a 1
46 | predicted <- rep(0.1, cases) 
47 | predicted[actual==1] <- predicted[actual==1] + rnorm(adjusted, 0.3, 0.1)
48 | predicted[actual==0] <- predicted[actual==0] + rnorm(nfa, 0.1, 0.08)
49 | predicted <- signif(predicted)
50 | 
51 | ## call upon evaluateRisk to generate performance summary
52 | ev <- evaluateRisk(predicted, actual, risks)
53 | 
54 | ## have a look at the first few and last few
55 | head(ev)
56 | tail(ev)
57 | 
58 | ## the performance is usually presented as a Risk Chart
59 | ## under the CRAN MS/Windows this causes a problem, so don't run for now
60 | \dontrun{plotRisk(ev$Caseload, ev$Precision, ev$Recall, ev$Risk)}
61 | }
62 | \keyword{dplot}
63 | 


--------------------------------------------------------------------------------
/man/fancyRpartPlot.Rd:
--------------------------------------------------------------------------------
 1 | \name{fancyRpartPlot}
 2 | \alias{fancyRpartPlot}
 3 | \title{
 4 |   A wrapper for plotting rpart trees using prp
 5 | }
 6 | \description{
 7 |   
 8 |   Plots a fancy RPart decision tree using the pretty rpart plotter.
 9 |   
10 | }
11 | \usage{
12 | fancyRpartPlot(model, main="", sub, caption, palettes, type=2, ...)
13 | }
14 | \arguments{
15 |   
16 |   \item{model}{an rpart object.}
17 | 
18 |   \item{main}{title for the plot.}
19 | 
20 |   \item{sub}{sub title for the plot. The default is a Rattle string with
21 |   date, time and username.}
22 | 
23 |   \item{caption}{caption for bottom right of plot.}
24 | 
25 |   \item{palettes}{a list of sequential palettes names. As supported by
26 |   RColorBrewer::brewer.pal the available names are Blues BuGn BuPu GnBu
27 |   Greens Greys Oranges OrRd PuBu PuBuGn PuRd Purples RdPu Reds YlGn
28 |   YlGnBu YlOrBr YlOrRd.}
29 | 
30 |   \item{type}{the type of plot to generate (2).}
31 | 
32 |   \item{...}{additional arguments passed on to prp.}
33 | 
34 | }
35 | \references{Package home page: \url{https://rattle.togaware.com}}
36 | \author{\email{Graham.Williams@togaware.com}}
37 | 
38 | \examples{
39 | ## Use rpart to build a decision tree.
40 | 
41 | \dontrun{library(rpart)
42 | 
43 | ## Set up the data for modelling.
44 | 
45 | set.seed(42)
46 | ds     <- weather
47 | target <- "RainTomorrow"
48 | risk   <- "RISK_MM"
49 | ignore <- c("Date", "Location", risk)
50 | vars   <- setdiff(names(ds), ignore)
51 | nobs   <- nrow(ds)
52 | form   <- formula(paste(target, "~ ."))
53 | train  <- sample(nobs, 0.7*nobs)
54 | test   <- setdiff(seq_len(nobs), train)
55 | actual <- ds[test, target]
56 | risks  <- ds[test, risk]
57 | 
58 | # Fit the model.
59 | 
60 | fit <- rpart(form, data=ds[train, vars])
61 | 
62 | ## Plot the model.
63 | 
64 | fancyRpartPlot(fit)
65 | 
66 | ## Choose different colours.
67 | 
68 | fancyRpartPlot(fit, palettes=c("Greys", "Oranges"))
69 | 
70 | ## Add a main title to the plot.
71 | 
72 | fancyRpartPlot(fit, main=target) 
73 | 
74 | }}
75 | \keyword{hplot}
76 | 


--------------------------------------------------------------------------------
/man/genPlotTitleCmd.Rd:
--------------------------------------------------------------------------------
 1 | \name{genPlotTitleCmd}
 2 | \alias{genPlotTitleCmd}
 3 | \title{
 4 |   Generate a string to add a title to a plot
 5 | }
 6 | \description{
 7 |   
 8 |   Generate a string that is intended to be \code{\link{eval}}'d that
 9 |   will add a title and sub-title to a plot. The string is a call to
10 |   \code{\link{title}}, supplying the given arguments,
11 |   \code{\link{paste}}d together, as the main title, and generating a
12 |   sub-title that begins with `Rattle' and continues with the current date
13 |   and time, and finishes with the current user's username. This is used
14 |   internally in Rattle to adorn a plot with relevant information, but
15 |   may be useful outside of Rattle.
16 | 
17 | }
18 | \usage{
19 | genPlotTitleCmd(..., vector=FALSE)
20 | }
21 | \arguments{
22 |   \item{...}{one or more strings that will be pasted together to form
23 |     the main title.}
24 |   \item{vector}{whether to return a vector as the result.}
25 | }
26 | \references{Package home page: \url{https://rattle.togaware.com}}
27 | \author{\email{Graham.Williams@togaware.com}}
28 | \seealso{
29 | 
30 |   \code{\link{eval}}, \code{\link{title}}, \code{\link{plotRisk}}.
31 | }
32 | \examples{
33 | # generate some random plot
34 | plot(rnorm(100))
35 | 
36 | # generate the string representing the command to add titles
37 | tl <- genPlotTitleCmd("Sample Plot of", "No Particular Importance")
38 | 
39 | # cause the string to be executed as an R command
40 | eval(parse(text=tl))
41 | }
42 | \keyword{aplot}
43 | 


--------------------------------------------------------------------------------
/man/ggVarImp.Rd:
--------------------------------------------------------------------------------
 1 | \name{ggVarImp}
 2 | \alias{ggVarImp}
 3 | \title{
 4 |   Model.
 5 | }
 6 | \description{
 7 |   
 8 |   Model.
 9 |   
10 | }
11 | \usage{
12 | ggVarImp(model, ...)
13 | }
14 | \arguments{
15 |   
16 |   \item{model}{object.}
17 | 
18 |   \item{...}{arguments passed on.}
19 | 
20 | }
21 | \references{Package home page: \url{https://rattle.togaware.com}}
22 | \author{\email{Graham.Williams@togaware.com}}
23 | 
24 | \examples{
25 | \dontrun{ggVarImp(model)}
26 | }
27 | 


--------------------------------------------------------------------------------
/man/grouper.Rd:
--------------------------------------------------------------------------------
 1 | \name{rescale.by.group}
 2 | \alias{rescale.by.group}
 3 | \title{
 4 | 
 5 |   Transform a numeric vector by grouping it according to the values of
 6 |   the supplied factor and then rescaling within the groups.
 7 | 
 8 | }
 9 | \description{
10 | 
11 |   The numeric vector is remapped to integers from 0 to max-1, with any
12 |   missing values mapped to the midpoint. Original idea from Tony
13 |   Nolan. This will eventually be generalised to do the remapping using
14 |   any of the rescaling functions.
15 | 
16 | }
17 | \usage{
18 | rescale.by.group(x, by=NULL, type = "irank", itop = 100)
19 | }
20 | \arguments{
21 |   
22 |   \item{x}{The numeric vector to rescale.}
23 | 
24 |   \item{by}{A factor of the same length as x used to define the groups.}
25 | 
26 |   \item{type}{The type of rescaling to perform.}
27 | 
28 |   \item{itop}{For an integer remapping this is the number of groups, so
29 |     that the numeric values are mapped to the integers from 0 to (max-1).
30 | 
31 | }
32 | \details{
33 | 
34 |   This Rattle support function, which is also useful by itself, provides
35 |   a simple mechanism to rescale a numeric variable. Several rescalings
36 |   are possible. The rescaling is done by first grouping the observations
37 |   according to the by argument.
38 |   
39 | }
40 | 
41 | \references{Package home page: \url{https://rattle.togaware.com}}
42 | 
43 | \author{\email{Graham.Williams@togaware.com}}
44 | 
45 | \seealso{
46 | 
47 |   \code{\link{rattle}}.
48 | 
49 | }
50 | 


--------------------------------------------------------------------------------
/man/listAdaVarsUsed.Rd:
--------------------------------------------------------------------------------
 1 | \name{listAdaVarsUsed}
 2 | \alias{listAdaVarsUsed}
 3 | \title{
 4 |   List the variables used by an adaboost model
 5 | }
 6 | \description{
 7 |   
 8 |   Returns a list of the variables used and their frequencies.
 9 |   
10 | }
11 | \usage{
12 | listAdaVarsUsed(model)
13 | 
14 | }
15 | \arguments{
16 |   
17 |   \item{model}{an rpart object.}
18 | 
19 | }
20 | \references{Package home page: \url{https://rattle.togaware.com}}
21 | \author{\email{Graham.Williams@togaware.com}}
22 | 


--------------------------------------------------------------------------------
/man/listTreesAda.Rd:
--------------------------------------------------------------------------------
 1 | \name{listTreesAda}
 2 | \alias{listTreesAda}
 3 | \title{
 4 |   List trees from an Ada model
 5 | }
 6 | \description{
 7 |   
 8 |   Display the textual representation of a selection of Ada trees.
 9 |   
10 | }
11 | \usage{
12 | listTreesAda(model, trees=0)
13 | }
14 | \arguments{
15 |   
16 |   \item{model}{an ada model.}
17 | 
18 |   \item{trees}{The list of trees to list. Use 0 to list all trees.}
19 | 
20 | }
21 | \details{
22 | 
23 |   Using rpart's print method display each of the specified trees from an
24 |   Ada model.
25 | 
26 | }
27 | \references{Package home page: \url{https://rattle.togaware.com}}
28 | \author{\email{Graham.Williams@togaware.com}}
29 | \examples{
30 | \dontrun{listTreesAda(ds.ada)}
31 | }
32 | \keyword{hplot}
33 | 


--------------------------------------------------------------------------------
/man/listVersions.Rd:
--------------------------------------------------------------------------------
 1 | \name{listVersions}
 2 | \alias{listVersions}
 3 | \title{
 4 |   
 5 |   Versions of Installed Packages
 6 |   
 7 | }
 8 | \description{
 9 | 
10 |   Generate a list of packages installed and their version number.
11 | 
12 | }
13 | \usage{
14 | 
15 | listVersions(file="", ...)
16 | 
17 | }
18 | \arguments{
19 | 
20 |   \item{file}{a character string naming a file or a connection open for
21 |     writing. '""' indicates output to the console.}
22 | 
23 |   \item{...}{arguments to \code{\link{write.csv}}.}
24 | 
25 | }
26 | \details{
27 | 
28 |   This function is useful in reporting problems or bugs, to ensure there
29 |   is a clear match of R package versions between the system exhibiting
30 |   the issue and the test system replicating the issue.
31 | 
32 |   By default the information is written to the console in a comma
33 |   separated form, that is ideally designed to be written to a CSV file
34 |   for emailing.
35 |   
36 | }
37 | \seealso{\code{\link{write.csv}}}
38 | \author{\email{Graham.Williams@togaware.com}}
39 | 


--------------------------------------------------------------------------------
/man/modalvalue.Rd:
--------------------------------------------------------------------------------
 1 | \name{modalvalue}
 2 | \alias{modalvalue}
 3 | \title{
 4 |   Calculate the mode of a vector, array or list.
 5 | }
 6 | \description{
 7 |   
 8 |    The mode is the most common or modal value of a list.
 9 |   
10 | }
11 | \usage{
12 | modalvalue(x, na.rm=FALSE)
13 | }
14 | \arguments{
15 |   
16 |   \item{x}{A vector, array or list.}
17 | 
18 |   \item{na.rm}{Whether to remove missing values.}
19 | }
20 | \details{
21 | 
22 |   This function calculates the mode of a vector, array or list (lists
23 |    are flattened). This code originated from an anonymous post on the R
24 |    Wiki.
25 | 
26 | }
27 | \keyword{hplot}
28 | 


--------------------------------------------------------------------------------
/man/plotOptimalLine.Rd:
--------------------------------------------------------------------------------
 1 | \name{plotOptimalLine}
 2 | \alias{plotOptimalLine}
 3 | \title{
 4 |   Plot three lines on a risk chart, one vertical and two horizontal
 5 | }
 6 | \description{
 7 |   
 8 |   Plots a vertical line at x up to max of y1 and y2, then horizontal
 9 |   from this line at y1 and y2. Intended for plotting on a plotRisk.
10 |   
11 | }
12 | \usage{
13 | plotOptimalLine(x, y1, y2, pr = NULL, colour = "plum", label = NULL)
14 | }
15 | \arguments{
16 |   
17 |   \item{x}{location of vertical line.}
18 | 
19 |   \item{y1}{location of one horizontal line.}
20 | 
21 |   \item{y2}{location of other horizontal line.}
22 | 
23 |   \item{pr}{print a percentage at this point.}
24 | 
25 |   \item{colour}{of the line.}
26 | 
27 |   \item{label}{at bottom of line.}
28 | 
29 | }
30 | \details{
31 | 
32 |   Intended to plot an optimal line on a Risk Chart as plotted by
33 |   plotRisk.
34 | 
35 | }
36 | \references{Package home page: \url{https://rattle.togaware.com}}
37 | \author{\email{Graham.Williams@togaware.com}}
38 | \seealso{\code{\link{plotRisk}}.}
39 | \examples{
40 | ## this is usually used in the context of the plotRisk function
41 | \dontrun{ev <- evaluateRisk(predicted, actual, risk)}
42 | 
43 | ## imitate this output here
44 | ev <- NULL
45 | ev$Caseload  <- c(1.0, 0.8, 0.6, 0.4, 0.2, 0)
46 | ev$Precision <- c(0.15, 0.18, 0.21, 0.25, 0.28, 0.30)
47 | ev$Recall    <- c(1.0, 0.95, 0.80, 0.75, 0.5, 0.0)
48 | ev$Risk      <- c(1.0, 0.98, 0.90, 0.77, 0.30, 0.0)
49 | 
50 | ## plot the Risk Chart
51 | plotRisk(ev$Caseload, ev$Precision, ev$Recall, ev$Risk,
52 |          chosen=60, chosen.label="Pr=0.45")
53 | 
54 | ## plot the optimal point
55 | plotOptimalLine(40, 77, 75, colour="maroon")
56 | 
57 | }
58 | \keyword{hplot}
59 | 


--------------------------------------------------------------------------------
/man/plotRisk.Rd:
--------------------------------------------------------------------------------
  1 | \name{plotRisk}
  2 | \alias{plotRisk}
  3 | \title{
  4 |   Plot a risk chart
  5 | }
  6 | \description{
  7 |   
  8 |   Plots a Rattle Risk Chart. Such a chart has been developed in a
  9 |   practical context to present the performance of data mining models to
 10 |   clients, plotting a caseload against performance, allowing a client to
 11 |   see the tradeoff between coverage and performance.
 12 |   
 13 | }
 14 | \usage{
 15 | plotRisk(cl, pr, re, ri = NULL, title = NULL,
 16 |     show.legend = TRUE, xleg = 60, yleg = 55,
 17 |     optimal = NULL, optimal.label = "", chosen = NULL, chosen.label = "",
 18 |     include.baseline = TRUE, dev = "", filename = "", show.knots = NULL,
 19 |     show.lift=TRUE, show.precision=TRUE,
 20 |     risk.name = "Risk", recall.name = "Recall",
 21 |     precision.name = "Precision")
 22 | }
 23 | \arguments{
 24 |   
 25 |   \item{cl}{a vector of caseloads corresponding to different probability
 26 |     cutoffs. Can be either percentages (between 0 and 100) or fractions
 27 |     (between 0 and 1).}
 28 | 
 29 |   \item{pr}{a vector of precision values for each probability
 30 |     cutoff. Can be either percentages (between 0 and 100) or fractions
 31 |     (between 0 and 1).}
 32 | 
 33 |   \item{re}{a vector of recall values for each probability cutoff. Can
 34 |     be either percentages (between 0 and 100) or fractions (between 0
 35 |     and 1).}
 36 | 
 37 |   \item{ri}{a vector of risk values for each probability cutoff. Can be
 38 |     either percentages (between 0 and 100) or fractions (between 0 and
 39 |     1).}
 40 | 
 41 |   \item{title}{the main title to place at the top of the plot.}
 42 | 
 43 |   \item{show.legend}{whether to display the legend in the plot.}
 44 |   
 45 |   \item{xleg}{the x coordinate for the placement of the legend.}
 46 | 
 47 |   \item{yleg}{the y coordinate for the placement of the legend.}
 48 | 
 49 |   \item{optimal}{a caseload (percentage or fraction) that represents an
 50 |   optimal performance point which is also plotted. If instead the value
 51 |   is \code{TRUE} then the optimal point is identified internally
 52 |   (maximum value for \code{(recall-caseload)+(risk-caseload)}) and
 53 |   plotted.}
 54 | 
 55 |   \item{optimal.label}{a string which is added to label the line drawn
 56 |   as the optimal point.}
 57 | 
 58 |   \item{chosen}{a caseload (percentage or fraction) that represents a
 59 |   user chosen optimal performance point which is also plotted.}
 60 | 
 61 |   \item{chosen.label}{a string which is added to label the line drawn as
 62 |   the chosen point.}
 63 | 
 64 |   \item{include.baseline}{if TRUE (the default) then display the
 65 |   diagonal baseline.}
 66 | 
 67 |   \item{dev}{a string which, if supplied, identifies a device type as
 68 |   the target for the plot. This might be one of \code{wmf} (for
 69 |   generating a Windows Metafile, but only available on MS/Windows),
 70 |   \code{pdf}, or \code{png}.}
 71 | 
 72 |   \item{filename}{a string naming a file. If \code{dev} is not given
 73 |   then the filename extension is used to identify the image format as
 74 |   one of those recognised by the \code{dev} argument.}
 75 | 
 76 |   \item{show.knots}{a vector of caseload values at which a vertical line
 77 |   should be drawn. These might correspond, for example, to individual
 78 |   paths through a decision tree, illustrating the impact of each path on
 79 |   the caseload and performance.}
 80 | 
 81 | \item{show.lift}{whether to label the right axis with lift.}
 82 | 
 83 | \item{show.precision}{whether to show the precision plot.}
 84 | 
 85 |   \item{risk.name}{a string used within the plot's legend that gives a
 86 |   name to the risk. Often the risk is a dollar amount at risk from a
 87 |   fraud or from a bank loan point of view, in which case a name such as
 88 |   \code{Revenue} may be used. The default is \code{Risk}.}
 89 | 
 90 |   \item{recall.name}{a string used within the plot's legend that gives a
 91 |   name to the recall. The recall is often the percentage of cases that
 92 |   are positive hits, and in practice these might correspond to known
 93 |   cases of fraud or reviews where some adjustment to perhaps an income tax
 94 |   return or application for credit had to be made on reviewing the case,
 95 |   so a name such as \code{Adjustments} may be used. The default is \code{Recall}.}
 96 | 
 97 |   \item{precision.name}{a string used within the plot's legend that gives a
 98 |   name to the precision. A common name for precision is \code{Strike
 99 |   Rate}; the default here is \code{Precision}.}
100 |   
101 | }
102 | \details{
103 | 
104 |   Caseload is the percentage of the entities in the dataset covered by
105 |   the model at a particular probability cutoff, so that with a cutoff of
106 |   0, all (100\%) of the entities are covered by the model. With a cutoff
107 |   of 1 (0\%) no entities are covered by the model. A diagonal line is
108 |   drawn to represent a baseline random performance. Then the percentage
109 |   of positive cases (the recall) covered for a particular caseload is
110 |   plotted, and optionally a measure of the percentage of the total risk
111 |   that is also covered for a particular caseload may be plotted. Such a
112 |   chart allows a user to select an appropriate tradeoff between caseload
113 |   and performance. The charts are similar to ROC curves. The precision
114 |   (i.e., strike rate) is also plotted.
115 | 
116 | }
117 | \references{Package home page: \url{https://rattle.togaware.com}}
118 | \author{\email{Graham.Williams@togaware.com}}
119 | \seealso{\code{\link{evaluateRisk}}, \code{\link{genPlotTitleCmd}}.}
120 | \examples{
121 | ## this is usually used in the context of the evaluateRisk function
122 | \dontrun{ev <- evaluateRisk(predicted, actual, risk)}
123 | 
124 | ## imitate this output here
125 | ev <- NULL
126 | ev$Caseload  <- c(1.0, 0.8, 0.6, 0.4, 0.2, 0)
127 | ev$Precision <- c(0.15, 0.18, 0.21, 0.25, 0.28, 0.30)
128 | ev$Recall    <- c(1.0, 0.95, 0.80, 0.75, 0.5, 0.0)
129 | ev$Risk      <- c(1.0, 0.98, 0.90, 0.77, 0.30, 0.0)
130 | 
131 | ## plot the Risk Chart
132 | plotRisk(ev$Caseload, ev$Precision, ev$Recall, ev$Risk,
133 |          chosen=60, chosen.label="Pr=0.45")
134 | 
135 | ## Add a title
136 | eval(parse(text=genPlotTitleCmd("Sample Risk Chart")))
137 | }
138 | \keyword{hplot}
139 | 


--------------------------------------------------------------------------------
/man/printRandomForests.Rd:
--------------------------------------------------------------------------------
 1 | \name{printRandomForests}
 2 | \alias{printRandomForests}
 3 | \title{
 4 |   Print a representation of the Random Forest models to the console
 5 | }
 6 | \description{
 7 |   
 8 |   A randomForest model, by default, consists of 500 decision trees. This
 9 |   function walks through each tree and generates a set of rules which
10 |   are printed to the console. This takes a considerable amount of time
11 |   and is provided for users to access the actual model, but it is not
12 |   yet used within the Rattle GUI. It may be used to display the output
13 |   of the RF (but it takes longer to generate than the model itself!). Or
14 |   it might only be used on export to PMML or SQL.
15 | 
16 | }
17 | \usage{
18 | printRandomForests(model, models=NULL, include.class=NULL, format="")
19 | }
20 | \arguments{
21 |   
22 |   \item{model}{a randomForest model.}
23 | 
24 |   \item{models}{a list of integers limiting the models in MODEL that are
25 |   displayed.}
26 |   
27 |   \item{include.class}{limit the output to the specific class.}
28 | 
29 |   \item{format}{possible values are "VB".}
30 | 
31 | }
32 | \references{Package home page: \url{https://rattle.togaware.com}}
33 | \author{\email{Graham.Williams@togaware.com}}
34 | \examples{
35 | ## Display a ruleset for a specific model amongst the 500.
36 | \dontrun{printRandomForests(rfmodel, 5)}
37 | 
38 | ## Display a ruleset for specific models amongst the 500.
39 | \dontrun{printRandomForests(rfmodel, c(5,10,15))}
40 | 
41 | ## Display a ruleset for each of the 500 models.
42 | \dontrun{printRandomForests(rfmodel)}
43 | }
44 | \keyword{hplot}
45 | 


--------------------------------------------------------------------------------
/man/randomForest2Rules.Rd:
--------------------------------------------------------------------------------
 1 | \name{randomForest2Rules}
 2 | \alias{randomForest2Rules}
 3 | \title{
 4 |   Generate accessible data structure of a randomForest model
 5 | }
 6 | \description{
 7 |   
 8 |   A randomForest model, by default, consists of 500 decision trees. This
 9 |   function walks through each tree and generates a set of rules. This
10 |   takes a considerable amount of time and is provided for users to
11 |   access the actual model, but it is not yet used within the Rattle
12 |   GUI. It may be used to display the output of the RF (but it takes
13 |   longer to generate than the model itself!). Or it might only be used
14 |   on export to PMML or SQL.
15 | 
16 | }
17 | \usage{
18 | randomForest2Rules(model, models=NULL)
19 | }
20 | \arguments{
21 |   
22 |   \item{model}{a randomForest model.}
23 | 
24 |   \item{models}{a list of integers limiting the models in MODEL that are
25 |   converted.}
26 |   
27 | }
28 | \references{Package home page: \url{https://rattle.togaware.com}}
29 | \author{\email{Graham.Williams@togaware.com}}
30 | \examples{
31 | ## Generate a ruleset for a specific model amongst the 500.
32 | \dontrun{randomForest2Rules(rfmodel, 5)}
33 | 
34 | ## Generate a ruleset for specific models amongst the 500.
35 | \dontrun{randomForest2Rules(rfmodel, c(5,10,15))}
36 | 
37 | ## Generate a ruleset for each of the 500 models.
38 | \dontrun{randomForest2Rules(rfmodel)}
39 | }
40 | \keyword{hplot}
41 | 


--------------------------------------------------------------------------------
/man/rattle.Rd:
--------------------------------------------------------------------------------
 1 | \name{rattle}
 2 | \alias{rattle}
 3 | \alias{crs}
 4 | \alias{crv}
 5 | \title{Display the Rattle User Interface}
 6 | \description{
 7 | 
 8 |   The Rattle user interface uses the RGtk2 package to present an
 9 |   intuitive point and click interface for data mining, extensively
10 |   building on the excellent collection of R packages by very many
11 |   authors for data manipulation, exploration, analysis, and evaluation.
12 | 
13 | }
14 | \usage{
15 | rattle(csvname=NULL, dataset=NULL, useGtkBuilder=TRUE)
16 | }
17 | \arguments{
18 |   
19 |   \item{csvname}{the optional name of a CSV file to load into Rattle on
20 |     startup.}
21 | 
22 |   \item{dataset}{The optional name as a character string of a dataset to
23 |     load into Rattle on startup.}
24 | 
25 |   \item{useGtkBuilder}{if not supplied then automatically determine whether to
26 |     use the new GtkBuilder rather than the deprecated libglade. A user
27 |     can override the heuristic choice with TRUE or FALSE.}
28 | }
29 | \details{
30 | 
31 |   Refer to the Rattle home page in the URL below for a growing reference
32 |   manual for using Rattle.
33 | 
34 |   Whilst the underlying functionality of Rattle is built upon a vast
35 |   collection of other R packages, Rattle itself provides a collection of
36 |   utility functions used within Rattle. These are made available through
37 |   loading the rattle package into your R library. The See Also section
38 |   lists these utility functions that may be useful outside of Rattle.
39 |   
40 |   Rattle can initialise some options using a .Rattle file in the folder
41 |   in which Rattle is started. The currently supported options are
42 |   .RATTLE.DATA, .RATTLE.SCORE.IN, and .RATTLE.SCORE.OUT.
43 |   
44 |   If the environment variable RATTLE\_DATA is defined then that is set
45 |   as the default CSV file name to load. Otherwise, if .RATTLE.DATA is
46 |   defined then that will be used as the CSV file to load. Otherwise, if
47 |   csvname is provided then that will be used.
48 | 
49 |   Two environments are exported by Rattle, capturing the current rattle
50 |   state (crs) and the current rattle variables (crv).
51 | 
52 | }
53 | 
54 | \references{Package home page: \url{https://rattle.togaware.com}}
55 | 
56 | \author{\email{Graham.Williams@togaware.com}}
57 | 
58 | \seealso{
59 | 
60 |   \code{\link{evaluateRisk}}, \code{\link{genPlotTitleCmd}},
61 |   \code{\link{plotRisk}}.
62 | 
63 | }
64 | \examples{
65 | # You can start rattle with a path to a csv file to pre-specify the
66 | # dataset. You then need to click Execute to load the data.
67 | 
68 | \dontrun{rattle(system.file("csv", "weather.csv", package = "rattle"))}
69 | 
70 | }
71 | \keyword{environment}
72 | 


--------------------------------------------------------------------------------
/man/rattle.print.summary.multinom.Rd:
--------------------------------------------------------------------------------
 1 | \name{rattle.print.summary.multinom}
 2 | \alias{rattle.print.summary.multinom}
 3 | \title{
 4 |   Print information about a multinomial model
 5 | }
 6 | \description{
 7 |   
 8 |   Displays a textual review of the performance of a multinom model.
 9 |   
10 | }
11 | \usage{
12 | rattle.print.summary.multinom(x, digits = x$digits, ...)
13 | }
14 | \arguments{
15 |   
16 |   \item{x}{A \code{summary.multinom} object.}
17 | 
18 |   \item{digits}{Number of digits to print for numbers.}
19 | 
20 |   \item{...}{Other arguments.}
21 |   
22 | }
23 | \details{
24 | 
25 |   Print a summary of a multinom model. This is simply a modification of
26 |   the print.summary.multinom function to add the number of entities!
27 | 
28 | }
29 | \references{Package home page: \url{https://rattle.togaware.com}}
30 | \author{\email{Graham.Williams@togaware.com}}
31 | 
32 | 


--------------------------------------------------------------------------------
/man/rattleInfo.Rd:
--------------------------------------------------------------------------------
 1 | \name{rattleInfo}
 2 | \alias{rattleInfo}
 3 | \title{
 4 | 
 5 |   Extract Rattle and related package information.
 6 | 
 7 | }
 8 | \description{
 9 | 
10 |   Display system information, including versions of Rattle and R,
11 |   operating system, and versions of other packages used by
12 |   Rattle. Useful for reporting bugs but also invisibly returns a list of
13 |   packages that have updates available and can be passed to
14 |   install.packages().
15 | 
16 | }
17 | \usage{
18 | rattleInfo(all.dependencies=FALSE,
19 |            include.not.installed=FALSE,
20 |            include.not.available=FALSE,
21 |            include.libpath=FALSE)
22 | }
23 | \arguments{
24 |   
25 |   \item{all.dependencies}{If TRUE then check the full dependency graph
26 |     for Rattle and list all of those packages (which may take quite a
27 |     few seconds to compute), or else just list those key packages that
28 |     Rattle Depends on and Suggests.}
29 | 
30 |   \item{include.not.installed}{If TRUE then make mention of any packages
31 |     that are not installed, but are available.}
32 | 
33 |   \item{include.not.available}{If TRUE then make mention of any packages
34 |     that are not available from CRAN.}
35 | 
36 |   \item{include.libpath}{If TRUE then list the library location where
37 |     each package is installed.}
38 | 
39 | }
40 | \details{
41 | 
42 |   This is a support function to list useful information to provide the
43 |   developers with information about the system environment when running
44 |   Rattle. It is intended to provide the information that is useful in
45 |   reporting bugs.
46 | 
47 |   It also lists the currently installed version of a number of packages
48 |   that Rattle makes use of as well as checking for any updates available
49 |   for those packages.
50 | 
51 |   If updates are found then a command is generated and printed so that a
52 |   user can simply copy and paste the command to update the relevant
53 |   packages. The function also invisibly returns the list of packages
54 |   that can be updated, so that we can do something like:
55 |   install.packages(rattleInfo()).
56 |   
57 | }
58 | 
59 | \references{Package home page: \url{https://rattle.togaware.com}}
60 | 
61 | \author{\email{Graham.Williams@togaware.com}}
62 | 
63 | \seealso{
64 | 
65 |   \code{\link{rattle}}.
66 | 
67 | }
68 | 
69 | \keyword{environment}
70 | 


--------------------------------------------------------------------------------
/man/riskchart.Rd:
--------------------------------------------------------------------------------
  1 | \name{riskchart}
  2 | \alias{riskchart}
  3 | \title{
  4 |   Plot a risk chart
  5 | }
  6 | \description{
  7 |   
  8 |   Plots a Rattle Risk Chart for binary classification models using
  9 |   ggplot2. Such a chart has been developed in a practical context to
 10 |   present the performance of data mining models to clients, plotting a
 11 |   caseload against performance, allowing a client to see the tradeoff
 12 |   between coverage and performance.
 13 |   
 14 | }
 15 | \usage{
 16 | riskchart(pr,
 17 |           ac,
 18 |           ri               = NULL,
 19 |           title            = "Risk Chart",
 20 |           title.size       = 10,
 21 |           subtitle         = NULL,
 22 |           caption          = TRUE,
 23 |           show.legend      = TRUE,
 24 |           optimal          = NULL,
 25 |           optimal.label    = "",
 26 |           chosen           = NULL,
 27 |           chosen.label     = "",
 28 |           include.baseline = TRUE,
 29 |           dev              = "",
 30 |           filename         = "",
 31 |           show.knots       = NULL,
 32 |           show.lift        = TRUE,
 33 |           show.precision   = TRUE,
 34 |           show.maximal     = TRUE,
 35 |           risk.name        = "Risk",
 36 |           recall.name      = "Recall",
 37 |           precision.name   = "Precision",
 38 |           thresholds       = NULL,
 39 |           legend.horiz     = TRUE)
 40 | }
 41 | \arguments{
 42 |   
 43 |   \item{pr}{The predicted class for each observation.}
 44 | 
 45 |   \item{ac}{The actual class for each observation.}
 46 | 
 47 |   \item{ri}{The risk class for each observation.}
 48 | 
 49 |   \item{title}{the main title to place at the top of the plot.}
 50 |   
 51 |   \item{title.size}{font size for the main title.}
 52 | 
 53 |   \item{subtitle}{subtitle under the main title.}
 54 | 
 55 |   \item{caption}{caption for the bottom right of plot.}
 56 |   
 57 |   \item{show.legend}{whether to display the legend in the plot.}
 58 |   
 59 |   \item{optimal}{a caseload (percentage or fraction) that represents an
 60 |   optimal performance point which is also plotted. If instead the value
 61 |   is \code{TRUE} then the optimal point is identified internally
 62 |   (maximum value for \code{(recall-caseload)+(risk-caseload)}) and
 63 |   plotted.}
 64 | 
 65 | \item{optimal.label}{a string which is added to label the line drawn
 66 |   as the optimal point.}
 67 | 
 68 | \item{chosen}{a caseload (percentage or fraction) that represents a
 69 |   user chosen optimal performance point which is also plotted.}
 70 | 
 71 | \item{chosen.label}{a string which is added to label the line drawn as
 72 |   the chosen point.}
 73 | 
 74 | \item{include.baseline}{if TRUE (the default) then display the
 75 |   diagonal baseline.}
 76 | 
 77 | \item{dev}{a string which, if supplied, identifies a device type as
 78 |   the target for the plot. This might be one of \code{wmf} (for
 79 |   generating a Windows Metafile, but only available on MS/Windows),
 80 |   \code{pdf}, or \code{png}.}
 81 | 
 82 | \item{filename}{a string naming a file. If \code{dev} is not given
 83 |   then the filename extension is used to identify the image format as
 84 |   one of those recognised by the \code{dev} argument.}
 85 | 
 86 | \item{show.knots}{a vector of caseload values at which a vertical line
 87 |   should be drawn. These might correspond, for example, to individual
 88 |   paths through a decision tree, illustrating the impact of each path on
 89 |   the caseload and performance.}
 90 | 
 91 | \item{show.lift}{whether to label the right axis with lift.}
 92 | 
 93 | \item{show.precision}{whether to show the precision plot.}
 94 | 
 95 | \item{show.maximal}{whether to show the maximal performance line.}
 96 | 
 97 | \item{risk.name}{a string used within the plot's legend that gives a
 98 |   name to the risk. Often the risk is a dollar amount at risk from a
 99 |   fraud or from a bank loan point of view, in which case a name such as
100 |   \code{Revenue} may be used. The default is \code{Risk}.}
101 | 
102 | \item{recall.name}{a string used within the plot's legend that gives a
103 |   name to the recall. The recall is often the percentage of cases that
104 |   are positive hits, and in practice these might correspond to known
105 |   cases of fraud or reviews where some adjustment to perhaps an income tax
106 |   return or application for credit had to be made on reviewing the case,
107 |   so a name such as \code{Adjustments} may be used. The default is \code{Recall}.}
108 | 
109 | \item{precision.name}{a string used within the plot's legend that gives
110 |   a name to the precision. A common name for precision is \code{Strike
111 |   Rate}; the default here is \code{Precision}.}
112 |   
113 | \item{thresholds}{whether to display scores along the top axis.}
114 | 
115 | \item{legend.horiz}{whether to display a horizontal legend.}
116 | }
117 | \details{
118 | 
119 |   Caseload is the percentage of the entities in the dataset covered by
120 |   the model at a particular probability cutoff, so that with a cutoff of
121 |   0, all (100\%) of the entities are covered by the model. With a cutoff
122 |   of 1 (0\%) no entities are covered by the model. A diagonal line is
123 |   drawn to represent a baseline random performance. Then the percentage
124 |   of positive cases (the recall) covered for a particular caseload is
125 |   plotted, and optionally a measure of the percentage of the total risk
126 |   that is also covered for a particular caseload may be plotted. Such a
127 |   chart allows a user to select an appropriate tradeoff between caseload
128 |   and performance. The charts are similar to ROC curves. The precision
129 |   (i.e., strike rate) is also plotted.
130 | 
131 | }
132 | \references{Package home page: \url{https://rattle.togaware.com}}
133 | \author{\email{Graham.Williams@togaware.com}}
134 | \seealso{\code{\link{evaluateRisk}}, \code{\link{genPlotTitleCmd}}.}
135 | \examples{
136 | \dontrun{
137 | 
138 | ## Use rpart to build a decision tree.
139 | 
140 | library(rpart)
141 | 
142 | ## Set up the data for modelling.
143 | 
144 | set.seed(42)
145 | ds     <- weather
146 | target <- "RainTomorrow"
147 | risk   <- "RISK_MM"
148 | ignore <- c("Date", "Location", risk)
149 | vars   <- setdiff(names(ds), ignore)
150 | nobs   <- nrow(ds)
151 | form   <- formula(paste(target, "~ ."))
152 | train  <- sample(nobs, 0.7*nobs)
153 | test   <- setdiff(seq_len(nobs), train)
154 | actual <- ds[test, target]
155 | risks  <- ds[test, risk]
156 | 
157 | # Build the model.
158 | 
159 | model <- rpart(form, data=ds[train, vars])
160 | 
161 | ## Obtain predictions.
162 | 
163 | predicted <- predict(model, ds[test, vars], type="prob")[,2]
164 | 
165 | ## Plot the Risk Chart.
166 | 
167 | riskchart(predicted, actual, risks)
168 | }
169 | }
170 | \keyword{hplot}
171 | 


--------------------------------------------------------------------------------
/man/savePlotToFile.Rd:
--------------------------------------------------------------------------------
 1 | \name{savePlotToFile}
 2 | \alias{savePlotToFile}
 3 | \alias{copyPlotToClipboard}
 4 | \alias{printPlot}
 5 | \title{
 6 |   Save a plot in some way
 7 | }
 8 | \description{
 9 |   
10 |   For the current device, or for the device identified, save the plot
11 |   displayed there in some way. This is either saved to file, copied to
12 |   the clipboard for pasting into other applications, or sent to the
13 |   printer for saving a hard copy.
14 |   
15 | }
16 | \usage{
17 | savePlotToFile(file.name, dev.num=dev.cur())
18 | copyPlotToClipboard(dev.num=dev.cur())
19 | printPlot(dev.num=dev.cur()) 
20 | }
21 | \arguments{
22 |   
23 |   \item{file.name}{Character string naming the file including the file
24 |     name extension which is used to specify the type of file to save.}
25 | 
26 |   \item{dev.num}{A device number indicating which device to save.}
27 | 
28 | }
29 | \references{Package home page: \url{https://rattle.togaware.com}}
30 | \author{\email{Graham.Williams@togaware.com}}
31 | \keyword{hplot}
32 | 


--------------------------------------------------------------------------------
/man/setupDataset.Rd:
--------------------------------------------------------------------------------
 1 | \name{setupDataset}
 2 | \alias{setupDataset}
 3 | \title{
 4 |   Given specific contents of env add other dataset related variables.
 5 | }
 6 | \description{
 7 |   
 8 |   This rattle support function is used for encapsulating data mining
 9 |   objects. The supplied environment is augmented with other data derived
10 |   from the supplied data, such as a sample training dataset, list of
11 |   numeric variables, and a formula for modelling.
12 |   
13 | }
14 | \usage{
15 | setupDataset(env, seed=NULL)
16 | 
17 | }
18 | \arguments{
19 |   
20 |   \item{env}{the environment to modify.}
21 | 
22 |   \item{seed}{optionally set the seed for repeatability.}
23 |   
24 | }
25 | \details{
26 |   
27 |   The supplied object (an environment) is assumed to also contain the
28 |   variables data (a data frame), target (a character string naming the
29 |   target variable), risk (a character string naming the risk variable),
30 |   and inputs (a character vector naming all the input variables). This
31 |   function then adds in the variables vars (the variables used for
32 |   modelling), numerics (the numeric vars within inputs), nobs (the
33 |   number of observations), form (the formula for building models), train
34 |   (a 70\% training dataset).
35 | 
36 | }
37 | \references{Package home page: \url{https://rattle.togaware.com}}
38 | \author{\email{Graham.Williams@togaware.com}}
39 | 


--------------------------------------------------------------------------------
/man/treeset.randomForest.Rd:
--------------------------------------------------------------------------------
 1 | \name{treeset.randomForest}
 2 | \alias{treeset.randomForest}
 3 | \title{
 4 |   Generate a representation of a tree in a Random Forest
 5 | }
 6 | \description{
 7 |   Often we want to view the actual trees built by a random
 8 |   forest. Although reviewing all 500 trees might be a bit much, this
 9 |   function allows us to at least list them.
10 | }
11 | \usage{
12 | treeset.randomForest(model, n=1, root=1, format="R")
13 | }
14 | \arguments{
15 |   
16 |   \item{model}{a randomForest model.}
17 | 
18 |   \item{n}{a specific tree to list.}
19 |   
20 |   \item{root}{where to start the tree from, primarily for internal use.}
21 |   
22 |   \item{format}{one of "R", "VB".}
23 |   
24 | }
25 | \references{Package home page: \url{https://rattle.togaware.com}}
26 | \author{\email{Graham.Williams@togaware.com}}
27 | \examples{
28 | ## Display a treeset for a specific model amongst the 500.
29 | \dontrun{treeset.randomForest(rfmodel, 5)}
30 | }
31 | \keyword{hplot}
32 | 


--------------------------------------------------------------------------------
/man/weather.Rd:
--------------------------------------------------------------------------------
  1 | \name{weather}
  2 | \docType{data}
  3 | \alias{weather}
  4 | \title{Sample dataset of daily weather observations from Canberra
  5 |   airport in Australia.}
  6 | \description{
  7 | 
  8 |   One year of daily weather observations collected from the Canberra
  9 |   airport in Australia was obtained from the Australian Commonwealth
 10 |   Bureau of Meteorology and processed to create this sample dataset for
 11 |   illustrating data mining using R and Rattle.
 12 |   
 13 |   The data has been processed to provide a target variable
 14 |   \code{RainTomorrow} (whether there is rain on the following day -
 15 |   No/Yes) and a risk variable \code{RISK_MM} (how much rain recorded in
 16 |   millimetres). Various transformations were performed on the source
 17 |   data. The dataset is quite small and is useful only for repeatable
 18 |   demonstration of various data science operations. 
 19 | 
 20 |   The source dataset is Copyright by the Australian Commonwealth Bureau
 21 |   of Meteorology and is provided as part of the rattle package with
 22 |   permission.
 23 | 
 24 | }
 25 | \usage{weather}
 26 | \format{
 27 | 
 28 |   The \code{weather} dataset is a data frame containing one year of
 29 |   daily observations from a single weather station (Canberra).
 30 | 
 31 |   \describe{
 32 | 
 33 |     \item{\code{Date}}{The date of observation (a Date object).}
 34 | 
 35 |     \item{\code{Location}}{The common name of the location of the
 36 |     weather station.}
 37 | 
 38 |     \item{\code{MinTemp}}{The minimum temperature in degrees Celsius. }
 39 | 
 40 |     \item{\code{MaxTemp}}{The maximum temperature in degrees Celsius. }
 41 | 
 42 |     \item{\code{Rainfall}}{The amount of rainfall recorded for the day in mm. }
 43 |     
 44 |     \item{\code{Evaporation}}{The so-called Class A pan evaporation (mm)
 45 |     in the 24 hours to 9am.}
 46 | 
 47 |     \item{\code{Sunshine}}{The number of hours of bright sunshine in the day.}
 48 |     
 49 |     \item{\code{WindGustDir}}{The direction of the strongest wind gust
 50 |     in the 24 hours to midnight.}
 51 | 
 52 |     \item{\code{WindGustSpeed}}{The speed (km/h) of the strongest wind
 53 |     gust in the 24 hours to midnight.}
 54 | 
 55 |     \item{\code{Temp9am}}{ Temperature (degrees C) at 9am. }
 56 | 
 57 |     \item{\code{RelHumid9am}}{ Relative humidity (percent) at 9am. }
 58 | 
 59 |     \item{\code{Cloud9am}}{ Fraction of sky obscured by cloud at
 60 |       9am. This is measured in "oktas", which are a unit of eighths. It
 61 |       records how many eighths of the sky are obscured by cloud. A 0
 62 |       measure indicates completely clear sky whilst an 8 indicates that
 63 |       it is completely overcast.  }
 64 | 
 65 |     \item{\code{WindSpeed9am}}{
 66 |       Wind speed (km/hr) averaged over 10 minutes prior to 9am.
 67 |     }
 68 | 
 69 |     \item{\code{Pressure9am}}{
 70 |       Atmospheric pressure (hPa) reduced to mean sea level at 9am.
 71 |     }
 72 | 
 73 |     \item{\code{Temp3pm}}{ Temperature (degrees C) at 3pm. }
 74 | 
 75 |     \item{\code{RelHumid3pm}}{ Relative humidity (percent) at 3pm. }
 76 | 
 77 |     \item{\code{Cloud3pm}}{
 78 | 
 79 |       Fraction of sky obscured by cloud (in "oktas": eighths) at
 80 |       3pm. See Cloud9am for a description of the values.
 81 | 
 82 |     }
 83 | 
 84 |     \item{\code{WindSpeed3pm}}{
 85 |       Wind speed (km/hr) averaged over 10 minutes prior to 3pm.
 86 |     }
 87 | 
 88 |     \item{\code{Pressure3pm}}{
 89 |       Atmospheric pressure (hPa) reduced to mean sea level at 3pm.
 90 |     }
 91 | 
 92 |     \item{\code{ChangeTemp}}{
 93 |       Change in temperature.
 94 |     }
 95 | 
 96 |     \item{\code{ChangeTempDir}}{
 97 |       Direction of change in temperature.
 98 |     }
 99 | 
100 |     \item{\code{ChangeTempMag}}{
101 |       Magnitude of change in temperature.
102 |     }
103 | 
104 |     \item{\code{ChangeWindDirect}}{
105 |       Direction of wind change.
106 |     }
107 | 
108 |     \item{\code{MaxWindPeriod}}{
109 |       Period of maximum wind.
110 |     }
111 | 
112 |     \item{\code{RainToday}}{
113 |       Integer: 1 if precipitation (mm) in the 24 hours to 9am exceeds
114 |       1mm, otherwise 0.
115 |     }
116 | 
117 |     \item{\code{TempRange}}{
118 | 
119 |       Difference between minimum and maximum temperatures (degrees C) in
120 |       the 24 hours to 9am.
121 | 
122 |     }
123 | 
124 |     \item{\code{PressureChange}}{
125 |       Change in pressure.
126 |     }
127 |     \item{\code{RISK_MM}}{
128 |       The amount of rain. A kind of measure of the "risk".
129 |     }
130 | 
131 |     \item{\code{RainTomorrow}}{
132 | 
133 |       The target variable. Did it rain tomorrow?
134 | 
135 |     }
136 |   }
137 | }
138 | \source{
139 | 
140 |   The daily observations are available from
141 |   \url{https://www.bom.gov.au/climate/data}.  Copyright Commonwealth of
142 |   Australia 2010, Bureau of Meteorology.
143 | 
144 |   Definitions adapted from
145 |   \url{https://www.bom.gov.au/climate/dwo/IDCJDW0000.shtml}
146 |   
147 | }
148 | 
149 | \references{
150 | 
151 |   Package home page: \url{https://rattle.togaware.com}. Data source:
152 |   \url{https://www.bom.gov.au/climate/dwo/} and
153 |   \url{https://www.bom.gov.au/climate/data}.
154 | 
155 | }
156 | 
157 | \author{\email{Graham.Williams@togaware.com}}
158 | 
159 | \seealso{
160 | 
161 |   \code{\link{weatherAUS}}, \code{\link{audit}}.
162 | 
163 | }
164 | 
165 | \keyword{datasets}
166 | 


--------------------------------------------------------------------------------
/man/weatherAUS.Rd:
--------------------------------------------------------------------------------
  1 | \name{weatherAUS}
  2 | \docType{data}
  3 | \alias{weatherAUS}
  4 | \alias{locationsAUS}
  5 | \title{Daily weather observations from multiple Australian weather stations.}
  6 | \description{
  7 | 
  8 |   Daily weather observations from multiple locations around Australia,
  9 |   obtained from the Australian Commonwealth Bureau of Meteorology and
 10 |   processed to create this relatively large sample dataset for
 11 |   illustrating analytics, data mining, and data science using R and
 12 |   Rattle.
 13 |   
 14 |   The data has been processed to provide a target variable
 15 |   \code{RainTomorrow} (whether there is rain on the following day -
 16 |   No/Yes) and a risk variable \code{RISK_MM} (how much rain recorded in
 17 |   millimeters). Various transformations are performed on the data.
 18 |   
 19 |   The \code{weatherAUS} dataset is regularly updated and updates of this
 20 |   package usually correspond to updates to this dataset. The data is
 21 |   updated from the Bureau of Meteorology web site.
 22 | 
 23 |   The \code{locationsAUS} dataset records the location of each weather
 24 |   station.
 25 |   
 26 |   The source dataset comes from the Australian Commonwealth Bureau of
 27 |   Meteorology. The Bureau provided permission to use the data with the
 28 |   Bureau of Meteorology acknowledged as the source of the data, as per
 29 |   email from Cathy Toby (C.Toby@bom.gov.au) of the Climate Information
 30 |   Services of the National Climate Centre, 17 Dec 2008.
 31 |   
 32 |   A CSV version of this dataset is available as
 33 |   \url{https://rattle.togaware.com/weatherAUS.csv}.
 34 | 
 35 | }
 36 | \usage{weatherAUS}
 37 | \format{
 38 | 
 39 |   The \code{weatherAUS} dataset is a data frame containing over 140,000
 40 |   daily observations from over 45 Australian weather stations.
 41 | 
 42 |   \describe{
 43 | 
 44 |     \item{\code{Date}}{The date of observation (a Date object).}
 45 | 
 46 |     \item{\code{Location}}{The common name of the location of the
 47 |     weather station.}
 48 | 
 49 |     \item{\code{MinTemp}}{The minimum temperature in degrees celsius. }
 50 | 
 51 |     \item{\code{MaxTemp}}{The maximum temperature in degrees celsius. }
 52 | 
 53 |     \item{\code{Rainfall}}{The amount of rainfall recorded for the day in mm. }
 54 |     
 55 |     \item{\code{Evaporation}}{The so-called Class A pan evaporation (mm)
 56 |     in the 24 hours to 9am.}
 57 | 
 58 |     \item{\code{Sunshine}}{The number of hours of bright sunshine in the day.}
 59 |     
 60 |     \item{\code{WindGustDir}}{The direction of the strongest wind gust
 61 |     in the 24 hours to midnight.}
 62 | 
 63 |     \item{\code{WindGustSpeed}}{The speed (km/h) of the strongest wind
 64 |     gust in the 24 hours to midnight.}
 65 | 
 66 |     \item{\code{Temp9am}}{ Temperature (degrees C) at 9am. }
 67 | 
 68 |     \item{\code{RelHumid9am}}{ Relative humidity (percent) at 9am. }
 69 | 
 70 |     \item{\code{Cloud9am}}{ Fraction of sky obscured by cloud at
 71 |       9am. This is measured in "oktas", which are a unit of eighths. It
 72 |       records how many eighths of the sky are obscured by cloud. A 0
 73 |       measure indicates completely clear sky whilst an 8 indicates that
 74 |       it is completely overcast.  }
 75 | 
 76 |     \item{\code{WindSpeed9am}}{
 77 |       Wind speed (km/hr) averaged over 10 minutes prior to 9am.
 78 |     }
 79 | 
 80 |     \item{\code{Pressure9am}}{
 81 |       Atmospheric pressure (hPa) reduced to mean sea level at 9am.
 82 |     }
 83 | 
 84 |     \item{\code{Temp3pm}}{ Temperature (degrees C) at 3pm. }
 85 | 
 86 |     \item{\code{RelHumid3pm}}{ Relative humidity (percent) at 3pm. }
 87 | 
 88 |     \item{\code{Cloud3pm}}{
 89 | 
 90 |       Fraction of sky obscured by cloud (in "oktas": eighths) at
 91 |       3pm. See Cloud9am for a description of the values.
 92 | 
 93 |     }
 94 | 
 95 |     \item{\code{WindSpeed3pm}}{
 96 |       Wind speed (km/hr) averaged over 10 minutes prior to 3pm.
 97 |     }
 98 | 
 99 |     \item{\code{Pressure3pm}}{
100 |       Atmospheric pressure (hPa) reduced to mean sea level at 3pm.
101 |     }
102 | 
103 |     \item{\code{ChangeTemp}}{
104 |       Change in temperature.
105 |     }
106 | 
107 |     \item{\code{ChangeTempDir}}{
108 |       Direction of change in temperature.
109 |     }
110 | 
111 |     \item{\code{ChangeTempMag}}{
112 |       Magnitude of change in temperature.
113 |     }
114 | 
115 |     \item{\code{ChangeWindDirect}}{
116 |       Direction of wind change.
117 |     }
118 | 
119 |     \item{\code{MaxWindPeriod}}{
120 |       Period of maximum wind.
121 |     }
122 | 
123 |     \item{\code{RainToday}}{
124 |       Integer: 1 if precipitation (mm) in the 24 hours to 9am exceeds
125 |       1mm, otherwise 0.
126 |     }
127 | 
128 |     \item{\code{TempRange}}{
129 | 
130 |       Difference between minimum and maximum temperatures (degrees C) in
131 |       the 24 hours to 9am.
132 | 
133 |     }
134 | 
135 |     \item{\code{PressureChange}}{
136 |       Change in pressure.
137 |     }
138 |     \item{\code{RISK_MM}}{
139 |       The amount of rain. A kind of measure of the "risk".
140 |     }
141 | 
142 |     \item{\code{RainTomorrow}}{
143 | 
144 |       The target variable. Did it rain tomorrow?
145 | 
146 |     }
147 |   }
148 | }
149 | \source{
150 | 
151 |   Observations were drawn from numerous weather stations.  The daily
152 |   observations are available from
153 |   \url{https://www.bom.gov.au/climate/data}.  Copyright Commonwealth of
154 |   Australia 2010, Bureau of Meteorology.
155 | 
156 |   Definitions adapted from
157 |   \url{https://www.bom.gov.au/climate/dwo/IDCJDW0000.shtml}
158 |   
159 | }
160 | 
161 | \references{
162 | 
163 |   Package home page: \url{https://rattle.togaware.com}. Data source:
164 |   \url{https://www.bom.gov.au/climate/dwo/} and
165 |   \url{https://www.bom.gov.au/climate/data}.
166 | 
167 | }
168 | 
169 | \author{\email{Graham.Williams@togaware.com}}
170 | 
171 | \seealso{
172 | 
173 |   \code{\link{weather}}, \code{\link{audit}}.
174 | 
175 | }
176 | 
177 | \keyword{datasets}
178 | 


--------------------------------------------------------------------------------
/man/whichNumerics.Rd:
--------------------------------------------------------------------------------
 1 | \name{whichNumerics}
 2 | \alias{whichNumerics}
 3 | \title{
 4 |   Returns a list of the names of the numeric variables in a data frame.
 5 | }
 6 | \description{
 7 |   
 8 |   A rattle support function.
 9 |   
10 | }
11 | \usage{
12 | whichNumerics(data)
13 | 
14 | }
15 | \arguments{
16 |   
17 |   \item{data}{a data frame.}
18 | 
19 | }
20 | \references{Package home page: \url{https://rattle.togaware.com}}
21 | \author{\email{Graham.Williams@togaware.com}}
22 | 


--------------------------------------------------------------------------------
/man/wine.Rd:
--------------------------------------------------------------------------------
 1 | \name{wine}
 2 | \docType{data}
 3 | \alias{wine}
 4 | \title{The wine dataset from the UCI Machine Learning Repository.}
 5 | \description{
 6 | 
 7 |   The \code{wine} dataset contains the results of a chemical analysis of
 8 |   wines grown in a specific area of Italy. Three types of wine are
 9 |   represented in the 178 samples, with the results of 13 chemical
10 |   analyses recorded for each sample.  The \code{Type} variable has been
11 |   transformed into a categoric variable.
12 | 
13 |   The data contains no missing values and consists of only numeric data,
14 |   with a three class target variable (\code{Type}) for classification.
15 | 
16 | }
17 | \usage{wine}
18 | \format{
19 | 
20 |   A data frame containing 178 observations of 14 variables.
21 | 
22 |   \describe{
23 | 
24 |     \item{\code{Type}}{ The type of wine, one of three classes: 1
25 |     (59 obs), 2 (71 obs), and 3 (48 obs).}
26 |     
27 |     \item{\code{Alcohol}}{Alcohol}
28 |     
29 |     \item{\code{Malic}}{Malic acid}
30 |     
31 |     \item{\code{Ash}}{Ash}
32 |     
33 |     \item{\code{Alcalinity}}{Alcalinity of ash}
34 |     
35 |     \item{\code{Magnesium}}{Magnesium}
36 |     
37 |     \item{\code{Phenols}}{Total phenols}
38 |     
39 |     \item{\code{Flavanoids}}{Flavanoids}
40 |     
41 |     \item{\code{Nonflavanoids}}{Nonflavanoid phenols}
42 |     
43 |     \item{\code{Proanthocyanins}}{Proanthocyanins}
44 |     
45 |     \item{\code{Color}}{Color intensity.}
46 |     
47 |     \item{\code{Hue}}{Hue}
48 |     
49 |     \item{\code{Dilution}}{OD280/OD315 of diluted wines.}
50 |     
51 |     \item{\code{Proline}}{Proline}
52 |     
53 |   }
54 | }
55 | \source{
56 |   
57 |   The data was downloaded from the UCI Machine Learning Repository.
58 | 
59 |   It was read as a CSV file with no header using
60 |   \code{\link{read.csv}}. The columns were then given the appropriate
61 |   names using \code{\link{colnames}} and the Type was transformed into a
62 |   factor using \code{\link{as.factor}}. The compressed R data file was
63 |   saved using \code{\link{save}}:
64 | 
65 |   \preformatted{
66 |   UCI <- "https://archive.ics.uci.edu/ml"
67 |   REPOS <- "machine-learning-databases"
68 |   wine.url <- sprintf("%s/%s/wine/wine.data", UCI, REPOS)
69 |   wine <- read.csv(wine.url, header=FALSE) 
70 |   colnames(wine) <- c('Type', 'Alcohol', 'Malic', 'Ash', 
71 |                       'Alcalinity', 'Magnesium', 'Phenols', 
72 |                       'Flavanoids', 'Nonflavanoids',
73 |                       'Proanthocyanins', 'Color', 'Hue', 
74 |                       'Dilution', 'Proline')
75 |   wine$Type <- as.factor(wine$Type)
76 |   save(wine, file="wine.Rdata", compress=TRUE)
77 |   }
78 | }
79 | 
80 | \references{
81 | 
82 |   Asuncion, A. & Newman, D.J. (2007). \emph{UCI Machine Learning
83 |   Repository}
84 |   [\url{https://www.ics.uci.edu/~mlearn/MLRepository.html}]. Irvine, CA:
85 |   University of California, School of Information and Computer Science.
86 | 
87 | }
88 | 
89 | \keyword{datasets}
90 | 


--------------------------------------------------------------------------------
/vignettes/rattle.Rnw:
--------------------------------------------------------------------------------
  1 | % \VignetteIndexEntry{Rattle Quick Start Guide}
  2 | % \VignetteDepends{rattle}
  3 | % \VignetteKeywords{data mining}
  4 | % \VignettePackage{rattle}
  5 | \documentclass[12pt]{article}
  6 | \usepackage{amsmath}
  7 | \usepackage[pdftex]{graphicx}
  8 | \usepackage{color}
  9 | \usepackage{xspace}
 10 | \usepackage{fancyvrb}
 11 | \usepackage{fancyhdr}
 12 | \usepackage{lastpage}
 13 | \usepackage{algorithm2e}
 14 | \usepackage[
 15 |          colorlinks=true,
 16 |          linkcolor=blue,
 17 |          citecolor=blue,
 18 |          urlcolor=blue]
 19 |          {hyperref}
 20 | \usepackage{Sweave}         
 21 | 
 22 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 23 | 
 24 | % define new colors for use
 25 | \definecolor{darkgreen}{rgb}{0,0.6,0}
 26 | \definecolor{darkred}{rgb}{0.6,0.0,0}
 27 | \definecolor{lightbrown}{rgb}{1,0.9,0.8}
 28 | \definecolor{brown}{rgb}{0.6,0.3,0.3}
 29 | \definecolor{darkblue}{rgb}{0,0,0.8}
 30 | \definecolor{darkmagenta}{rgb}{0.5,0,0.5}
 31 | 
 32 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 33 | 
 34 | \newcommand{\Rattle}{\textsf{Rattle}\xspace}
 35 | \newcommand{\pkg}[1]{{\tt #1}\xspace}
 36 | 
 37 | \setlength{\oddsidemargin}{-.25 truein}
 38 | \setlength{\evensidemargin}{0truein}
 39 | \setlength{\topmargin}{-0.2truein}
 40 | \setlength{\textwidth}{7 truein}
 41 | \setlength{\textheight}{8.5 truein}
 42 | \setlength{\parindent}{0.20truein}
 43 | \setlength{\parskip}{0.10truein}
 44 | 
 45 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 46 | \pagestyle{fancy}
 47 | \lhead{}
 48 | \chead{Rattle}
 49 | \rhead{}
 50 | \lfoot{}
 51 | \cfoot{}
 52 | \rfoot{\thepage\ of \pageref{LastPage}}
 53 | \renewcommand{\headrulewidth}{1pt}
 54 | \renewcommand{\footrulewidth}{1pt}
 55 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 56 | 
 57 | \title{The Rattle Package: Quick Start Guide}
 58 | \author{Graham Williams \\ Graham.Williams@togaware.com}
 59 | 
 60 | \begin{document}
 61 | 
 62 | \maketitle
 63 | 
 64 | \thispagestyle{empty}
 65 | 	
 66 | \section{Introduction}
 67 | 
 68 | \Rattle (Williams, 2011) is a package written in R providing a
 69 | graphical user interface to very many other R packages that provide
 70 | functionality for data mining.
 71 | 
 72 | This quick start guide is under development. See
 73 | \url{https://rattle.togaware.com} for extensive documentation.
 74 | 
 75 | \section{Requirements}
 76 | 
 77 | \Rattle depends on over 40 other R packages and a couple of other
 78 | software applications/libraries that are independent of R. The first
 79 | thing to ensure is that you have installed the GTK+ libraries and the
 80 | GGobi application. This is operating system dependent and full
 81 | installation instructions are available from \url{https://rattle.togaware.com/}.
 82 | 
 83 | Only a couple of R packages are dependencies for \Rattle. Most are
 84 | suggestions, but without them functionality is quite limited. At a
 85 | minimum it is useful to ensure you have the
 86 | \href{https://cran.r-project.org/package=RGtk2}{\pkg{RGtk2}} package
 87 | installed. Others that you might like to install include:
 88 | \href{https://cran.r-project.org/package=ada}{\pkg{ada}},
 89 | \href{https://cran.r-project.org/package=arules}{\pkg{arules}},
 90 | \href{https://cran.r-project.org/package=doBy}{\pkg{doBy}},
 91 | \href{https://cran.r-project.org/package=ellipse}{\pkg{ellipse}},
 92 | \href{https://cran.r-project.org/package=fBasics}{\pkg{fBasics}},
 93 | \href{https://cran.r-project.org/package=fpc}{\pkg{fpc}},
 94 | \href{https://cran.r-project.org/package=gplots}{\pkg{gplots}},
 95 | \href{https://cran.r-project.org/package=Hmisc}{\pkg{Hmisc}},
 96 | \href{https://cran.r-project.org/package=kernlab}{\pkg{kernlab}},
 97 | \href{https://cran.r-project.org/package=mice}{\pkg{mice}},
 98 | \href{https://cran.r-project.org/package=party}{\pkg{party}},
 99 | \href{https://cran.r-project.org/package=playwith}{\pkg{playwith}},
100 | \href{https://cran.r-project.org/package=pmml}{\pkg{pmml}},
101 | \href{https://cran.r-project.org/package=randomForest}{\pkg{randomForest}},
102 | \href{https://cran.r-project.org/package=reshape}{\pkg{reshape}},
103 | \href{https://cran.r-project.org/package=rggobi}{\pkg{rggobi}},
104 | \href{https://cran.r-project.org/package=RGtk2}{\pkg{RGtk2}},
105 | \href{https://cran.r-project.org/package=ROCR}{\pkg{ROCR}},
106 | \href{https://cran.r-project.org/package=RODBC}{\pkg{RODBC}}, and
107 | \href{https://cran.r-project.org/package=rpart}{\pkg{rpart}}.
108 | 
109 | The packages will usually be installed with the following command:
110 | 
111 | <<install, eval=FALSE>>=
112 | install.packages("rattle", dependencies=c("Depends", "Suggests"))
113 | @ 
114 | 
115 | The latest beta version of rattle is available from
116 | \url{https://rattle.togaware.com/}:
117 | 
118 | <<install_togaware, eval=FALSE>>=
119 | install.packages("rattle", repos="https://rattle.togaware.com", type="source")
120 | @ 
121 | 
122 | \section{First Steps}
123 | 
124 | Start up rattle:
125 | <<start_up, eval=FALSE>>=
126 | library(rattle)
127 | rattle()
128 | @ 
129 | 
130 | \section{Simple Scenario: Build a Couple of Models}
131 | 
132 | \begin{enumerate}
133 | \item Click Execute
134 | \item Click Yes (load the sample weather dataset)
135 | \item Click the Model tab
136 | \item Click Execute (to build a decision tree)
137 | \item Click Draw to display the decision tree (loads other packages as required)
138 | \item Click the Forest radio button
139 | \item Click Execute (to build a random forest - loads packages as required)
140 | \item Click the Evaluate tab
141 | \item Click the Risk radio button (installs packages as required)
142 | \item Click Execute to display two Risk (Cumulative) performance plots
143 | \item Click the Log tab
144 | \item Click the Export button to save script to file weather\_script.R to home folder
145 | \end{enumerate}
146 | 
147 | Now exit from R (and rattle) and start R up again.
148 | 
149 | <<eval=FALSE>>=
150 | source("~/weather_script.R")
151 | @ 
152 | 
153 | This will rerun everything that was done in the GUI session but purely as a script.
154 | 
155 | \section{References}
156 | 
157 | \begin{description}
158 | \item Williams, G. J. (2009). {\em Rattle: A Data Mining GUI for R}.
159 |   The R Journal, 1(2), 45-55. URL:
160 |   \href{https://journal.r-project.org/archive/2009-2/RJournal_2009-2_Williams.pdf}
161 |   {https://journal.r-project.org/archive/2009-2/RJournal\_2009-2\_Williams.pdf}.
162 | \item Williams, G. J. (2011). {\em Data Mining with Rattle and R: The
163 |     Art of Excavating Data for Knowledge Discovery}. Use R!
164 |   series. Springer. \href{https://bit.ly/rattle_data_mining}{https://bit.ly/rattle\_data\_mining}.
165 | \end{description}
166 | 
167 | \end{document}
168 | 


--------------------------------------------------------------------------------