├── .gitignore ├── Julia ├── Data │ ├── ClassifierID.dat │ ├── ClassifierNames.dat │ ├── DatasetID.dat │ ├── DatasetNames.dat │ ├── Hierarchical.dat │ ├── Percent_correct.dat │ └── get_accuracies.jl ├── Notebook1 loads accuracies and computes pvalues.ipynb ├── Notebook2 shows densityplot of the data and computes posteriors for Bayesian correlated t-test.ipynb ├── Notebook3 Bayesian analysis of Squash dataset.ipynb ├── Notebook4 computes posterior for all datasets based on Bayesian correlated ttest.ipynb ├── Notebook5 compares two classifiers on multiple datasets.ipynb ├── Notebook6 compares two classifiers on multiple datasets using left rope right.ipynb ├── Notebook7 hierarchical plots.ipynb ├── Plots │ ├── bars.pdf │ ├── hierarchical.pdf │ ├── hist1217.pdf │ ├── hist122.pdf │ ├── hist1220.pdf │ ├── hist123.pdf │ ├── hist129.pdf │ ├── hist12Any[].pdf │ ├── output1217.pdf │ ├── output122.pdf │ ├── output1220.pdf │ ├── output123.pdf │ ├── output1246.pdf │ ├── output129.pdf │ ├── piechart.pdf │ ├── piechartbay.pdf │ ├── piechartbay1.pdf │ ├── plotSignRankSimplex12.pdf │ ├── plotSignSimplex12.pdf │ ├── plot_data.jl │ ├── plot_post_hierch.jl │ ├── plot_posterior_ttest.jl │ ├── plot_posterior_ttest2.jl │ ├── plot_simplex.jl │ ├── plotmany.pdf │ ├── plotmanytriangles.pdf │ ├── postSignRankA12.pdf │ ├── postSignRankB12.pdf │ ├── postSignRanknorope12.pdf │ ├── postSignTestA12.pdf │ ├── postSignTestB12.pdf │ └── postSignTestnorope12.pdf └── Tests │ ├── Bsignranktest.jl │ ├── Bsignranktest.jl~ │ ├── Bsigntest.jl │ ├── Bttest_correlated.jl │ ├── heaviside.jl │ ├── makedecision.jl │ └── ttest_correlated.jl ├── LICENSE.md ├── Python ├── Bsignedrank.ipynb ├── Bsigntest.ipynb ├── Content of the package.ipynb ├── Correlated t-test for comparing classifiers performance on the same dataset.ipynb ├── Hierarchical test.ipynb ├── The importance of the Rope.ipynb ├── bayesiantests.py ├── triangle.png └── triangle_hierarchical.png ├── R ├── BayesianSignTest.R ├── BayesianSignedRank.R └── 
correlatedBayesianTtest.R ├── README.md ├── hierarchical ├── Utils.R ├── actualFriedmanAccuracy.R ├── analyzeFriedmanResults.R ├── analyze_uci_with_rope.R ├── batchSensivity.R ├── compTable.R ├── cv_nbc.R ├── cvalFriedmanAccuracy.R ├── cvalFriedmanPredictive.R ├── cvalFriedmanPredictivePlots.R ├── genFriedmanSettings.R ├── generateFriedmanData.R ├── hierUciAnalysis.R ├── hierarchical_test.R ├── logPredictive.R ├── multiple_dsets_rope.R ├── selectTrainSettings.R ├── sensitivityNormalStudent.R ├── stan │ ├── hierarchical-t-test │ ├── hierarchical-t-test.stan │ ├── hierarchical-t-testGaussian.stan │ ├── hierarchical-t-test_nuJuaSteel │ ├── hierarchical-t-test_nuJuaSteel.stan │ ├── hierarchical-t-test_nuKru │ └── hierarchical-t-test_nuKru.stan └── uci_data.RData └── slides ├── ECML2016nonparametric-0part.ipynb ├── ECML2016nonparametric-1part.ipynb ├── ECML2016nonparametric-2part.ipynb ├── case-against-nhst.ipynb ├── parametricBayesianComparison.pdf └── plots ├── Bsigntest1.jl ├── CONVERTING.txt ├── Comic1.jpg ├── bayes.png ├── canvas.png ├── canvas1.png ├── confused.png ├── densplot.png ├── densplotpost.png ├── eye.jpg ├── fig1.pdf ├── fig1.png ├── fig1.tex ├── fig2.pdf ├── fig2.png ├── fig2.tex ├── manytriangles.png ├── plot_data.jl ├── real.png ├── realpost.png ├── realpost1.png ├── realpost1b.png ├── realv.png ├── simplex.png ├── summing.png ├── tab0.png ├── tab1.png ├── table.png ├── table0.png ├── tablecomp.png ├── tablepvalue.png ├── trianglenbc.png ├── tvb&w.png └── tvcol.png /.gitignore: -------------------------------------------------------------------------------- 1 | /GC 2 | *.csv 3 | 4 | *.hpp 5 | *.cpp 6 | 7 | *.rda 8 | hierarchical/12-1000-student1 9 | *.003_dsets_2_delta_acc_cauchy_samples_sizes_50simulationID_11 10 | hierarchical/delta00Std00.003_dsets_2_delta_acc_cauchy_samples_sizes_50simulationID_12 11 | hierarchical/delta00Std00.003_dsets_2_delta_acc_cauchy_samples_sizes_50simulationID_13 12 | 
hierarchical/delta00Std00.003_dsets_2_delta_acc_cauchy_samples_sizes_50simulationID_14 13 | hierarchical/12-1000-student4 14 | hierarchical/12-1000-student2 15 | hierarchical/12-1000-student3 16 | hierarchical/15-1000-student4 17 | *.dat* 18 | *.StanOut1 19 | *.StanOut2 20 | *.StanOut4 21 | *.StanOut3 22 | *.StanOut6 23 | *.StanOut8 24 | *.StanOut7 25 | *.StanOut5 26 | *.html 27 | *.arff 28 | *.pdf 29 | hierarchical/Untitled.R 30 | hierarchical/Rdata/compTable.R 31 | hierarchical/Rdata/plotPosterior.R 32 | hierarchical/Rdata/sensitvity.xls 33 | hierarchical/csvResults/.Rhistory 34 | hierarchical/.Rhistory 35 | *.StanOut 36 | hierarchical/svnCheckout.txt 37 | *.tex 38 | hierarchical/Rdata/.Rhistory 39 | hierarchical/Rdata/.Rapp.history 40 | *.history 41 | hierarchical/weka/weka500runs.Rdata 42 | hierarchical/sandbox.R 43 | hierarchical/weka/weka500runsRawResults.Rdata 44 | hierarchical/weka/weka500runsRawResults.Rdata 45 | *.Rdata 46 | .ipynb_checkpoints 47 | 48 | ## Core latex/pdflatex auxiliary files: 49 | *.aux 50 | *.lof 51 | *.log 52 | *.lot 53 | *.fls 54 | *.out 55 | *.toc 56 | *.fmt 57 | *.fot 58 | *.cb 59 | *.cb2 60 | .*.lb 61 | 62 | ## Build tool auxiliary files: 63 | *.fdb_latexmk 64 | *.synctex 65 | *.synctex(busy) 66 | *.synctex.gz 67 | *.synctex.gz(busy) 68 | *.pdfsync -------------------------------------------------------------------------------- /Julia/Data/ClassifierNames.dat: -------------------------------------------------------------------------------- 1 | nbc 2 | aode 3 | hnb 4 | j48 5 | j48gr 6 | -------------------------------------------------------------------------------- /Julia/Data/DatasetNames.dat: -------------------------------------------------------------------------------- 1 | anneal 2 | audiology 3 | wisconsin-breast-cancer 4 | cmc 5 | contact-lenses 6 | credit 7 | german-credit 8 | pima-diabetes 9 | ecoli 10 | eucalyptus 11 | glass 12 | grub-damage 13 | haberman 14 | hayes-roth 15 | cleeland-14 16 | hungarian-14 17 | hepatitis 18 
| hypothyroid 19 | ionosphere 20 | iris 21 | kr-s-kp 22 | labor 23 | lier-disorders 24 | lymphography 25 | monks1 26 | monks3 27 | monks 28 | mushroom 29 | nursery 30 | optdigits 31 | page-blocks 32 | pasture-production 33 | pendigits 34 | postoperatie 35 | primary-tumor 36 | segment 37 | solar-flare-C 38 | solar-flare-m 39 | solar-flare-X 40 | sonar 41 | soybean 42 | spambase 43 | spect-reordered 44 | splice 45 | squash-stored 46 | squash-unstored 47 | tae 48 | credit 49 | owel 50 | waveform 51 | white-clover 52 | wine 53 | yeast 54 | zoo 55 | -------------------------------------------------------------------------------- /Julia/Data/get_accuracies.jl: -------------------------------------------------------------------------------- 1 | function get_accuracies(i,j,d,ClassID,DatasetID,Percent_correct) 2 | 3 | indi=find(x->x==i,ClassID) 4 | indj=find(x->x==j,ClassID) 5 | indd=find(x->x==d,DatasetID) 6 | indid=intersect(indi,indd) 7 | indjd=intersect(indj,indd) 8 | acci=Percent_correct[indid]/100 9 | accj=Percent_correct[indjd]/100 10 | 11 | return acci, accj 12 | 13 | end 14 | -------------------------------------------------------------------------------- /Julia/Notebook6 compares two classifiers on multiple datasets using left rope right.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Print all the p-values for the comparison of all 5 classifiers on multiple datasets" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 4, 13 | "metadata": { 14 | "collapsed": false 15 | }, 16 | "outputs": [ 17 | { 18 | "name": "stdout", 19 | "output_type": "stream", 20 | "text": [ 21 | "nbc & aode & 0.000 \\\\\n", 22 | "nbc & hnb & 0.001 \\\\\n", 23 | "nbc & j48 & 0.463 \\\\\n", 24 | "nbc & j48gr & 0.394 \\\\\n", 25 | "aode & hnb & 0.654 \\\\\n", 26 | "aode & j48 & 0.077 \\\\\n", 27 | "aode & j48gr & 0.106 \\\\\n", 28 | "hnb & 
j48 & 0.067 \\\\\n", 29 | "hn" 30 | ] 31 | }, 32 | { 33 | "ename": "LoadError", 34 | "evalue": "LoadError: UndefVarError: tieadj not defined\nwhile loading In[4], in expression starting on line 12", 35 | "output_type": "error", 36 | "traceback": [ 37 | "LoadError: UndefVarError: tieadj not defined\nwhile loading In[4], in expression starting on line 12", 38 | "", 39 | " in SignedRankTest at /home/benavoli/.julia/v0.4/HypothesisTests/src/wilcoxon.jl:33", 40 | " [inlined code] from In[4]:29", 41 | " in anonymous at no file:0" 42 | ] 43 | }, 44 | { 45 | "name": "stdout", 46 | "output_type": "stream", 47 | "text": [ 48 | "b & j48gr & 0.084 \\\\\n" 49 | ] 50 | } 51 | ], 52 | "source": [ 53 | "using HypothesisTests\n", 54 | "ClassID = readdlm(\"Data/ClassifierID.dat\", ',')\n", 55 | "ClassNames = readdlm(\"Data/ClassifierNames.dat\", ',')\n", 56 | "DatasetID = readdlm(\"Data/DatasetID.dat\", ',');\n", 57 | "DatasetNames = readdlm(\"Data/DatasetNames.dat\", ',');\n", 58 | "Percent_correct = readdlm(\"Data/Percent_correct.dat\", ',');\n", 59 | "\n", 60 | "\n", 61 | "\n", 62 | "\n", 63 | "ii=0\n", 64 | "for cl1=1:Int32(maximum(ClassID))\n", 65 | " for cl2=cl1+1:Int32(maximum(ClassID))\n", 66 | "\n", 67 | " ii=ii+1\n", 68 | " indi=find(x->x==cl1,ClassID)\n", 69 | " indj=find(x->x==cl2,ClassID)\n", 70 | " acci=Float64[]\n", 71 | " accj=Float64[]\n", 72 | " for d=1:Int32(maximum(DatasetID))\n", 73 | " indd=find(x->x==d,DatasetID)\n", 74 | " indid=intersect(indi,indd)\n", 75 | " indjd=intersect(indj,indd)\n", 76 | " push!(acci,mean(Percent_correct[indid])/100)\n", 77 | " push!(accj,mean(Percent_correct[indjd])/100)\n", 78 | " end\n", 79 | "\n", 80 | " \n", 81 | " pvalSignedRankTest=pvalue(SignedRankTest(acci,accj))\n", 82 | "\n", 83 | " @printf \"%s & %s & %1.3f \\\\\\\\\\n\" ClassNames[cl1] ClassNames[cl2] pvalSignedRankTest\n", 84 | "\n", 85 | " end\n", 86 | "end" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 1, 92 | "metadata": { 93 | "collapsed": 
false, 94 | "scrolled": true 95 | }, 96 | "outputs": [ 97 | { 98 | "name": "stdout", 99 | "output_type": "stream", 100 | "text": [ 101 | "nbc & aode & 0.000 & 0.123 & 0.877 \\\\\n", 102 | "nbc & hnb & 0.000 & 0.001 & 0.999 \\\\\n", 103 | "nbc & j48 & 0.228 & 0.006 & 0.766 \\\\\n", 104 | "nbc & j48gr & 0.179 & 0.004 & 0.817 \\\\\n", 105 | "aode & hnb & 0.001 & 0.965 & 0.034 \\\\\n", 106 | "aode & j48 & 0.904 & 0.034 & 0.062 \\\\\n", 107 | "aode & j48gr & 0.883 & 0.046 & 0.071 \\\\\n", 108 | "hnb & j48 & 0.962 & 0.020 & 0.019 \\\\\n", 109 | "hnb & j48gr & 0.950 & 0.026 & 0.024 \\\\\n", 110 | "j48 & j48gr & 0.000 & 1.000 & 0.000 \\\\\n" 111 | ] 112 | } 113 | ], 114 | "source": [ 115 | "using Distributions\n", 116 | "using DataFrames\n", 117 | "using Gadfly\n", 118 | "using Compose\n", 119 | "\n", 120 | "include(\"Tests/Bsignranktest.jl\")\n", 121 | "include(\"Tests/Bsigntest.jl\")\n", 122 | "include(\"Plots/plot_simplex.jl\")\n", 123 | "\n", 124 | "ClassID = readdlm(\"Data/ClassifierID.dat\", ',')\n", 125 | "ClassNames = readdlm(\"Data/ClassifierNames.dat\", ',')\n", 126 | "DatasetID = readdlm(\"Data/DatasetID.dat\", ',');\n", 127 | "DatasetNames = readdlm(\"Data/DatasetNames.dat\", ',');\n", 128 | "Percent_correct = readdlm(\"Data/Percent_correct.dat\", ',');\n", 129 | "\n", 130 | "\n", 131 | "p1=Array{Gadfly.Plot}(Int32(maximum(ClassID)*(maximum(ClassID)-1)/2))\n", 132 | "data=0\n", 133 | "\n", 134 | "ii=0\n", 135 | "for cl1=1:Int32(maximum(ClassID))\n", 136 | " for cl2=cl1+1:Int32(maximum(ClassID))\n", 137 | "\n", 138 | " ii=ii+1\n", 139 | "indi=find(x->x==cl1,ClassID)\n", 140 | "indj=find(x->x==cl2,ClassID)\n", 141 | "acci=Float64[]\n", 142 | "accj=Float64[]\n", 143 | "for d=1:Int32(maximum(DatasetID))\n", 144 | " indd=find(x->x==d,DatasetID)\n", 145 | " indid=intersect(indi,indd)\n", 146 | " indjd=intersect(indj,indd)\n", 147 | " push!(acci,mean(Percent_correct[indid])/100)\n", 148 | " push!(accj,mean(Percent_correct[indjd])/100)\n", 149 | "end\n", 150 | " 
rope=0.01\n", 151 | " #data=Bsigntest(acci,accj,rope)\n", 152 | " data=Bsignranktest(acci,accj,rope)\n", 153 | " #println(mean(data[3,:]), \" \", mean(data[2,:]), \" \",mean(data[1,:]))\n", 154 | " val=zeros(size(data,2),1)\n", 155 | " for ind=1:size(data,2)\n", 156 | " val[ind,:]=indmax(data[:,ind])\n", 157 | " end\n", 158 | " \n", 159 | " @printf \"%s & %s & %1.3f & %1.3f & %1.3f \\\\\\\\\\n\" ClassNames[cl1] ClassNames[cl2] length(find(x->x==3,val))/length(val) length(find(x->x==2,val))/length(val) length(find(x->x==1,val))/length(val)\n", 160 | " \n", 161 | "ptriangle=plot_simplex(data, ClassNames[cl1],ClassNames[cl2])\n", 162 | "p1[ii,:]=ptriangle\n", 163 | " \n", 164 | " # pvalSignedRankTest=pvalue(SignedRankTest(acci,accj))\n", 165 | "\n", 166 | " # @printf \"%s & %s & %1.3f \\\\\\\\\\n\" ClassNames[cl1] ClassNames[cl2] pvalSignedRankTest\n", 167 | "\n", 168 | " end\n", 169 | "end\n", 170 | "\n", 171 | "#set_default_plot_size(30cm, 40cm)\n", 172 | "#display(vstack(hstack(p1[2,1],p1[3,1],p1[4,1]),\n", 173 | " # hstack(p1[5,1],p1[6,1],p1[7,1]),\n", 174 | " # hstack(p1[8,1],p1[9,1],p1[10,1])))\n", 175 | "\n", 176 | "draw(PDF(\"Plots/plotmanytriangles.pdf\", 30cm, 26cm),vstack(hstack(p1[2,1],p1[3,1],p1[4,1]),\n", 177 | " hstack(p1[5,1],p1[6,1],p1[7,1]),\n", 178 | " hstack(p1[8,1],p1[9,1],p1[10,1])))" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": null, 184 | "metadata": { 185 | "collapsed": false 186 | }, 187 | "outputs": [], 188 | "source": [ 189 | "\n" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": null, 195 | "metadata": { 196 | "collapsed": false 197 | }, 198 | "outputs": [], 199 | "source": [] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": null, 204 | "metadata": { 205 | "collapsed": false 206 | }, 207 | "outputs": [], 208 | "source": [] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "execution_count": null, 213 | "metadata": { 214 | "collapsed": false 215 | }, 216 | 
"outputs": [], 217 | "source": [] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": null, 222 | "metadata": { 223 | "collapsed": true 224 | }, 225 | "outputs": [], 226 | "source": [] 227 | } 228 | ], 229 | "metadata": { 230 | "kernelspec": { 231 | "display_name": "Julia 0.4.2", 232 | "language": "julia", 233 | "name": "julia-0.4" 234 | }, 235 | "language_info": { 236 | "file_extension": ".jl", 237 | "mimetype": "application/julia", 238 | "name": "julia", 239 | "version": "0.4.5" 240 | } 241 | }, 242 | "nbformat": 4, 243 | "nbformat_minor": 0 244 | } 245 | -------------------------------------------------------------------------------- /Julia/Plots/bars.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayesianTestsML/tutorial/0dbb29392052ea0ebc3d9a0922cb5052eb0e4e6d/Julia/Plots/bars.pdf -------------------------------------------------------------------------------- /Julia/Plots/hierarchical.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayesianTestsML/tutorial/0dbb29392052ea0ebc3d9a0922cb5052eb0e4e6d/Julia/Plots/hierarchical.pdf -------------------------------------------------------------------------------- /Julia/Plots/hist1217.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayesianTestsML/tutorial/0dbb29392052ea0ebc3d9a0922cb5052eb0e4e6d/Julia/Plots/hist1217.pdf -------------------------------------------------------------------------------- /Julia/Plots/hist122.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayesianTestsML/tutorial/0dbb29392052ea0ebc3d9a0922cb5052eb0e4e6d/Julia/Plots/hist122.pdf -------------------------------------------------------------------------------- /Julia/Plots/hist1220.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayesianTestsML/tutorial/0dbb29392052ea0ebc3d9a0922cb5052eb0e4e6d/Julia/Plots/hist1220.pdf -------------------------------------------------------------------------------- /Julia/Plots/hist123.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayesianTestsML/tutorial/0dbb29392052ea0ebc3d9a0922cb5052eb0e4e6d/Julia/Plots/hist123.pdf -------------------------------------------------------------------------------- /Julia/Plots/hist129.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayesianTestsML/tutorial/0dbb29392052ea0ebc3d9a0922cb5052eb0e4e6d/Julia/Plots/hist129.pdf -------------------------------------------------------------------------------- /Julia/Plots/hist12Any[].pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayesianTestsML/tutorial/0dbb29392052ea0ebc3d9a0922cb5052eb0e4e6d/Julia/Plots/hist12Any[].pdf -------------------------------------------------------------------------------- /Julia/Plots/output1217.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayesianTestsML/tutorial/0dbb29392052ea0ebc3d9a0922cb5052eb0e4e6d/Julia/Plots/output1217.pdf -------------------------------------------------------------------------------- /Julia/Plots/output122.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayesianTestsML/tutorial/0dbb29392052ea0ebc3d9a0922cb5052eb0e4e6d/Julia/Plots/output122.pdf -------------------------------------------------------------------------------- /Julia/Plots/output1220.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/BayesianTestsML/tutorial/0dbb29392052ea0ebc3d9a0922cb5052eb0e4e6d/Julia/Plots/output1220.pdf -------------------------------------------------------------------------------- /Julia/Plots/output123.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayesianTestsML/tutorial/0dbb29392052ea0ebc3d9a0922cb5052eb0e4e6d/Julia/Plots/output123.pdf -------------------------------------------------------------------------------- /Julia/Plots/output1246.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayesianTestsML/tutorial/0dbb29392052ea0ebc3d9a0922cb5052eb0e4e6d/Julia/Plots/output1246.pdf -------------------------------------------------------------------------------- /Julia/Plots/output129.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayesianTestsML/tutorial/0dbb29392052ea0ebc3d9a0922cb5052eb0e4e6d/Julia/Plots/output129.pdf -------------------------------------------------------------------------------- /Julia/Plots/piechart.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayesianTestsML/tutorial/0dbb29392052ea0ebc3d9a0922cb5052eb0e4e6d/Julia/Plots/piechart.pdf -------------------------------------------------------------------------------- /Julia/Plots/piechartbay.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayesianTestsML/tutorial/0dbb29392052ea0ebc3d9a0922cb5052eb0e4e6d/Julia/Plots/piechartbay.pdf -------------------------------------------------------------------------------- /Julia/Plots/piechartbay1.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/BayesianTestsML/tutorial/0dbb29392052ea0ebc3d9a0922cb5052eb0e4e6d/Julia/Plots/piechartbay1.pdf -------------------------------------------------------------------------------- /Julia/Plots/plotSignRankSimplex12.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayesianTestsML/tutorial/0dbb29392052ea0ebc3d9a0922cb5052eb0e4e6d/Julia/Plots/plotSignRankSimplex12.pdf -------------------------------------------------------------------------------- /Julia/Plots/plotSignSimplex12.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayesianTestsML/tutorial/0dbb29392052ea0ebc3d9a0922cb5052eb0e4e6d/Julia/Plots/plotSignSimplex12.pdf -------------------------------------------------------------------------------- /Julia/Plots/plot_data.jl: -------------------------------------------------------------------------------- 1 | function plot_data(i,j,d,x,marginleft,marginright) 2 | 3 | 4 | 5 | df = DataFrame(DeltaAcc=x) 6 | 7 | #Geom.histogram(bincount=20 8 | p=plot(df, x=:DeltaAcc, xintercept=[-0.01, 0.01],Geom.vline(color="orange", size=1mm), Geom.density,Coord.Cartesian(xmin=marginleft,xmax=marginright),Theme(major_label_font_size=13pt,minor_label_font_size=12pt,key_label_font_size=11pt)) 9 | 10 | draw(PDF("Plots/hist$i$j$d.pdf", 6inch, 3inch), p) 11 | 12 | return p 13 | 14 | end 15 | -------------------------------------------------------------------------------- /Julia/Plots/plot_post_hierch.jl: -------------------------------------------------------------------------------- 1 | function plot_post_hierch(x,marginleft,marginright,title) 2 | 3 | 4 | 5 | 6 | df = DataFrame(mu0=x) 7 | 8 | #Geom.histogram(bincount=20 9 | p=plot(df, x=:mu0, xintercept=[-0.01, 0.01],Guide.title(title),Geom.vline(color="orange", size=1mm), 
Geom.density,Coord.Cartesian(xmin=marginleft,xmax=marginright),Theme(major_label_font_size=13pt,minor_label_font_size=12pt,key_label_font_size=11pt)) 10 | 11 | 12 | 13 | return p 14 | 15 | end 16 | -------------------------------------------------------------------------------- /Julia/Plots/plot_posterior_ttest.jl: -------------------------------------------------------------------------------- 1 | function plot_posterior_ttest(i,j,d,mur,sigmar,dofr,marginleft,marginright) 2 | 3 | xs1=marginleft:0.0001:marginright 4 | xs2=-0.01:0.001:0.01 5 | 6 | f=x -> pdf(TDist(dofr[1]), (x-mur[1])/sigmar[1])/sigmar[1] 7 | 8 | df1 = DataFrame(DeltaAcc=xs1,pdf=f(xs1),ymin=xs1*0,ymax=f(xs1),legend="pdf") 9 | df2 = DataFrame(DeltaAcc=xs2,pdf=0.0001,ymin=xs2*0,ymax=0.0001,legend="rope") 10 | df = vcat(df1, df2) 11 | 12 | p=plot(df, x=:DeltaAcc, y=:pdf, ymin=:ymin, ymax=:ymax, color=:legend, xintercept=[-0.01, 0.01],Geom.vline(color="orange", size=1mm),Geom.line, Geom.ribbon,Theme(major_label_font_size=13pt,minor_label_font_size=12pt,key_label_font_size=11pt),Coord.Cartesian(xmin=marginleft,xmax=marginright)) 13 | 14 | 15 | draw(PDF("Plots/output$i$j$d.pdf", 6inch, 3inch), p) 16 | 17 | return p 18 | 19 | end 20 | -------------------------------------------------------------------------------- /Julia/Plots/plot_posterior_ttest2.jl: -------------------------------------------------------------------------------- 1 | function plot_posterior_ttest(i,j,d,mur,sigmar,dofr,marginleft,marginright) 2 | 3 | xs1=marginleft:0.0001:marginright 4 | xs3=marginleft:0.0001:0 5 | xs2=-0.01:0.001:0.01 6 | 7 | f=x -> pdf(TDist(dofr[1]), (x-mur[1])/sigmar[1]) 8 | 9 | df1 = DataFrame(DeltaAcc=xs1,pdf=f(xs1),ymin=xs1*0,ymax=f2(xs2),legend="pdf") 10 | df3 = DataFrame(DeltaAcc=xs3,pdf=0,ymin=xs3*0,ymax=f(xs3),legend="pdf") 11 | df2 = DataFrame(DeltaAcc=xs2,pdf=0.0001,ymin=xs2*0,ymax=0.0001,legend="rope") 12 | df = vcat(df1, df2) 13 | 14 | p=plot(df, x=:DeltaAcc, y=:pdf, ymin=:ymin, ymax=:ymax, color=:legend, 
xintercept=[-0.01, 0.01],Geom.vline(color="orange", size=1mm),Geom.line, Geom.ribbon,Theme(major_label_font_size=13pt,minor_label_font_size=12pt,key_label_font_size=11pt),Coord.Cartesian(xmin=marginleft,xmax=marginright)) 15 | 16 | 17 | draw(PDF("Plots/output$i$j$d.pdf", 6inch, 3inch), p) 18 | 19 | return p 20 | 21 | end 22 | -------------------------------------------------------------------------------- /Julia/Plots/plot_simplex.jl: -------------------------------------------------------------------------------- 1 | function projectSimplex(points) 2 | 3 | #Project probabilities on the 3-simplex to a 2D triangle 4 | 5 | #N points are given as N x 3 array 6 | ss=size(points,1) 7 | # Convert points one at a time 8 | tripts = zeros(size(points,1),2) 9 | for idx=1:size(points,1) 10 | # Init to triangle centroid 11 | x = 1.0 / 2 12 | y = 1.0 / (2 * sqrt(3)) 13 | # Vector 1 - bisect out of lower left vertex 14 | p1 = points[idx, 3] 15 | x = x - (1.0 / sqrt(3)) * p1 * cos(pi / 6) 16 | y = y - (1.0 / sqrt(3)) * p1 * sin(pi / 6) 17 | # Vector 2 - bisect out of lower right vertex 18 | p2 = points[idx, 1] 19 | x = x + (1.0 / sqrt(3)) * p2 * cos(pi / 6) 20 | y = y - (1.0 / sqrt(3)) * p2 * sin(pi / 6) 21 | # Vector 3 - bisect out of top vertex 22 | p3 = points[idx, 2] 23 | y = y + (1.0 / sqrt(3) * p3) 24 | tripts[idx,:] = [x,y] 25 | end 26 | return tripts 27 | 28 | end 29 | 30 | 31 | function plot_simplex(data,name1,name2) 32 | dataproj=projectSimplex(data') 33 | 34 | 35 | vert=projectSimplex([1 0 0;0 1 0;0 0 1]) 36 | 37 | vert0=projectSimplex([0.3333 0.3333 0.3333; 0.5 0.5 0;0.5 0 0.5;0 0.5 0.5]) 38 | 39 | 40 | df = DataFrame(Pleft=dataproj[:,1][:], Prope=dataproj[:,2][:]) 41 | 42 | 43 | p=plot(df, x=:Pleft, y=:Prope, Guide.xlabel(""), Guide.ylabel(""), Guide.annotation( 44 | compose(context(), text(-0.04, -0.09, name1 ))),Guide.annotation(compose(context(), text(0.88, -0.09, name2 ))),Guide.annotation(compose(context(), text(0.44, 0.9, "rope" ))), Geom.hexbin,Theme( 45 | 
key_label_font_size=11pt), Coord.Cartesian(xmin=-0.1,xmax=1.05,ymin=-0.1,ymax=1.05),Guide.xticks(ticks=nothing), Guide.yticks(ticks=nothing), Guide.annotation( 46 | compose(context(), polygon([(vert[1,1], vert[1,2]), (vert[2,1], vert[2,2]), (vert[3,1], vert[3,2])]), fill(nothing), 47 | stroke(colorant"orange"))), Guide.annotation( 48 | compose(context(), polygon([(vert0[1,1], vert0[1,2]),(vert0[2,1], vert0[2,2]),(vert0[1,1], vert0[1,2]),(vert0[3,1], vert0[3,2]),(vert0[1,1], vert0[1,2]),(vert0[4,1], vert0[4,2])]), fill(nothing), 49 | stroke(colorant"orange"),linewidth(0.125mm)))) 50 | # 51 | return p 52 | 53 | end 54 | -------------------------------------------------------------------------------- /Julia/Plots/plotmany.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayesianTestsML/tutorial/0dbb29392052ea0ebc3d9a0922cb5052eb0e4e6d/Julia/Plots/plotmany.pdf -------------------------------------------------------------------------------- /Julia/Plots/plotmanytriangles.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayesianTestsML/tutorial/0dbb29392052ea0ebc3d9a0922cb5052eb0e4e6d/Julia/Plots/plotmanytriangles.pdf -------------------------------------------------------------------------------- /Julia/Plots/postSignRankA12.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayesianTestsML/tutorial/0dbb29392052ea0ebc3d9a0922cb5052eb0e4e6d/Julia/Plots/postSignRankA12.pdf -------------------------------------------------------------------------------- /Julia/Plots/postSignRankB12.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayesianTestsML/tutorial/0dbb29392052ea0ebc3d9a0922cb5052eb0e4e6d/Julia/Plots/postSignRankB12.pdf 
-------------------------------------------------------------------------------- /Julia/Plots/postSignRanknorope12.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayesianTestsML/tutorial/0dbb29392052ea0ebc3d9a0922cb5052eb0e4e6d/Julia/Plots/postSignRanknorope12.pdf -------------------------------------------------------------------------------- /Julia/Plots/postSignTestA12.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayesianTestsML/tutorial/0dbb29392052ea0ebc3d9a0922cb5052eb0e4e6d/Julia/Plots/postSignTestA12.pdf -------------------------------------------------------------------------------- /Julia/Plots/postSignTestB12.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayesianTestsML/tutorial/0dbb29392052ea0ebc3d9a0922cb5052eb0e4e6d/Julia/Plots/postSignTestB12.pdf -------------------------------------------------------------------------------- /Julia/Plots/postSignTestnorope12.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayesianTestsML/tutorial/0dbb29392052ea0ebc3d9a0922cb5052eb0e4e6d/Julia/Plots/postSignTestnorope12.pdf -------------------------------------------------------------------------------- /Julia/Tests/Bsignranktest.jl: -------------------------------------------------------------------------------- 1 | function Bsignranktest(y,x,rope) 2 | include("Tests/heaviside.jl") 3 | 4 | nsamples=150000 5 | 6 | #Dirichlet process prior strength 7 | s=0.5 8 | 9 | #differences 10 | zm=y-x 11 | nm=length(zm) 12 | 13 | # 14 | z0=0 #to check sensitivity to prior zet also z0=-Inf and z0=Inf 15 | 16 | 17 | z=[zm;z0] 18 | n=nm+1 19 | 20 | if rope>0 21 | X=repmat(z,1,n); 22 | Y=repmat(-z'+2*rope,n,1); 23 | 24 | Aright = heaviside(X-Y) 25 | 26 | 27 | X=repmat(-z,1,n); 28 | 
Y=repmat(z'+2*rope,n,1); 29 | 30 | Aleft = heaviside(X-Y) 31 | 32 | 33 | dataresl=zeros(3,nsamples) 34 | dataresu=zeros(3,nsamples) 35 | 36 | for i=1:nsamples 37 | data = rand(Dirichlet([ones(1,nm) s][:]),1)' 38 | 39 | dataresl[3,i]=dot((data[:,1:end]*Aright)[:],(data[:,1:end])[:]) #+(2-data[:,end][1])*data[:,end][1]/2 40 | dataresl[1,i]=dot((data[:,1:end]*Aleft )[:],(data[:,1:end])[:]) #+(2-data[:,end][1])*data[:,end][1]/2 41 | dataresl[2,i]=1- dataresl[1,i]- dataresl[3,i] 42 | #data12u[i,:]=(data12l[i]+(data1[:,end].*(2-data1[:,end])))[:] 43 | end 44 | 45 | else 46 | 47 | X=repmat(z,1,n) 48 | Y=repmat(-z',n,1) 49 | 50 | A = heaviside(X-Y) 51 | 52 | dataresl=zeros(2,nsamples) 53 | dataresu=zeros(2,nsamples) 54 | 55 | for i=1:nsamples 56 | data = rand(Dirichlet([ones(1,n) s][:]),1)' 57 | dataresl[2,i]=dot((data[:,1:end-1]*A)[:],(data[:,1:end-1])[:]) 58 | dataresl[1,i]=1- dataresl[2,i] 59 | end 60 | 61 | end 62 | 63 | 64 | 65 | return dataresl 66 | 67 | end 68 | -------------------------------------------------------------------------------- /Julia/Tests/Bsignranktest.jl~: -------------------------------------------------------------------------------- 1 | function Bsignranktest(y,x,rope) 2 | include("Tests/heaviside.jl") 3 | 4 | nsamples=15000 5 | 6 | #Dirichlet process prior strength 7 | s=1 8 | 9 | #differences 10 | zm=y-x 11 | nm=length(zm) 12 | 13 | # 14 | z0=0 #to check sensitivity to prior zet also z0=-Inf and z0=Inf 15 | 16 | 17 | z=[zm;z0] 18 | n=nm+1 19 | 20 | if rope>0 21 | X=repmat(z,1,n); 22 | Y=repmat(-z'+2*rope,n,1); 23 | 24 | Aright = heaviside(X-Y) 25 | 26 | 27 | X=repmat(-z,1,n); 28 | Y=repmat(z'+2*rope,n,1); 29 | 30 | Aleft = heaviside(X-Y) 31 | 32 | 33 | dataresl=zeros(3,nsamples) 34 | dataresu=zeros(3,nsamples) 35 | 36 | for i=1:nsamples 37 | data = rand(Dirichlet([ones(1,nm) s][:]),1)' 38 | 39 | dataresl[3,i]=dot((data[:,1:end]*Aright)[:],(data[:,1:end])[:]) #+(2-data[:,end][1])*data[:,end][1]/2 40 | dataresl[1,i]=dot((data[:,1:end]*Aleft 
)[:],(data[:,1:end])[:]) #+(2-data[:,end][1])*data[:,end][1]/2 41 | dataresl[2,i]=1- dataresl[1,i]- dataresl[3,i] 42 | #data12u[i,:]=(data12l[i]+(data1[:,end].*(2-data1[:,end])))[:] 43 | end 44 | 45 | else 46 | 47 | X=repmat(z,1,n) 48 | Y=repmat(-z',n,1) 49 | 50 | A = heaviside(X-Y) 51 | 52 | dataresl=zeros(2,nsamples) 53 | dataresu=zeros(2,nsamples) 54 | 55 | for i=1:nsamples 56 | data = rand(Dirichlet([ones(1,n) s][:]),1)' 57 | dataresl[2,i]=dot((data[:,1:end-1]*A)[:],(data[:,1:end-1])[:]) 58 | dataresl[1,i]=1- dataresl[2,i] 59 | end 60 | 61 | end 62 | 63 | 64 | 65 | return dataresl 66 | 67 | end 68 | -------------------------------------------------------------------------------- /Julia/Tests/Bsigntest.jl: -------------------------------------------------------------------------------- 1 | function Bsigntest(y,x,rope) 2 | 3 | 4 | s=1 5 | nsamples=100000 6 | diff=y-x 7 | 8 | if rope>0 9 | #Compute counts 10 | nright=length(find(z->z> rope,diff[:])) 11 | nleft=length(find(z->(z<-rope),diff[:])) 12 | nrope=length(find(z->(z<=rope && z>=-rope),diff[:])) 13 | 14 | data = rand(Dirichlet([nleft+s/3 nrope+s/3 nright+s/3 ][:]),nsamples) 15 | 16 | else 17 | #Compute counts 18 | nright=length(find(z->z> 0,diff[:])) 19 | nleft=length(find(z->(z<=0),diff[:])) 20 | 21 | if nrightnleft 25 | data = rand(Dirichlet([nleft+s nright][:]),nsamples) 26 | 27 | else 28 | 29 | data = rand(Dirichlet([nleft+s/2 nright+s/2][:]),nsamples) 30 | 31 | 32 | end 33 | 34 | end 35 | return data 36 | 37 | end 38 | -------------------------------------------------------------------------------- /Julia/Tests/Bttest_correlated.jl: -------------------------------------------------------------------------------- 1 | function Bttest_correlated(x,rho,m,rope_min_value,rope_max_value,prob) 2 | #Bayesian correlated t-test 3 | #x: differences of accuracies 4 | #rho: correlation 5 | #m: mean to be tested (m=0) 6 | #the rope interval is [rope_min_value,rope_max_value] 7 | #prob: probability for HDI credible 
interval 8 | 9 | #Return parameters of the posterior tildemu, sigma_student, dof 10 | # probability of the three regions right, left, rope p_right, p_left, p_rope, 11 | # hdi interval 12 | 13 | #parameters of the non-informative prior, to match the results of the 14 | #frequentist correlated t-test 15 | k0=1000000; 16 | a=-0.5; 17 | b=0; 18 | mu0=0; 19 | m=0; 20 | 21 | #used for HDI computation 22 | alpha=1-prob 23 | 24 | if rope_min_value>rope_max_value 25 | error("rope_min_value larger than rope_max_value") 26 | end 27 | 28 | 29 | n=length(x) 30 | H=ones(n,1) 31 | 32 | #correlation matrix 33 | M=rho*ones(n,n)+diagm(ones(n))*(1-rho) 34 | #posterior mean 35 | tildemu=inv((H'*inv(M)*H+1/k0))*(H'*inv(M)*x+mu0/k0) 36 | 37 | #sample mean 38 | hatmu=inv((H'*inv(M)*H))*(H'*inv(M)*x) 39 | 40 | #posterior parameters of the Gamma distribution 41 | tildea=a+n/2 42 | tildeb0=0.5*((x-H*hatmu)'*inv(M)*(x-H*hatmu)+2*b-(H*hatmu)'*inv(M)*(H*hatmu)-mu0^2/k0+((H*tildemu)'*inv(M)*(H*tildemu)+tildemu'*tildemu/k0)); 43 | tildeb=max(tildeb0,0); 44 | 45 | #parameters of the Student distribution 46 | sigma_student=sqrt((tildeb/tildea/(H'*inv(M)*H+1/k0))); 47 | dof=2*tildea 48 | 49 | #for numerical problems 50 | if sigma_student[1]<0.000000001 51 | sigma_student[1]=0.000000001 52 | end 53 | t=(tildemu-m)/sigma_student; 54 | 55 | #compute HDI interval for the posterior 56 | crit = invlogcdf(TDist(2*tildea),log((1 - alpha / 2))).* sigma_student; 57 | hdi = [tildemu - crit; tildemu + crit]; 58 | 59 | 60 | 61 | #Computations of probability of three regions for accuracy=class1-class2: left (classif2 better than classif1) 62 | # right (classif1 better than classif2) and rope (classif1=classif2) 63 | 64 | p_left=0.5 65 | p_right=0.5 66 | p_rope=0.0; 67 | 68 | 69 | p_right=1-cdf(TDist(2*tildea), (rope_max_value-tildemu)/sigma_student) 70 | p_left=cdf(TDist(2*tildea), (rope_min_value-tildemu)/sigma_student) 71 | p_rope=cdf(TDist(2*tildea), 
function ttest_correlated(x,m,rho,tail,alpha)
    #Frequentist correlated t-test (Nadeau & Bengio corrected variance)
    #x: differences of accuracies
    #m: mean to be tested (m=0)
    #rho: correlation
    #tail: tail=0 (two-tailed), tail=1 (right one-tailed test), tail=-1 (left one-tailed test)
    #alpha: alpha level
    #
    #return p-value and confidence interval

    samplesize=length(x)

    #define the correction (ratio of test- to training-set size implied by rho)
    te_tr_ratio=rho/(1-rho)

    #degrees of freedom
    df = max(samplesize - 1,0);

    #statistics
    xmean = mean(x);
    sdpop = std(x);
    #for numerical stability
    if sdpop ==0
        sdpop=10.0^-10;
    end

    #correlation correction of the standard error
    ser = sdpop .* sqrt(1/samplesize+te_tr_ratio);

    #t statistics
    tval = (xmean - m) ./ ser;


    # Compute the correct p-value for the test, and confidence intervals
    if tail == 0 # two-tailed test
        p = 2 * cdf(TDist(df),-abs(tval))
        crit = invlogcdf(TDist(df),log((1 - alpha / 2))).* ser;
        ci = [xmean - crit; xmean + crit];
    elseif tail == 1 # right one-tailed test
        p = cdf(TDist(df),-tval)
        crit = invlogcdf(TDist(df),log((1 - alpha ))).* ser;
        ci = [xmean - crit; Inf];
    elseif tail == -1 # left one-tailed test
        # BUG FIX: this branch read "else tail == -1", so ANY unrecognized tail
        # value silently ran the left one-tailed test; now invalid values error.
        p = cdf(TDist(df),tval)
        crit = invlogcdf(TDist(df),log((1 - alpha ))).* ser;
        ci = [-Inf; xmean + crit];
    else
        error("tail must be 0 (two-tailed), 1 (right) or -1 (left)")
    end

    #return p-value and confidence interval
    return p, ci


end
It returns probabilities that, based on the measured performance, one model is better
equivalence (ROPE)." 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "The difference between two classifiers (algorithms) can be very small; however *there are no two classifiers whose \n", 16 | "accuracies are perfectly equivalent*. \n", 17 | "\n", 18 | "By using an null hypothesis significance test (NHST), the null hypothesis is that the classifiers are equal. However, the null hypothesis is practically always false!\n", 19 | "By rejecting the null hypothesis NHST indicates that the null hypothesis is unlikely; **but this is known even before running the experiment**." 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "## Bayesian tests can assess when two classifiers are practically equivalent \n", 27 | "Can we say anything about the probability that two classifiers are practically equivalent (e.g., *j48* is practically equivalent to *j48gr*)? \n", 28 | "NHST cannot answer this question, while Bayesian analysis can.\n", 29 | "\n", 30 | "We need to define the meaning of **practically equivalent**. \n", 31 | "\n", 32 | "\n", 33 | " \n" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "## How to define a rope?\n", 41 | "The rope depends:\n", 42 | "1. on the **metric** we use for comapring classifiers (accuracy, logloss etc.);\n", 43 | "2. on our **subjective** definition of practical equivalence (**domain specific**).\n", 44 | "\n", 45 | "Accuracy is a number in $[0,1]$. For practical applications, it is sensible to define that two classifiers whose mean difference of accuracies is less that $1\\%$ ($0.01$) are practically equivalent. \n", 46 | "A difference of accuracy of $1\\%$ is neglegible in practice.\n", 47 | "\n", 48 | "The interval $[-0.01,0.01]$ can thus be used to define a **region of practical equivalence** for classifiers. \n", 49 | "\n", 50 | "See it in action." 
51 | ] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "metadata": {}, 56 | "source": [ 57 | "We load the classification accuracies of J48 and J48gr on 54 UCI datasets from the file `Data/accuracy_j48_j48gr.csv`. For simplicity, we will skip the header row and the column with data set names." 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 2, 63 | "metadata": { 64 | "collapsed": true 65 | }, 66 | "outputs": [], 67 | "source": [ 68 | "import numpy as np\n", 69 | "scores = np.loadtxt('Data/accuracy_j48_j48gr.csv', delimiter=',', skiprows=1, usecols=(1, 2))\n", 70 | "names = (\"J48\", \"J48gr\")" 71 | ] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "metadata": {}, 76 | "source": [ 77 | "### Bayesian sign test \n", 78 | "Function `signtest(x, rope, prior_strength=1, prior_place=ROPE, nsamples=50000, verbose=False, names=('C1', 'C2'))` computes the Bayesian signed-rank test and returns the probabilities that the difference (the score of the first classifier minus the score of the first) is negative, within rope or positive." 
79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": 3, 84 | "metadata": { 85 | "collapsed": false 86 | }, 87 | "outputs": [ 88 | { 89 | "name": "stdout", 90 | "output_type": "stream", 91 | "text": [ 92 | "P(J48 > J48gr) = 0.0, P(rope) = 1.0, P(J48gr > J48) = 0.0\n" 93 | ] 94 | } 95 | ], 96 | "source": [ 97 | "import bayesiantests as bt\n", 98 | "left, within, right = bt.signtest(scores, rope=0.01,verbose=True,names=names)\n" 99 | ] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "metadata": {}, 104 | "source": [ 105 | "The first value (`P(J48 > J48gr)`) is the probability that the first classifier (the left column of `x`) has a higher score than the second (or that the differences are negative, if `x` is given as a vector).\n", 106 | "\n", 107 | "The second value (`P(rope)`) is the probability that they are practically equivalent.\n", 108 | "\n", 109 | "The third value (`P(J48gr > J48)`) is equal to `1-P(J48 > J48gr)-P(rope)`.\n", 110 | "\n", 111 | "The probability of the rope is equal to $1$ and, therefore, we can say that they are equivalent (for the given rope)." 112 | ] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "metadata": {}, 117 | "source": [ 118 | "## Zoom in\n", 119 | "Decision tree grafting (**J48gr**) was developed to demonstrate that a preference for less complex trees (**J48**) does not serve to improve accuracy. The point is that c has a consistent (albeit small) improvements in accuracy than **J48**. \n", 120 | "\n", 121 | "The advanatge of having a rope is that we can test this hypothesis from a statistical point of view." 122 | ] 123 | }, 124 | { 125 | "cell_type": "markdown", 126 | "metadata": {}, 127 | "source": [ 128 | "## Is the difference more than 0.001 (1/1000)?" 
129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 10, 134 | "metadata": { 135 | "collapsed": false 136 | }, 137 | "outputs": [ 138 | { 139 | "name": "stdout", 140 | "output_type": "stream", 141 | "text": [ 142 | "P(J48 > J48gr) = 0.0, P(rope) = 0.99822, P(J48gr > J48) = 0.00178\n" 143 | ] 144 | } 145 | ], 146 | "source": [ 147 | "left, within, right = bt.signtest(scores, rope=0.001,verbose=True,names=names)" 148 | ] 149 | }, 150 | { 151 | "cell_type": "markdown", 152 | "metadata": {}, 153 | "source": [ 154 | "No the difference is less than 0.001 with probability 0.99" 155 | ] 156 | }, 157 | { 158 | "cell_type": "markdown", 159 | "metadata": {}, 160 | "source": [ 161 | "## Is the difference more than 0.0001 (1/10000)?" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": 11, 167 | "metadata": { 168 | "collapsed": false 169 | }, 170 | "outputs": [ 171 | { 172 | "name": "stdout", 173 | "output_type": "stream", 174 | "text": [ 175 | "P(J48 > J48gr) = 0.00164, P(rope) = 0.14482, P(J48gr > J48) = 0.85354\n" 176 | ] 177 | } 178 | ], 179 | "source": [ 180 | "left, within, right = bt.signtest(scores, rope=0.0001,verbose=True,names=names)" 181 | ] 182 | }, 183 | { 184 | "cell_type": "markdown", 185 | "metadata": {}, 186 | "source": [ 187 | "The difference is therefore in the order of 0.0001. The difference is very small (around 1/10000), but in favour of J48gr. \n" 188 | ] 189 | }, 190 | { 191 | "cell_type": "markdown", 192 | "metadata": {}, 193 | "source": [ 194 | "## Is it due to the prior?" 
195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": 12, 200 | "metadata": { 201 | "collapsed": false 202 | }, 203 | "outputs": [ 204 | { 205 | "name": "stdout", 206 | "output_type": "stream", 207 | "text": [ 208 | "P(J48 > J48gr) = 0.00118, P(rope) = 0.0868, P(J48gr > J48) = 0.91202\n" 209 | ] 210 | } 211 | ], 212 | "source": [ 213 | "left, within, right = bt.signtest(scores, rope=0.0001,prior_place=bt.RIGHT,verbose=True,names=names)" 214 | ] 215 | }, 216 | { 217 | "cell_type": "markdown", 218 | "metadata": {}, 219 | "source": [ 220 | "The conclusions are in this case sensitive to the prior (posterior changes 0.05 points). However, the overall conclusion does not change much. The difference is very small (around 1/10000), but in favour of J48gr. " 221 | ] 222 | }, 223 | { 224 | "cell_type": "markdown", 225 | "metadata": {}, 226 | "source": [ 227 | "## Let us plot them\n", 228 | "We can plot the three probabilities as function of the Rope width." 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": 88, 234 | "metadata": { 235 | "collapsed": false 236 | }, 237 | "outputs": [ 238 | { 239 | "data": { 240 | "text/plain": [ 241 | "" 242 | ] 243 | }, 244 | "execution_count": 88, 245 | "metadata": {}, 246 | "output_type": "execute_result" 247 | }, 248 | { 249 | "data": { 250 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAY8AAAEPCAYAAAC6Kkg/AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xl4VOX1wPHvISD7WkFZRQXrCi4FtbgEBQXcV0BrrVgE\nFQHrivBTtLVSXApIrSsupYpiRVEQBCEFQUUEWQSsKCib1iJIgAhZzu+PdwJDmCQzydx5Zzmf58nD\nLHfunFySOXnXI6qKMcYYE4sqvgMwxhiTeix5GGOMiZklD2OMMTGz5GGMMSZmljyMMcbEzJKHMcaY\nmAWaPERknIh8LyLLyjhmjIh8KSJLROSEIOMxxhgTH0G3PJ4HupX2pIj0ANqoalvgBuDvAcdjjDEm\nDgJNHqo6F9hSxiEXAi+Gjv0YaCAiBwUZkzHGmMrzPebRHFgXdn890MJTLMYYY6LkO3kASIn7tl+K\nMcYkuaqe338D0DLsfovQY/sQEUsoxhhTAapa8g/0uPCdPCYDA4AJInIKsFVVv490oG3g6AwfPpzh\nw4f7DiMp2LXYy67FXvG8FoWFkJsL27bt+xXpsdIeL36salWoVw/q1nX/lvyK9Hikx2rVgipR9hmJ\nBJI3gICTh4i8ApwJHCgi64D7gGoAqvqUqk4VkR4ishrYAVwXZDzGmPSnCjt3RveBXt7jP/8c3Yd6\ns2bwy1+WflzdunDAAb6vTHwFmjxUtXcUxwwIMgZjTGooKIj+g76sx7ZuhYcfju4v+mbNyv8rP8A/\n3lOa724rE6Ps7GzfISQNuxZ7+boWFf0rP9Jju3dH91d+ixZlH7doUTZduni5HBlFUmEsQUQ0FeI0\nJlXk5+//AV6RfvzcXKhePfZ++0iP1axpf+XHm4gENmCeOsnju++gqAg2b4bvv4eWLeGII3yHZkzC\nqMKOHZXvx9+2zXURRfuhXlYCqFvXDQQnSpADwKku0me5JQ8R1UaN3E/pgQdCkyawYgV8+CEcdpjv\n8Iwp0+7d0f2VX96Hf26u++s81r/oIz1eo0Zq/pUf+jD0HUbSKe26WPIQUS0q2venffRoePFFmDfP\n/UYZk2A//giPPeYaw2V9+BcWRv/XfHl/5Wdl+f6u/bLkEZklj1JEHPNQhd69oU4dePZZP4GZjJWb\nC126uOmZp55a9od/9eqp+Vd+MrLkEZklj1KUOmC+fTt07Ai33QbXX5/4wExGysuDHj3ckNuTT1pi\nSCRLHpFZ8ihFmbOtVq6EM86A6dPhxBMTG5jJOPn5cOmlrsE7frx1IyWaJY/IfCSPZNgYsXKOOgr+\n9je4/HLXCW1MQAoL4be/dbdfeskSh8lsqd/yKHbrrTB/PvzqV+63vKwvVRg6FE46KTHfgEl5qtCv\nH6xeDVOm2BwNX1Kh5VFQUEDVRM5fxloelTNyJPTt61oi7dtDhw7QqRN07gznngsXXACXXeYG2Y88\n0iUPY6KgCnfeCUuWwFtvWeIw+2vdujUjR46kXbt21KlTh8mTJ3PMMcfQsGFDOnfuzKpVq/Y5dsSI\nERxzzDE0atSIPn36sGvXrj3Pv/POOxx//PE0bNiQTp06sWxZqVW8/VLVpP9yYcbRzz+rHnyw6uef\nx/e8Ji398Y+qxx6runmz70hM3D8L4uSQQw7RE044QdevX69LlizR2rVr68yZM7WgoEBHjhypbdq0\n0fz8/D3HHnfccbp+/Xr98ccftVOnTjps2DBVVV20aJE2adJEFyxYoEVFRfriiy9q69atddeuXWW+\nf2nXJfR4IJ/L6dPyiEX16nDjjW6tiDFlGDPGLSd67z1o1Mh3NKY8IvH5iv19hYEDB9K8eXPeeust\nzj//fM4++2yysrK4/fbbycvLY/78+XuOHTBgAM2bN6dhw4YMHTqUV155BYCnn36afv360aFDB0SE\n3/72t1SvXp2PPvoonpcpLjIzeQD07w+vvQb/+5/vSEySeuEFe
OQRmDEDmjb1HY2Jhmp8viqiZUtX\n127Tpk20atVqz+MiQsuWLdmwYcN+xwK0atWKjRs3AvDNN9/w6KOP0rBhwz1f69evZ9OmTRULKkCZ\nmzyaNIFLLoGnn/YdiUlC//oXDBniWhytW/uOxqSC4n23mjVrxjfffLPncVVl3bp1NG/efM9j3377\n7T63i59r1aoVQ4cOZcuWLXu+tm/fTs+ePRP0XUQvc5MHwODBbprv7t2+IzFJZPp016s5daqbW2FM\nLK688kqmTJnCrFmzyM/P59FHH6VGjRr8+te/BlwyeeKJJ9iwYQM//vgjDz744J7k0LdvX5588kkW\nLFiAqrJjxw6mTJnC9u3bfX5LEWV28mjXzn06TJzoOxKTJD74AH7zG5g0CU44wXc0JhUdccQRjB8/\nnltuuYXGjRszZcoU3n777T3Td0WEq666inPOOYfDDz+ctm3bMmzYMABOOukknnnmGQYMGECjRo1o\n27YtL730ks9vp1Tps86jot55B+6/HxYssH0mMtyiRdCtm1s5fs45vqMxkaTCOo/yHHrooTz33HOc\nddZZcTunrfPwoUcPV7dy3jzfkRiPVq6E885ze1VZ4jCmfJY8qlSBQYNg1CjfkRhP1q51CWPECLdv\nlTGmfNZtBW533tatYeFCm1qTYTZtgtNPd3MnBgzwHY0pTzp0WwXBuq18qVMHrrsOHn/cdyQmgTZv\nhq5d3X+9JQ5jYmMtj2Lffuum16xd66r5mLSWmwtnnw3Z2fCXv9hciVRhLY/IrOXhU6tW7tPk+ed9\nR2IClpfn9sk88URLHMZUlLU8ws2f7wo2fPGFFWtIU7t3u0Hx+vWtJkcqspZHZNby8O3UU+EXv3AF\nG0zaKS7mVKWK27fKEocxFWfJI5yIm3bz17/6jsTEmarbC/O//3X7YVar5jsik05at27N+++/X+5x\nkyZNomXLltStW5fPPvssAZEFx5JHSZdfDl9+CSn+H2v2UoU77oBly1wxpxo1fEdk0o2I7NkYsSy3\n3347TzzxBLm5uTRo0IAqVapQVFSUgAjjz5JHSdWquXmbtmgwbfzpT2533KlTbSKd8UdV+fbbbzn6\n6KP3ezwVWfKI5IYb3J+o333nOxJTSaNHu4FxK+ZkEkFVGTFiBG3atOHAAw+kZ8+ebNmyhV27dlG3\nbl0KCwtp3749bdq04cwzzwSgQYMG1K1bl48//thz9LGx5BFJo0bQq5fb6MikrOefh0cfhZkz4eCD\nfUdj0p2qMmbMGCZPnsycOXPYtGkTDRs25Oabb6Z69ep7tlVfunQpq1evZs6cOQD89NNP5ObmcvLJ\nJ/sMP2ZVfQeQtAYOhM6d4e67rZM8Bb3+OtxzD+TkwCGH+I7GJIrcH59ZqXpfxbqSnnrqKcaOHUuz\nZs0AuO+++zjkkEMYP348Vars+7d6qnZXFbPkUZqjjnIrzl95xe1fYVLGtGlw002uq+qXv/QdjUmk\nin7ox8s333zDJZdcsk+iqFq1Kt9//z1N06yWsXVbleXWW9203RT/CyGTzJ0L11wDb74Jxx/vOxqT\naVq2bMm0adP2KSO7c+fOiIkjmtlZycySR1m6dnUry2bP9h2JicKiRXDZZfDyyxCq+GlMQvXv3597\n7rlnT43yH374gcmTJ0c8tnHjxlSpUoWvvvoqkSHGjSWPshQvGrRpu0mvuJjTU0+5nG9MookIgwYN\n4sILL+Scc86hXr16nHrqqSxYsGCfY4rVqlWLoUOH0qlTJxo2bLjPcakg0L2tRKQbMArIAp5V1b+U\neL4+MB5oiRt/eURVX4hwnsTsbRVJXp4bcZ03D9q29RODKdOaNXDGGfDgg277EZO+bG+ryNJqbysR\nyQLGAt2Ao4HeInJUicNuBpar6vFANvCoiCTXIH7NmtC3L4wZ4zsSE8HGjdCli5sUZ4nDmMQJstuq\nI7BaVdeqaj4wAbioxDFFQ
L3Q7XrAZlUtCDCmirn5ZvjnP12tc5M0Nm925WOvv979FxljEifI5NEc\nWBd2f33osXBjgaNFZCOwBBgUYDwV16wZ9OgBzz7rOxITkpsL3bu7cY4hQ3xHY0zmCbKLKJqOyW7A\nIlXtLCKHAzNEpL2q5pY8cPjw4XtuZ2dnk52dHa84o3Prra4QxODBUDW5etYyTXExp5NOghEjrJiT\nMcVycnLIyclJyHsFNmAuIqcAw1W1W+j+EKAofNBcRN4BHlLVeaH77wN3qerCEufyN2Ae7vTT3crz\nK67wHUnGCi/m9I9/uNocJnPYgHlkaTVgDiwE2opIaxE5AOgJlJzw/C3QBUBEDgJ+CXwdYEyVY9N2\nvSou5pSV5Yo5WeIwxp/Afv1CA98DgOnACuBVVV0pIv1EpF/osD8CvxaRpcBM4E5V/TGomCrt4ovd\n9J4Um4+dDoqLOf3wA7z6qhVzMsY3q2Eeq8ceg4UL3TJmkxDFxZw++MDtkFunju+IjC/WbRVZunVb\npafrr3c7761f7zuSjBFezMkSh0kFxx577J4t18sTbQnbZGPJI1b167ud9/72N9+RZITRo93AuBVz\nMqlk+fLlnHHGGVEdW1YJ25ycHFq2bBnP0OLGkkdFDBzo1nzs3Ok7krT2/POul3DGDCvmZFJHQUHy\nrXMOgiWPijj8cOjUydU3NYF4/XUYOtS1OKyYk0l2rVu3ZuTIkbRv3546derQsmXLPV1ReXl5XHvt\ntTRq1Iijjz6akSNH7teaWLx4Me3bt6dBgwb06tWLXbt2sWPHDrp3787GjRupW7cu9erV47skKo1t\nyaOiBg92fSpFRb4jSTvTprntRqZOtWJOJnVMmDCBqVOnsnXrVqpWrbqnK+r+++/n22+/Zc2aNcyY\nMYPx48fv002lqkycOJHp06ezZs0ali5dygsvvEDt2rWZNm0azZo1Izc3l23btnFwEjXBLXlU1Jln\nQvXq7k9jEzdz57q1HJMmWTEnUwEi8fmK+W2FgQMH0rx5c2qUKFs9ceJE7rnnHurXr0/z5s0ZNGjQ\nPjOjil978MEH07BhQy644AI+++wzILlL1VryqCiRvZUGTVxYMSdTaarx+aqA0ga2N27cuM9zLVq0\n2O+Y8BZFzZo12b59e4ViSCRLHpXRqxcsXQqff+47kpRXXMzp6afdFuvGpJrSZkw1bdqUdev27hEb\nfrui50wGljwqo3p1uPFGN/ZhKmzNGre1+siRbhG/Menkyiuv5KGHHmLr1q1s2LCBsWPHRp0UDjro\nIDZv3sy2bdsCjjJ2ljwqq39/mDgR/vc/35GkpI0bXdnYIUPc8hlj0s29995LixYtOPTQQznnnHO4\n4oorOOCAA0o9Pnzdx5FHHknv3r057LDDaNSoUVLNtrLtSeKhTx9o0wbuucd3JCll82Y37+Dqq60m\nh4lOOmxP8ve//53XXnuN2bNnx+2ctj1Jqho82K04373bdyQpY9s26NYNzj/fEodJb9999x3z5s2j\nqKiIL774gscee4xLLrnEd1iVZskjHtq1g6OPdqO9plzFxZw6dICHHvIdjTHB2r17N/3796devXqc\nffbZXHzxxdx0002+w6o067aKl5UrXbGoTz6BQw/1HU3S2r0bLrkEGjZ0C/StJoeJRTp0WwXBuq1S\n2VFHwZ13ul13bdV5RIWFblC8WjW3b5UlDmNSl/36xtMf/gA7dlj3VQTFxZw2b4YJE6yYkzGpzrqt\n4m3FCjjjDFcwqnVr39EkBVW4/XaYP9/tkGs1OUxFWbdVZD66rSx5BGHECFfybsaMCu2Tk24eeAD+\n9S/IyXFjHcZUVDKvuPbNkkcEKZc8Cgrg1FOhb1+44Qbf0Xg1erSbxTx3Lhx0kO9ojMksljxSLXkA\nLF8O2dnw6acZW5Bi3Di4/36XOFq18h2NMZnHkkcqJg+AP//Z9dVMn55x3VcTJ8KgQe7bP+I
I39EY\nk5lsqm6quuMON73oued8R5JQ06bBgAHw7ruWOIxJV9byCNrSpXDWWa5YRQb03cyd62pyvPWWG/Yx\nxvhjLY9U1q6d67+54YYKF5lJFZ9+ureYkyUOY9KbJY9EuPtu+P57t6w6Ta1Y4TY5fOYZK+ZkTCaw\nbqtEWbLEfaouXgwRylCmsjVr3LrIhx6C3/zGdzTGmGLWbZUO2rd3o8j9+qVV99XGjS4nDhliicOY\nTGLJI5GGDIENG9x2smngf/9zVQD79oU02GHaGBMD67ZKtM8+cwW7Fy+G5s19R1Nh27a5SWRdu1pN\nDmOSlS0STKfkAXDffW5q0ttvp+TiwZ07oXt3OPZYGDs2Jb8FYzKCjXmkm6FD4dtvYfx435HEbPdu\nuPxyt2Tl8cctcRiTqazl4cuiRa6I95Il0LSp72iiUlgIvXu7BPL661C1qu+IjDFlsZZHOjrxRDfz\nKkVmX6m6UH/80RVzssRhTGaz5OHTsGFukcTLL/uOpEyqcNttbiHgm29CjRq+IzLG+GbdVr4tXAjn\nnee6rw4+2Hc0Ed1/P0yaBLNnWzEnY1JJynZbiUg3EVklIl+KyF2lHJMtIotFZLmI5AQZT1L61a/g\n9793Bb6TMEGOGuUaRtOnW+IwxuwVWMtDRLKAL4AuwAbgE6C3qq4MO6YBMA84V1XXi8iBqvq/COdK\n35YHwK5dbgxk2DA3Ip0kxo1zJWTnzMmIDYGNSTup2vLoCKxW1bWqmg9MAC4qccxVwL9UdT1ApMSR\nEapXhxdegMGD3QaKSWDiRJfL3nvPEocxZn/lJg8RuVBEKpJkmgPrwu6vDz0Wri3QSERmi8hCEbmm\nAu+THjp0gD594MYbvXdfvfuu24Zr2jQr5mSMiSyapNATWC0iI0XkyBjOHc0nYDXgRKAHcC7wfyLS\nNob3SC/33QerVsFrr3kLYc4cuPZaV8ypXTtvYRhjkly5s/VV9WoRqQ/0Bl4QEQWeB15R1dwyXroB\naBl2vyWu9RFuHfA/Vc0D8kRkDtAe+LLkyYYPH77ndnZ2NtnZ2eWFnnpq1HA1Py66CDp3hiZNEvr2\nCxe61eOvvAKnnJLQtzbGxEFOTg45OTkJea+oB8xF5EDgGmAwsALX5TRGVceUcnxV3ID52cBGYAH7\nD5gfCYzFtTqqAx8DPVV1RYlzpfeAeUl33QVff+0GHhJkxQq30eFTT7ncZYxJfV4HzEXkIhGZBOTg\nupk6qGp3oB3wh9Jep6oFwABgOi7ZvKqqK0Wkn4j0Cx2zCpgGLMUljmdKJo6MdP/9sHx5wpLH11/D\nuefCo49a4jDGRKfcloeIvAg8p6pzIjzXRVVnBhVc2PtkVssD4KOP4OKLYdkyaNw4sLfZsMFVAbzj\nDrfUxBiTPnxP1f2+ZOIQkb8AJCJxZKxTToFrrnHTngJSXMzphhsscRhjYhNN8uga4bEe8Q7ERPDA\nA6541Ouvx/3UP/3kNvW9+GI3xGKMMbEotdtKRG4EbgIOB74Ke6ouME9Vrw4+vD2xZF63VbH58+Gy\ny1z31YEHxuWUO3e6xNGundXkMCadeakkGJqe2xAYAdwFFAeQq6qbgwimNBmdPMBtabtxo5tDW0m7\nd7tB8caN3aL2KravsjFpy1fyqKeq20TkF0RY8KeqPwYRUCmxZHby2LkTjj8eRoyASy+t8GlU4eqr\nIS/PTeSymhzGpLcgk0dZHx+vAOcBnxJ5tfihQQRkIqhVy+1SeOWVcOaZ8ItfVOg0L7/sZgAvWGCJ\nwxhTOVbPI5Xceiv897/wz3/G/NLvvoP27WHqVDjppABiM8YkHV/dVieW9UJVXRREQKXEYskDXPdV\n+/bw8MNumlSUVF1v19FHw4MPBhifMSap+EoeOZSxuaGqdg4ioFJiseRRbO5c6NXLzb5q1Ciql0yY\nAH/8Iyxa5HZ/N8ZkBi/JI5lY8ihh0CDYsgVeeqncQ//
7Xzcl9+233a7vxpjM4avlcZaqzhKRy4g8\n2+qNIAIqJRZLHuF27HAZYdQouOCCMg+94go4/HA3UcsYk1l8zbY6E5gFXEDk7quEJQ9TQu3abvbV\n1VfDaaeVWlx84kQ3u+of/0hwfMaYtGfdVqnsllsgN9et9ivhhx9c42TSJKvNYUym8jrmEarjcR9w\nGq4FMhd4IJGrzC15lGL79r17jJx33j5P9e4NzZvDI494is0Y453vXXUnAP8FLgUuB34AXg0iGBOj\nOnXguefclrhbt+55eNIkN7Pqj3/0GJsxJq1F0/JYrqrHlnhsmaoeF2hk+76ftTzKcvPNbs+RcePY\nvBmOO86Nd3Tq5DswY4xPvlse74lIbxGpEvrqCbwXRDCmgv7yF5g9G959l0GDoGdPSxzGmGCVNVV3\nO3tnWdUGikK3qwA7VLVu8OHticVaHuWZNYudPX/Hr+ssY/7n9alVy3dAxhjfbJGgJY9y/fgjvNPy\nRrp2zqfpO8/6DscYkwS8Jw8RaQi0BWoUPxappnlQLHmU79proUnNXB6edhw89RSce67vkIwxnvla\nJFj85n2BgUBLYDFwCvAhcFYQAZnYTZnitrxatqwuXP4s9Onj9r6qX993aMaYNBXNgPkgoCOwNrQZ\n4gnAT4FGZaK2daubqfvcc27hOV26QPfucMcdvkMzxqSxaJLHz6qaByAiNVR1FfDLYMMy0brtNre9\nVefwPY4ffhimT4f3bFKcMSYY0dSTWxca83gTmCEiW4C1gUZlojJtGsyaBUuXlniiXj145hno29d1\nX9Wr5yU+Y0z6imm2lYhkA/WAaaq6O6igIryvDZiX8NNPbjHguHGupyqivn0hKwuefDKhsRljkkMy\nzLY6ib17W32QyCqCofe35FHCDTeAiJtYVaqoMowxJl15XWEuIvcCLwCNgAOB50Xk/4IIxkRn5kw3\npPHww+UcWL++yy6//73bfdcYY+Ikmr2t/gO0U9WfQ/drAktU9YgExFccg7U8QnJzXWMipqUcffpA\njRrwxBOBxmaMSS6+97baANQMu18DWB9EMKZ8d90FZ58d4xrAxx5zdWhnzQosLmNMZil1tpWIPB66\n+RPwuYgUz/vsCiwIOjCzv1mzXA5YtizGFzZo4AbNf/97NzWrTp1A4jPGZI6yNkb8HXs3RpSSt1X1\nxcCj2xtLxndbFdd9GjsWevSo4El+9zuXOMaOjWdoxpgklQyzraoDxWMcq1Q1P4hgynj/jE8eZVSc\njd6WLS4DPfKI27fdGJPWfO9tlQ28CHwTeqiViFyrqv8OIiCzv3//G954A5Yvr+SJGjZ0G2F17Qp1\n61aiCWOMyXTRzLZaBPRW1S9C948AJqjqiQmIrziGjG157NgB7dvDX//qtiGJi48+cid7/XU488w4\nndQYk2x8z7aqWpw4AFT1P0S3rYmJg6FD4ZRT4pg4wJ1wwgS44gpYuDCOJzbGZIpoWh7PA4XAeNxg\n+dVAFVXtE3x4e2LIyJbHBx/AlVe62VW/+EUAb/DWW9CvH7z/PhxzTABvYIzxyXfLoz+wElfT4xbg\nc+DGaE4uIt1EZJWIfCkid5VxXAcRKRCRS6M5bybYudOt7fvb3wJKHAAXXQSPPuoWjXz9dUBvYoxJ\nR2W2PESkKrBcVY+M+cQiWcAXQBfcQsNPcGMnKyMcNwPYCTyvqv+KcK6Ma3ncfjts2ACvvJKAN3vy\nSRg50lWUat48AW9ojEkEb7OtVLVARL4QkUNU9Zuyjo2gI7BaVdcCiMgE4CJcKybcLcDrQIcYz5+2\nPvwQ/vnPCiwGrKj+/d0mil27uqldjRsn6I2NMakqmoHvRrgV5guAHaHHVFUvLOd1zYF1YffXAyeH\nHyAizXEJ5Sxc8sis5kUEeXlw3XXw+ONw4IEJfOO77nIJpFs3t5TdStgaY8oQTfIYFvo3vOkTzYd8\nNMeMAu5WVRURKfE
eGWn4cLfx4eWXe3jzBx+Ebdvg/PPdtr21ankIwhiTCsra26ombrC8DbAUGBfj\nyvINQMuw+y3Zf0PFk4AJLm9wINBdRPJVdXLJkw0fPnzP7ezsbLKzs2MIJTUsWAAvvhihMmCiiMCY\nMW4bk8suc7OxDjjAUzDGmFjl5OSQk5OTkPcqa2+r14DdwFygB7BWVQdFfWI32P4FcDawEbeZ4n4D\n5mHHPw+8rapvRHgu7QfMf/4ZTjoJ7r03CXYOKShwa0CqVnUj9lVtWY8xqcjXVN2jVPU3qvoUcBlw\nRiwnVtUCYAAwHVgBvKqqK0Wkn4j0q3DEaeqBB+CXv3TrOryrWtUtIty61ZUsLCryHZExJsmU1fJY\nrKonlHY/kdK95bFwIZx3HixZAgcf7DuaMDt2uBlYHTu6/VEk44ekjEkpXnbVFZFC3NqLYjWBvNBt\nVdV6QQRUSixpmzx27YJf/Qruvhuuvtp3NBFs3QrZ2W5B4f33+47GGBMDL+s8VDUriDc0+3rwQTjs\nMLjqKt+RlKJBA3jvPTj9dDd99w9/8B2RMSYJ2EioR4sXu8XdS5YkeY9QkyYwc6ZLIPXquYqExpiM\nZsnDk9273WLARx6Bpk19RxOFli1hxgzXhVW3bhJMCTPG+GTJw5OHHnLbSF1zje9IYtC2Lbz7rhtE\nr1PHjfIbYzJSVGVofUu3AfMlS9zn7+LFKboPoRWTMiYl+N6S3cRRfr7rrhoxIkUTB7hiUq++6hYS\nfvKJ72iMMR5Y8kiwkSPd+PN11/mOpJLOOguefda1QCpdXN0Yk2pszCOBli+HUaNg0aIkn10VrQsv\nhO3b3U68//43HH6474iMMQliySNBCgpca+PPf3YTl9LGVVdBbi506eKKSbVo4TsiY0wCWPJIkEce\ncevt0nKJRL9+e4tJzZljxaSMyQA22yoBVqxwk5I++QRat/YdTYCGDnVTeWfPtmJSxiQBL3tbJZNU\nTh4FBdCpkyuRceONvqMJmCoMHAiffWbFpIxJAjZVN4X99a9Qu7br2Ul7IjB6tNus69JL3a6Pxpi0\nZC2PAK1aBaed5ioEHnaY72gSqKDAFSapUsXVBbFiUsZ4YS2PFFRYCH36uJrkGZU4YG8Fwm3boG9f\nKyZlTBqy5BGQMWOgWjW46SbfkXhSvTpMmgT/+Q/ceqsbDzHGpA3rtgrAl1/Cqae6LaDatPEdjWdb\nt0Lnzm4l+gMP+I7GmIzipRiUqZiiItddNWyYJQ7ALW6ZPh3OOMNN373tNt8RGWPiwJJHnI0d63po\nbrnFdyRsTTd0AAARbElEQVRJpEkTVwukuJhU376+IzLGVJIljzj66ivXMzN/PmRZEd99lSwm1auX\n74iMMZVgySNOiorg+uthyBA44gjf0SSptm1h2jS3D1bdulZMypgUZrOt4uTJJ92auMGDfUeS5I47\nDiZPdkvuc3J8R2OMqSCbbRUHu3a5PaumT4d27XxHkyJmz3Z10N95Bzp29B2NMWnJFgkmuVdfhWOP\ntcQRk86d4bnnXE0QKyZlTMqx5FFJqm47J+uuqoALLnCbf517Lqxe7TsaY0wMbMC8kubNc7WQunf3\nHUmK6t3bXcCuXa2YlDEpxJJHJY0a5XYhr2JtuIq74QYrJmVMirEB80r45hs48URYu9bNPDWVNGwY\nTJ0Ks2a5lenGmEqxYlBJmjzuuMPtnvvYY74jSROqMGgQLFrkpq7Vru07ImNSmiWPJEweO3bAIYe4\n0rKHHuo7mjRSvDnYpk1uPUj16r4jMiZl2VTdJPTSS26rJksccValCjz7LNSpA1dd5QpLGWOSjrU8\nKqCoCI4+Gp56Cs4803c0aWrXLrcGpGlTGDfOZiQYUwHW8kgy770HNWq4XcZNQKpXhzfecOs/Bg+2\nYlLGJBlLHhUwapT7PJNA8rnZo3Ztt33J3Llw772+ozHGhLF1HjFauRI++wzefNN3J
BmiZDGp22/3\nHZExhgS0PESkm4isEpEvReSuCM9fLSJLRGSpiMwTkaTeIWrMGLemrUYN35FkkCZNYOZM+Nvf4Omn\nfUdjjCHgloeIZAFjgS7ABuATEZmsqivDDvsaOENVfxKRbsDTwClBxlVRW7bAhAmwYoXvSDJQixau\nmNSZZ7oVmb17+47ImIwWdLdVR2C1qq4FEJEJwEXAnuShqh+GHf8xkLSbGz37LJx/vpsAZDxo02bf\nYlLnn+87ImMyVtDdVs2BdWH314ceK831wNRAI6qgggJXn9x2z/XsuOPg7bfdQsLZs31HY0zGCrrl\nEfX8ShHpDPQBOkV6fvjw4XtuZ2dnk52dXcnQYvPmm64M90knJfRtTSQdO8Jrr8GVV7pEcvLJviMy\nJink5OSQk6AKnYEuEhSRU4DhqtotdH8IUKSqfylxXDvgDaCbqu5X2CEZFgmedpprdVx+udcwTLh3\n3nGF42fOdC0SY8w+UnmR4EKgrYi0FpEDgJ7A5PADRKQVLnH8JlLiSAYLF8K6dXDxxb4jMfs4/3xX\niatbNysmZUyCBdptpaoFIjIAmA5kAc+p6koR6Rd6/ingXqAh8Hdxq+7yVTWpilqPHg033wxVbVVM\n8unVC7Zt21sLpGVL3xEZkxFsb6tybNrk9rH66ito1MhLCCYajzzipsPNmePWhRhjAu22sr+ly/Hk\nk+6PW0scSe7222HrVlcPffZsKyZlTMCs5VGGn392NTtycuCooxL+9iZWxcWkPv3U7V5pxaRMhkvl\nAfOUNmECnHCCJY6UIeJ2rWzbFi65xG3rbowJhLU8SqHqEseIEW4yj0khBQXQs6f7T3ztNZvpYDKW\ntTw8mDPHdVudc47vSEzMqlaFl192tYKvv95V7zLGxJUlj1KMGgUDB1oBu5RVXEzqq6/cOEgKtLCN\nSSXWbRXBmjXQoQOsXetKaZsUtnUrdO4M550Hf/qT72iMSSibqptgY8fCdddZ4kgLJYtJ3XGH74iM\nSQuWPErIzYUXXoBFi3xHYuKmuJjU6adDvXrQr5/viIxJeZY8SnjxRdfLccghviMxcRVeTKpePSsm\nZUwlWfIIU1TkysyOG+c7EhOI8GJSderABRf4jsiYlGVzicK8+64rUNcpYkURkxbCi0nNmuU7GmNS\nliWPMKNHu1mdEsjcBJM0OnaEiRPdQsKPP/YdjTEpyabqhnz+uevNWLvWLREwGaC4mNSMGdCune9o\njIk7W2GeAGPGQP/+ljgySnExqe7d4csvfUdjTEqxAXNg82a3BdKqVb4jMQkXXkxq7lwrJmVMlCx5\nAM88AxddBAcd5DsS48UNN7gE0qWLSyBWTMqYcmX8mEd+Phx2GEye7HbRNRns//7PjYNYMSmTJmzM\nI0BvvOGShyUOwwMPuFXo553nduQ1xpQq45NH8fRcY/YUkzriCCsmZUw5Mjp5LFgAmza58Q5jALcH\n/zPP7N3CpKDAd0TGJKWMTh6jR8OAAZCV5TsSk1SqVoV//tN1XfXpY8WkjIkgYwfMN2xwO1V8/bWN\njZpS7NgB554Lxx8Pjz9uWw+YlGMD5gH4+9/hqqsscZgy1K4NU6bA/PkwbJjvaIxJKhnZ8sjLc1uu\nf/CBGxs1pkw//OCKSV13Hdx5p+9ojImaVRKMwu7d8PzzLjFkZblxz6ysyLcXLXJlZi1xmKg0buz2\nvyouJtW/v++IjPEubZLHbbfBp5+6pFBY6MY4Cwsj3y4qggcf9B2xSSkli0lddZXviIzxKi2Sx/jx\nrsbPJ5/YGIYJUJs2rh762We7YlIXXug7ImO8SfkxjyVL3JZEs2a52VPGBO6TT6BHD5gwwSUSY5KU\nzbYqxZYtcNllbjt1SxwmYTp0cMWkevVyGynm5/uOyJiES9mWR1GRK0Hdtq3bUcKYhJsyxc3A2rzZ\nFYKpV8991a2793asj9WqZetJTNwE2fJImeSxZ
Ysi4n6/ROD++2HmTNddVa2a7whNRlOFnTshN9dt\n7R7+Femx0h7PzXX7adWpU/HkE/6Y/WJkPEseIlq7tkseBQVu4svOnbBwITRt6js6Y+KooGDfxBJt\nQor0WLVq8WkN1a5traEUZckjrNtqxw5Yv97NqrLiTcaUQtUteqpM8il+LC8vfq2hAw7wfWUyiiWP\nAItBGWPKUVAA27dXviW0bZtbqVtakom1NVQlpef7JETKJg8R6QaMArKAZ1X1LxGOGQN0B3YCv1PV\nxRGOseRhTKpTdWM6lW0Jbdvm+q1r1y4/yUSTkKpX931lApOSyUNEsoAvgC7ABuAToLeqrgw7pgcw\nQFV7iMjJwGhVPSXCuSx5hOTk5JCdne07jKRg12KvjLsWhYWuNRQhyeR88gnZzZpF3xoqnokTbaun\ntMfr1Em61lCq7m3VEVitqmsBRGQCcBGwMuyYC4EXAVT1YxFpICIHqer3AcaV0jLuQ6IMdi32yrhr\nkZUF9eu7rxJyli8nO5byoCVbQ6W1er7+uuzjduxwU60r0g0XqTWU5JMUgkwezYF1YffXAydHcUwL\nYL/ksez7ZVTLqkZBUQG7C3eTX5hPQVHBPl+FWrjPa4R9L76E/WdkSRbVsqpRrUq1Pf8ekHXAfo+V\n/DerilWOMiatVK/uNr9s3Lhy5ykqKrU1tM9jmzfDmjVlH1dUVPlZcvXqxef6lCLI5BFtP1PJ9Brx\ndb3+1Yv8wnyqZVWjelZ1qmVVo2qVqvt8VZEqexKGljhNeLeXohQWFZJflE9+Yf5+/+4u3F3qc0CZ\niUUQRCSwf9ctXsf7z79f6fOkgy+WfcGnr3zqO4ykYNdir6S8FtWBxqGv/dQLfe1VNb+QWnmF1MzL\np+bPhdTKy6dW3g/UyttErbwCam4qoNbXBdTMK6BWXgG18vKpmVd83N7HgxTkmMcpwHBV7Ra6PwQo\nCh80F5EngRxVnRC6vwo4s2S3lYjYgIcxxlRAKo55LATaikhrYCPQE+hd4pjJwABgQijZbI003hHU\nN2+MMaZiAkseqlogIgOA6bipus+p6koR6Rd6/ilVnSoiPURkNbADuC6oeIwxxsRPSiwSNMYYk1wS\nMilZRLqJyCoR+VJE7irlmDGh55eIyAnlvVZEGonIDBH5j4i8JyINwp4bEjp+lYicE+x3F5tEXgsR\n6SoiC0VkaejfzsF/h9FL9M9F6PlWIrJdRG4L7juLnYffkXYi8qGILA/9fCTNSrkE/45UE5EXQ9dg\nhYjcHfx3GL2ArsUVIvK5iBSKyIklzhX9Z6eqBvqF67JaDbQGqgGfAUeVOKYHMDV0+2Tgo/JeC4wE\n7gzdvgsYEbp9dOi4aqHXrQaqBP19Jum1OB44OHT7GGC972vg61qEnfN14FXgNt/XwOPPRVVgCXBc\n6H7DDP4duQp4JXS7JrAGaOX7OgR8LY4EjgBmAyeGnSumz85EtDz2LBZU1XygeLFguH0WCwINROTg\ncl675zWhfy8O3b4I98OQr26B4urQeZJBQq+Fqn6mqt+FHl8B1BSRZNmnO9E/F4jIxcDXuGuRTBJ9\nLc4BlqrqstD5tqhqUTDfWswSfS2KgNridsSoDewGtgXyncUukGuhqqtU9T8R3i+mz85EJI9ICwGb\nR3lMszJeG74S/XugeI/dZqHjyno/XxJ9LcJdBnwa+kFKBgm9FiJSB7gTGB6H2OMt0T8XRwAqItNE\n5FMRuaPy30LcJPpavI7bV28TsBZ4WFW3Vu5biJugrkVpYvrsDHKqbrGKLhYs7Zj9zqeqKmWvBUmW\nWQFeroWIHAOMALpG+f6JkOhrMRz4q6ruFEm6VZKJvhZVgdOAXwF5wPsi8qmqzooyjiAl+lqcDBQA\nTYFGwFwReV9V10QZR5DieS3iHkMikscGoGXY/Zbsm90iHdMidEy1CI9vCN3+XkQOVtXvRKQp8N8y\nzrWB5JDoa
4GItADeAK5Jkl+IYom+Fh2By0RkJNAAKBKRPFV9Ii7fTeUk+lqsA+ao6o8AIjIVOBFI\nhuSR6GtxFTBNVQuBH0RkHi6pJsPvSjyvRaTXlvd+ZX92JmDQpyrwFW4A5gDKH/Q5hb2DPqW+FjcA\ndlfo9t3sP2B+AHBo6PUS9PeZpNeiAW5g9GLf37vva1HivPcBf/B9DTz+XDQEPsUNEFcFZgDdfV8H\nT9fiTmBc6HZt4HPgWN/XIchrEfba2cBJYfdj+uxM1EXojtuefTUwJPRYP6Bf2DFjQ88vYd8ZAPu9\nNvR4I2Am8B/gPaBB2HP3hI5fBZzr+4fA17UAhgHbgcVhXwf6vga+fi7Cjkmq5OHjWgBXA8uBZURI\nsJlyLXAJ47XQtficJJqFF+C1uATX+swDvgPeDXsu6s9OWyRojDEmZslVucQYY0xKsORhjDEmZpY8\njDHGxMyShzHGmJhZ8jDGGBMzSx7GGGNiZsnDpKXQdtOLRWSZiEwWkfqe45lXyuMviMhloduDRaRm\n2HPbExWfMbGy5GHS1U5VPUFVjwN+BG72GYyqdirtKfbuHzQIqFXiOWOSkiUPkwk+JLQ7qIgcLyIf\nhQrnvBFWFChHREaFtVY6hB6vLSLjRORjEVkkIheWPLmIjBWRC0K3J4nIc6HbfUTkT6Hb20P/Suj4\nVSIyA2gSevgW3K6ms0Xk/bBz/0lEPhNXuKlJgNfImJhY8jBpLVSn4WzgrdBDLwF3qGp73NYc94Ue\nV6Cmqp4A3ASMCz0+FHhfVU8GzgIeFpHw1gHAXOD00O3mwFGh26cD/w47P7itIY4IHfNb4Ne4jV4f\nBzYC2ap6dujY2sCHqno8MAfoW6GLYEwALHmYdFVTRBbj6jQcBMwIjXvUV9W5oWNeBM4Ie80rAKHn\n64WOPwe4O3Su2UB19t15FELJQ0SOwu2P9H2oIM8pwPwSx54BvKzOJsreyXa3qk4J3f4Ut8mdMUkh\nEVuyG+NDnqqeEBqAng4MYG8luWLl1UEobi1cqqpflnqQ6sZQ91c3XAuhEdAT2K6qOyKcM9r6C+GF\nu4qw31eTRKzlYdKaquYBA4HbgB3AFhE5LfT0NUBO6LbgPvAJPb9VVbfhEs/A4vOJyAmlvNVHwGBc\nN9Vc4HZcIilpDtBTRKqE6kp0DnsuF6gX47dojBf2l4xJV3tmKqnqZyKyFOgFXAs8GRq3+Aq4Luz4\nn0VkEe73ok/o8T8Co0Kvr4Krgb7foDkuYXRV1a9FZB2uZsbcsOc1FMskETkLV0f9W/bt1noamCYi\nG0LjHlri9Tb7yiQN25LdGEBEZuNqOSzyHYsxqcC6rYwxxsTMWh7GGGNiZi0PY4wxMbPkYYwxJmaW\nPIwxxsTMkocxxpiYWfIwxhgTM0sexhhjYvb/LxRwyXeo88gAAAAASUVORK5CYII=\n", 251 | "text/plain": [ 252 | "" 253 | ] 254 | }, 255 | "metadata": {}, 256 | "output_type": "display_data" 257 | } 258 | ], 259 | "source": [ 260 | "%matplotlib inline\n", 261 | "import matplotlib.pyplot as plt\n", 262 | "left=np.zeros((10,1))\n", 263 | "within=np.zeros((10,1))\n", 264 | "right=np.zeros((10,1))\n", 265 | "for i in range(9,-1,-1):\n", 266 | " left[i], within[i], right[i] = bt.signtest(scores, rope=0.001/2**i,names=names)\n", 267 | "plt.plot(0.001/(2**np.arange(0,10,1)),within)\n", 268 | 
"plt.plot(0.001/(2**np.arange(0,10,1)),left)\n", 269 | "plt.plot(0.001/(2**np.arange(0,10,1)),right)\n", 270 | "plt.legend(('rope','left','right'))\n", 271 | "plt.xlabel('Rope width')\n", 272 | "plt.ylabel('Probability')" 273 | ] 274 | }, 275 | { 276 | "cell_type": "markdown", 277 | "metadata": {}, 278 | "source": [ 279 | "## Using Signrank test\n", 280 | "We can also use the signrank that is more sensitivie to differences." 281 | ] 282 | }, 283 | { 284 | "cell_type": "code", 285 | "execution_count": 13, 286 | "metadata": { 287 | "collapsed": false 288 | }, 289 | "outputs": [ 290 | { 291 | "name": "stdout", 292 | "output_type": "stream", 293 | "text": [ 294 | "P(J48 > J48gr) = 0.0, P(rope) = 0.82288, P(J48gr > J48) = 0.17712\n" 295 | ] 296 | } 297 | ], 298 | "source": [ 299 | "left, within, right = bt.signrank(scores, rope=0.001,verbose=True,names=names)" 300 | ] 301 | }, 302 | { 303 | "cell_type": "code", 304 | "execution_count": 14, 305 | "metadata": { 306 | "collapsed": false 307 | }, 308 | "outputs": [ 309 | { 310 | "name": "stdout", 311 | "output_type": "stream", 312 | "text": [ 313 | "P(J48 > J48gr) = 6e-05, P(rope) = 0.0007, P(J48gr > J48) = 0.99924\n" 314 | ] 315 | } 316 | ], 317 | "source": [ 318 | "left, within, right = bt.signrank(scores, rope=0.0001,verbose=True,names=names)" 319 | ] 320 | }, 321 | { 322 | "cell_type": "markdown", 323 | "metadata": {}, 324 | "source": [ 325 | "However, the conclusion is very similar. The difference is very small (1/10000), but in favour of J48gr." 
326 | ] 327 | }, 328 | { 329 | "cell_type": "code", 330 | "execution_count": null, 331 | "metadata": { 332 | "collapsed": true 333 | }, 334 | "outputs": [], 335 | "source": [] 336 | } 337 | ], 338 | "metadata": { 339 | "kernelspec": { 340 | "display_name": "Python 3", 341 | "language": "python", 342 | "name": "python3" 343 | }, 344 | "language_info": { 345 | "codemirror_mode": { 346 | "name": "ipython", 347 | "version": 3 348 | }, 349 | "file_extension": ".py", 350 | "mimetype": "text/x-python", 351 | "name": "python", 352 | "nbconvert_exporter": "python", 353 | "pygments_lexer": "ipython3", 354 | "version": "3.5.2" 355 | } 356 | }, 357 | "nbformat": 4, 358 | "nbformat_minor": 0 359 | } 360 | -------------------------------------------------------------------------------- /Python/bayesiantests.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.matlib 3 | 4 | LEFT, ROPE, RIGHT = range(3) 5 | 6 | def correlated_ttest_MC(x, rope, runs=1, nsamples=50000): 7 | """ 8 | See correlated_ttest module for explanations 9 | """ 10 | if x.ndim == 2: 11 | x = x[:, 1] - x[:, 0] 12 | diff=x 13 | n = len(diff) 14 | nfolds = n / runs 15 | x = np.mean(diff) 16 | # Nadeau's and Bengio's corrected variance 17 | var = np.var(diff, ddof=1) * (1 / n + 1 / (nfolds - 1)) 18 | if var == 0: 19 | return int(x < rope), int(-rope <= x <= rope), int(rope < x) 20 | 21 | return x+np.sqrt(var)*np.random.standard_t( n - 1, nsamples) 22 | 23 | 24 | 25 | ## Correlated t-test 26 | def correlated_ttest(x, rope, runs=1, verbose=False, names=('C1', 'C2')): 27 | import scipy.stats as stats 28 | """ 29 | Compute correlated t-test 30 | 31 | The function uses the Bayesian interpretation of the p-value and returns 32 | the probabilities the difference are below `-rope`, within `[-rope, rope]` 33 | and above the `rope`. For details, see `A Bayesian approach for comparing 34 | cross-validated algorithms on multiple data sets 35 | `_, 36 | G. 
Corani and A. Benavoli, Mach Learning 2015. 37 | 38 | | 39 | The test assumes that the classifiers were evaluated using cross 40 | validation. The number of folds is determined from the length of the vector 41 | of differences, as `len(diff) / runs`. The variance includes a correction 42 | for underestimation of variance due to overlapping training sets, as 43 | described in `Inference for the Generalization Error 44 | `_, 45 | C. Nadeau and Y. Bengio, Mach Learning 2003.) 46 | 47 | | 48 | Args: 49 | x (array): a vector of differences or a 2d array with pairs of scores. 50 | rope (float): the width of the rope 51 | runs (int): number of repetitions of cross validation (default: 1) 52 | return: probablities (tuple) that differences are below -rope, within rope or 53 | above rope 54 | """ 55 | if x.ndim == 2: 56 | x = x[:, 1] - x[:, 0] 57 | diff=x 58 | n = len(diff) 59 | nfolds = n / runs 60 | x = np.mean(diff) 61 | # Nadeau's and Bengio's corrected variance 62 | var = np.var(diff, ddof=1) * (1 / n + 1 / (nfolds - 1)) 63 | if var == 0: 64 | return int(x < rope), int(-rope <= x <= rope), int(rope < x) 65 | pr = 1-stats.t.cdf(rope, n - 1, x, np.sqrt(var)) 66 | pl = stats.t.cdf(-rope, n - 1, x, np.sqrt(var)) 67 | pe=1-pl-pr 68 | if verbose: 69 | print('P({c1} > {c2}) = {pl}, P(rope) = {pe}, P({c2} > {c1}) = {pr}'. 70 | format(c1=names[0], c2=names[1], pl=pl, pe=pe, pr=pr)) 71 | return pl, pe, pr 72 | 73 | ## SIGN TEST 74 | def signtest_MC(x, rope, prior_strength=1, prior_place=ROPE, nsamples=50000): 75 | """ 76 | Args: 77 | x (array): a vector of differences or a 2d array with pairs of scores. 
78 | rope (float): the width of the rope 79 | prior_strength (float): prior strength (default: 1) 80 | prior_place (LEFT, ROPE or RIGHT): the region to which the prior is 81 | assigned (default: ROPE) 82 | nsamples (int): the number of Monte Carlo samples 83 | 84 | Returns: 85 | 2-d array with rows corresponding to samples and columns to 86 | probabilities `[p_left, p_rope, p_right]` 87 | """ 88 | if prior_strength < 0: 89 | raise ValueError('Prior strength must be nonegative') 90 | if nsamples < 0: 91 | raise ValueError('Number of samples must be a positive integer') 92 | if rope < 0: 93 | raise ValueError('Rope must be a positive number') 94 | 95 | if x.ndim == 2: 96 | x = x[:, 1] - x[:, 0] 97 | nleft = sum(x < -rope) 98 | nright = sum(x > rope) 99 | nrope = len(x) - nleft - nright 100 | alpha = np.array([nleft, nrope, nright], dtype=float) 101 | alpha += 0.0001 # for numerical stability 102 | alpha[prior_place] += prior_strength 103 | return np.random.dirichlet(alpha, nsamples) 104 | 105 | def signtest(x, rope, prior_strength=1, prior_place=ROPE, nsamples=50000, 106 | verbose=False, names=('C1', 'C2')): 107 | """ 108 | Args: 109 | x (array): a vector of differences or a 2d array with pairs of scores. 110 | rope (float): the width of the rope 111 | prior_strength (float): prior strength (default: 1) 112 | prior_place (LEFT, ROPE or RIGHT): the region to which the prior is 113 | assigned (default: ROPE) 114 | nsamples (int): the number of Monte Carlo samples 115 | verbose (bool): report the computed probabilities 116 | names (pair of str): the names of the two classifiers 117 | 118 | Returns: 119 | p_left, p_rope, p_right 120 | """ 121 | samples = signtest_MC(x, rope, prior_strength, prior_place, nsamples) 122 | 123 | winners = np.argmax(samples, axis=1) 124 | pl, pe, pr = np.bincount(winners, minlength=3) / len(winners) 125 | if verbose: 126 | print('P({c1} > {c2}) = {pl}, P(rope) = {pe}, P({c2} > {c1}) = {pr}'. 
## SIGNEDRANK
# Region indices, re-declared so this section is self-contained; the values
# match the module header (LEFT, ROPE, RIGHT = range(3)).
LEFT, ROPE, RIGHT = range(3)


def heaviside(X):
    """Elementwise Heaviside step: 1 for positive entries, 0.5 at zero,
    0 for negative entries."""
    Y = np.zeros(X.shape)
    Y[np.where(X > 0)] = 1
    Y[np.where(X == 0)] = 0.5
    return Y


def signrank_MC(x, rope, prior_strength=0.6, prior_place=ROPE, nsamples=50000):
    """Monte-Carlo sampling for the Bayesian signed-rank test.

    Args:
        x (array): a vector of differences or a 2d array with pairs of scores.
        rope (float): the width of the rope
        prior_strength (float): prior strength (default: 0.6)
        prior_place (LEFT, ROPE or RIGHT): the region to which the prior is
            assigned (default: ROPE)
        nsamples (int): the number of Monte Carlo samples

    Returns:
        2-d array with rows corresponding to samples and columns to
        probabilities `[p_left, p_rope, p_right]`
    """
    # Input validation, matching signtest_MC for consistency.
    if prior_strength < 0:
        raise ValueError('Prior strength must be nonnegative')
    if nsamples <= 0:
        raise ValueError('Number of samples must be a positive integer')
    if rope < 0:
        raise ValueError('Rope must be a positive number')

    if x.ndim == 2:
        zm = x[:, 1] - x[:, 0]
    else:
        zm = x
    nm = len(zm)

    # The prior is a pseudo-observation placed in the requested region.
    if prior_place == ROPE:
        z0 = [0]
    elif prior_place == LEFT:
        z0 = [-float('inf')]
    elif prior_place == RIGHT:
        z0 = [float('inf')]
    else:
        raise ValueError('prior_place must be LEFT, ROPE or RIGHT')
    z = np.concatenate((zm, z0))

    # Pairwise Walsh sums z_i + z_j via broadcasting; this replaces the
    # original construction of the same matrices with the deprecated
    # numpy.matlib / np.asmatrix machinery.
    sums = z[:, None] + z[None, :]
    # A*[i, j] is 1 when the pair (i, j) votes for the region, 0.5 on ties:
    #   right: z_i + z_j > 2*rope,  left: -(z_i + z_j) > 2*rope
    Aright = heaviside(sums - 2 * rope)
    Aleft = heaviside(-sums - 2 * rope)

    # Dirichlet weights: 1 per observation, prior_strength for the pseudo-obs.
    alpha = np.concatenate((np.ones(nm), [prior_strength]))
    samples = np.zeros((nsamples, 3), dtype=float)
    for i in range(nsamples):
        w = np.random.dirichlet(alpha)
        samples[i, 2] = w.dot(Aright).dot(w)
        samples[i, 0] = w.dot(Aleft).dot(w)
        samples[i, 1] = 1 - samples[i, 0] - samples[i, 2]

    return samples


def signrank(x, rope, prior_strength=0.6, prior_place=ROPE, nsamples=50000,
             verbose=False, names=('C1', 'C2')):
    """Bayesian signed-rank test: probabilities of left, rope and right.

    Args:
        x (array): a vector of differences or a 2d array with pairs of scores.
        rope (float): the width of the rope
        prior_strength (float): prior strength (default: 0.6)
        prior_place (LEFT, ROPE or RIGHT): the region to which the prior is
            assigned (default: ROPE)
        nsamples (int): the number of Monte Carlo samples
        verbose (bool): report the computed probabilities
        names (pair of str): the names of the two classifiers

    Returns:
        p_left, p_rope, p_right
    """
    samples = signrank_MC(x, rope, prior_strength, prior_place, nsamples)

    # Fraction of posterior samples in which each region has the largest mass.
    winners = np.argmax(samples, axis=1)
    pl, pe, pr = np.bincount(winners, minlength=3) / len(winners)
    if verbose:
        print('P({c1} > {c2}) = {pl}, P(rope) = {pe}, P({c2} > {c1}) = {pr}'.
              format(c1=names[0], c2=names[1], pl=pl, pe=pe, pr=pr))
    return pl, pe, pr
def hierarchical(diff, rope, rho, upperAlpha=2, lowerAlpha=1, lowerBeta=0.01,
                 upperBeta=0.1, std_upper_bound=1000, verbose=False,
                 names=('C1', 'C2')):
    """Hierarchical Bayesian test comparing two classifiers on multiple data sets.

    Args:
        diff (2d array): differences of scores; one row per data set, one
            column per cross-validation result (typically 100 for 10 runs of
            10-fold CV).
        rope (float): the width of the rope.
        rho (float): correlation of the cross-validation results,
            1/(number of folds).
        upperAlpha, lowerAlpha, upperBeta, lowerBeta (float): upper and lower
            bounds for alpha and beta, the parameters of the Gamma
            distribution used as a prior for the degrees of freedom.
        std_upper_bound (float): multiplies the sample standard deviation to
            set the upper limit of the prior on the standard deviation;
            posterior inferences are insensitive to this value as long as it
            is large, such as 100 or 1000.
        verbose (bool): report the computed probabilities.
        names (pair of str): the names of the two classifiers.

    Returns:
        p_left, p_rope, p_right
    """
    samples = hierarchical_MC(diff, rope, rho, upperAlpha, lowerAlpha,
                              lowerBeta, upperBeta, std_upper_bound, names)
    winners = np.argmax(samples, axis=1)
    pl, pe, pr = np.bincount(winners, minlength=3) / len(winners)
    if verbose:
        print('P({c1} > {c2}) = {pl}, P(rope) = {pe}, P({c2} > {c1}) = {pr}'.
              format(c1=names[0], c2=names[1], pl=pl, pe=pe, pr=pr))
    return pl, pe, pr


def hierarchical_MC(diff, rope, rho, upperAlpha=2, lowerAlpha=1,
                    lowerBeta=0.01, upperBeta=0.1, std_upper_bound=1000,
                    names=('C1', 'C2')):
    """Sample the posterior of the hierarchical model with Stan.

    See `hierarchical` for the meaning of the arguments.

    Returns:
        2-d array with rows corresponding to posterior samples and columns to
        probabilities `[p_left, p_rope, p_right]`.
    """
    import scipy.stats as stats
    import pystan  # requires PyStan 2; the Stan code below uses its syntax

    # Data rescaling, to have a homogeneous scale among all data sets:
    # scale everything by the mean of the per-data-set standard deviations.
    stdX = np.mean(np.std(diff, 1))
    x = diff / stdX
    rope = rope / stdX

    # Avoid numerical problems with zero-variance rows by adding a tiny
    # amount of per-element noise. (The original called np.min with two
    # scalar arguments -- a TypeError -- and added the same scalar to the
    # whole row, which leaves the variance at zero.)
    for i in range(len(x)):
        if np.std(x[i, :]) == 0:
            # NOTE(review): when the row is identically zero the scale is 0
            # and no noise is added, as in the original intent -- confirm.
            scale = min(1 / 1000000000, np.abs(np.mean(x[i, :]) / 100000000))
            x[i, :] = x[i, :] + np.random.normal(0, scale, len(x[i, :]))

    # The hierarchical model written in Stan (Stan 2.x syntax as used by
    # PyStan 2: `<-` assignment and increment_log_prob).
    hierarchical_code = """
    /*Hierarchical Bayesian model for the analysis of competing cross-validated
      classifiers on multiple data sets.
    */

    data {

        real deltaLow;
        real deltaHi;

        //bounds of the sigma of the higher-level distribution
        real std0Low;
        real std0Hi;

        //bounds on the domain of the sigma of each data set
        real stdLow;
        real stdHi;

        //number of results for each data set. Typically 100 (10 runs of 10-folds cv)
        int<lower=2> Nsamples;

        //number of data sets.
        int<lower=1> q;

        //difference of accuracy between the two classifiers, on each fold of each data set.
        matrix[q,Nsamples] x;

        //correlation (1/(number of folds))
        real rho;

        real upperAlpha;
        real lowerAlpha;
        real upperBeta;
        real lowerBeta;

    }

    transformed data {

        //vector of 1s appearing in the likelihood
        vector[Nsamples] H;

        //vector of 0s: the mean of the mvn noise
        vector[Nsamples] zeroMeanVec;

        /* M is the correlation matrix of the mvn noise.
           invM is its inverse, detM its determinant */
        matrix[Nsamples,Nsamples] invM;
        real detM;

        //The determinant of M is analytically known
        detM <- (1+(Nsamples-1)*rho)*(1-rho)^(Nsamples-1);

        //build H and invM. They do not depend on the data.
        for (j in 1:Nsamples){
            zeroMeanVec[j]<-0;
            H[j]<-1;
            for (i in 1:Nsamples){
                if (j==i)
                    invM[j,i]<- (1 + (Nsamples-2)*rho)*pow((1-rho),Nsamples-2);
                else
                    invM[j,i]<- -rho * pow((1-rho),Nsamples-2);
            }
        }
        /*at this point invM contains the adjugate of M.
          we divide it by det(M) to obtain the inverse of M.*/
        invM <-invM/detM;
    }

    parameters {
        /*NOTE(review): the <lower=,upper=> bounds in this block were lost in
          this copy of the file; they are restored here from the comments in
          the model block, which reference the uniform priors over these
          domains -- verify against the upstream source.*/

        //mean of the hyperprior from which we sample the delta_i
        real<lower=deltaLow,upper=deltaHi> delta0;

        //std of the hyperprior from which we sample the delta_i
        real<lower=std0Low,upper=std0Hi> std0;

        //delta_i of each data set: vector of lenght q.
        vector[q] delta;

        //sigma of each data set: vector of lenght q.
        vector<lower=stdLow,upper=stdHi>[q] sigma;

        /* the domain of (nu - 1) starts from 0
           and can be given a gamma prior*/
        real<lower=0> nuMinusOne;

        //parameters of the Gamma prior on nuMinusOne
        real<lower=lowerAlpha,upper=upperAlpha> gammaAlpha;
        real<lower=lowerBeta,upper=upperBeta> gammaBeta;

    }

    transformed parameters {
        //degrees of freedom
        real nu ;

        /*difference between the data (x matrix) and
          the vector of the q means.*/
        matrix[q,Nsamples] diff;

        vector[q] diagQuad;

        /*vector of length q:
          1 over the variance of each data set*/
        vector[q] oneOverSigma2;

        vector[q] logDetSigma;

        vector[q] logLik;

        //degrees of freedom
        nu <- nuMinusOne + 1 ;

        //1 over the variance of each data set
        oneOverSigma2 <- rep_vector(1, q) ./ sigma;
        oneOverSigma2 <- oneOverSigma2 ./ sigma;

        /*the data (x) minus a matrix done as follows:
          the delta vector (of lenght q) pasted side by side Nsamples times*/
        diff <- x - rep_matrix(delta,Nsamples);

        //efficient matrix computation of the likelihood.
        diagQuad <- diagonal (quad_form (invM,diff'));
        logDetSigma <- 2*Nsamples*log(sigma) + log(detM) ;
        logLik <- -0.5 * logDetSigma - 0.5*Nsamples*log(6.283);
        logLik <- logLik - 0.5 * oneOverSigma2 .* diagQuad;

    }

    model {
        /*mu0 and std0 are not explicitly sampled here.
          Stan automatically samples them: mu0 as uniform and std0 as
          uniform over its domain (std0Low,std0Hi).*/

        //sampling the degrees of freedom
        nuMinusOne ~ gamma ( gammaAlpha, gammaBeta);

        //vectorial sampling of the delta_i of each data set
        delta ~ student_t(nu, delta0, std0);

        //logLik is computed in the previous block
        increment_log_prob(sum(logLik));
    }
    """
    datatable = x
    std_within = np.mean(np.std(datatable, 1))

    Nsamples = len(datatable[0])
    q = len(datatable)
    # Scale of the higher-level prior: spread of the per-data-set means,
    # falling back to the within spread when there is a single data set.
    if q > 1:
        std_among = np.std(np.mean(datatable, 1))
    else:
        std_among = np.mean(np.std(datatable, 1))

    # Data passed to the Stan model.
    hierarchical_dat = {'x': datatable,
                        'deltaLow': -np.max(np.abs(datatable)),
                        'deltaHi': np.max(np.abs(datatable)),
                        'stdLow': 0,
                        'stdHi': std_within * std_upper_bound,
                        'std0Low': 0,
                        'std0Hi': std_among * std_upper_bound,
                        'Nsamples': Nsamples,
                        'q': q,
                        'rho': rho,
                        'upperAlpha': upperAlpha,
                        'lowerAlpha': lowerAlpha,
                        'upperBeta': upperBeta,
                        'lowerBeta': lowerBeta}

    # Call to Stan.
    fit = pystan.stan(model_code=hierarchical_code, data=hierarchical_dat,
                      iter=1000, chains=4)

    la = fit.extract(permuted=True)  # dictionary of posterior draws
    mu = la['delta0']
    stdh = la['std0']
    nu = la['nu']

    # For every posterior draw of (mu, std, nu), probability mass of the
    # predictive Student t left of, inside and right of the rope.
    samples = np.zeros((len(mu), 3), dtype=float)
    for i in range(len(mu)):
        samples[i, 2] = 1 - stats.t.cdf(rope, nu[i], mu[i], stdh[i])
        samples[i, 0] = stats.t.cdf(-rope, nu[i], mu[i], stdh[i])
        samples[i, 1] = 1 - samples[i, 0] - samples[i, 2]

    return samples
def plot_posterior(samples, names=('C1', 'C2')):
    """Plot posterior samples on the probability simplex.

    Args:
        samples (2-d array): posterior samples with columns
            `[p_left, p_rope, p_right]`.
        names (pair of str): the names of the two classifiers

    Returns:
        matplotlib.pyplot.figure
    """
    return plot_simplex(samples, names)


def plot_simplex(points, names=('C1', 'C2')):
    """Draw posterior samples as a hexbin density inside the probability simplex.

    Args:
        points (2-d array): samples with columns `[p_left, p_rope, p_right]`.
        names (pair of str): the names of the two classifiers

    Returns:
        matplotlib.pyplot.figure
    """
    import matplotlib.pyplot as plt
    from matplotlib.lines import Line2D
    from matplotlib.pylab import rcParams

    def _project(points):
        # Barycentric -> 2d Cartesian projection of simplex coordinates.
        from math import sqrt, sin, cos, pi
        p1, p2, p3 = points.T / sqrt(3)
        x = (p2 - p1) * cos(pi / 6) + 0.5
        y = p3 - (p1 + p2) * sin(pi / 6) + 1 / (2 * sqrt(3))
        return np.vstack((x, y)).T

    # Barycenter plus the three edge midpoints: endpoints of the decision lines.
    vert0 = _project(np.array(
        [[0.3333, 0.3333, 0.3333], [0.5, 0.5, 0], [0.5, 0, 0.5], [0, 0.5, 0.5]]))

    fig = plt.figure()
    fig.set_size_inches(8, 7)

    # Fall back to a 2d plot when one of the three probabilities is
    # negligible. (The original fallback crashed before reaching `plot2d`:
    # tuple+list concatenation and `set - int` both raise TypeError.)
    labels = np.array([names[0], 'rope', names[1]])
    for i, n in enumerate(np.max(points, axis=0)):
        if n < 0.001:
            print("p({}) is too small, switching to 2d plot".format(labels[i]))
            coords = sorted(set(range(3)) - {i})
            # NOTE(review): `plot2d` is not defined in this module -- the
            # fallback cannot run until it is provided; confirm upstream.
            return plot2d(points[:, coords], labels[coords])

    # triangle
    fig.gca().add_line(
        Line2D([0, 0.5, 1.0, 0],
               [0, np.sqrt(3) / 2, 0, 0], color='orange'))
    # decision lines
    for i in (1, 2, 3):
        fig.gca().add_line(
            Line2D([vert0[0, 0], vert0[i, 0]],
                   [vert0[0, 1], vert0[i, 1]], color='orange'))
    # vertex labels
    rcParams.update({'font.size': 16})
    fig.gca().text(-0.08, -0.08, 'p({})'.format(names[0]), color='orange')
    fig.gca().text(0.44, np.sqrt(3) / 2 + 0.05, 'p(rope)', color='orange')
    fig.gca().text(1.00, -0.08, 'p({})'.format(names[1]), color='orange')

    # project and draw points
    tripts = _project(points[:, [0, 2, 1]])
    plt.hexbin(tripts[:, 0], tripts[:, 1], mincnt=1, cmap=plt.cm.Blues_r)
    # Leave some padding around the triangle for vertex labels
    fig.gca().set_xlim(-0.2, 1.2)
    fig.gca().set_ylim(-0.2, 1.2)
    fig.gca().axis('off')
    return fig
BayesianSignTest <- function(diffVector, rope_min, rope_max) {
  # Bayesian sign test: fraction of score differences falling to the left
  # of, inside, and to the right of the rope [rope_min, rope_max].
  #
  # Args:
  #   diffVector: vector of score differences between the two classifiers.
  #   rope_min, rope_max: lower and upper bound of the rope.
  #
  # Returns:
  #   list with probLeft, probRope and probRight.
  #
  # (The unused `samples`, `weights` locals and the unneeded
  # library(MCMCpack) dependency of the original have been removed.)

  # add the fake first observation in 0 (prior pseudo-observation)
  diffVector <- c(0, diffVector)

  probLeft <- mean(diffVector < rope_min)
  probRope <- mean(diffVector > rope_min & diffVector < rope_max)
  probRight <- mean(diffVector > rope_max)

  results <- list("probLeft" = probLeft, "probRope" = probRope,
                  "probRight" = probRight)

  return(results)
}
BayesianSignedRank <- function(diffVector, rope_min, rope_max) {
  # Bayesian signed-rank test (Dirichlet-weighted Walsh pairs).
  #
  # Args:
  #   diffVector: vector of score differences between the two classifiers.
  #   rope_min, rope_max: lower and upper bound of the rope.
  #
  # Returns:
  #   list with winLeft, winRope and winRight: posterior probabilities that
  #   each region collects the largest probability mass (ties split evenly).
  library(MCMCpack)  # for rdirichlet

  samples <- 30000

  # build the vector 0.5 1 1 ....... 1 (prior pseudo-observation weight 0.5)
  weights <- c(0.5, rep(1, length(diffVector)))

  # add the fake first observation in 0
  diffVector <- c(0, diffVector)

  sampledWeights <- rdirichlet(samples, weights)

  # Pairwise sums d_i + d_j and their region masks, computed once instead of
  # inside the 30000-iteration sampling loop (identical results, much faster).
  pairSums <- outer(diffVector, diffVector, "+")
  rightMask <- pairSums > (2 * rope_max)
  ropeMask <- !rightMask & pairSums > (2 * rope_min)
  leftMask <- !rightMask & !ropeMask

  winLeft <- numeric(samples)
  winRope <- numeric(samples)
  winRight <- numeric(samples)

  for (rep in 1:samples) {
    w <- sampledWeights[rep, ]
    prodMat <- outer(w, w)  # products w_i * w_j for every Walsh pair
    winRight[rep] <- sum(prodMat[rightMask])
    winRope[rep] <- sum(prodMat[ropeMask])
    winLeft[rep] <- sum(prodMat[leftMask])

    # keep only the winning region(s); ties share the unit mass evenly
    maxWins <- max(winRight[rep], winRope[rep], winLeft[rep])
    winners <- (winRight[rep] == maxWins) * 1 + (winRope[rep] == maxWins) * 1 +
      (winLeft[rep] == maxWins) * 1
    winRight[rep] <- (winRight[rep] == maxWins) * 1 / winners
    winRope[rep] <- (winRope[rep] == maxWins) * 1 / winners
    winLeft[rep] <- (winLeft[rep] == maxWins) * 1 / winners
  }

  results <- list("winLeft" = mean(winLeft), "winRope" = mean(winRope),
                  "winRight" = mean(winRight))

  return(results)
}
# diff_a_b is a vector of differences between the two classifiers, on each
# fold of cross-validation (e.g. 100 results for 10 runs of 10-fold CV, run
# on the same folds for both classifiers).
# rho is the correlation of the cross-validation results: 1/(number of folds).
# rope_min and rope_max are the lower and the upper bound of the rope.
correlatedBayesianTtest <- function(diff_a_b, rho, rope_min, rope_max) {
  # Bayesian correlated t-test (Corani & Benavoli, 2015).
  # Returns list(left, rope, right): posterior probabilities that the mean
  # difference lies below, inside and above the rope.
  if (rope_max < rope_min) {
    stop("rope_max should be larger than rope_min")
  }

  delta <- mean(diff_a_b)
  n <- length(diff_a_b)
  df <- n - 1
  stdX <- sd(diff_a_b)
  # corrected standard error (Nadeau & Bengio correction for overlapping
  # training sets); reuses stdX instead of recomputing sd() as the original did
  sp <- stdX * sqrt(1 / n + rho / (1 - rho))
  p.left <- pt((rope_min - delta) / sp, df)
  p.rope <- pt((rope_max - delta) / sp, df) - p.left
  results <- list('left' = p.left, 'rope' = p.rope,
                  'right' = 1 - p.left - p.rope)
  return(results)
}
12 | -------------------------------------------------------------------------------- /hierarchical/Utils.R: -------------------------------------------------------------------------------- 1 | #store multiple useful functions: getActuals, KL 2 | 3 | #computes MSEshr and MSEmle for simulation with hierarchical models, 4 | #assuming data to be laready loaded 5 | computeMSE <- function(){ 6 | reps <- length(hierModels) 7 | mseShr <- vector(length = reps) 8 | mseMle <- vector(length = reps) 9 | for (i in 1:reps){ 10 | actual <- trainData[[i]]$actualTrainDeltaI 11 | mle <- trainData[[i]]$currentMleDiffLdaCart 12 | shrinkage <- hierModels[[i]]$delta_each_dset 13 | mseMle[i] <- mean ( (actual - mle)^2 ) 14 | mseShr[i] <- mean ( (actual - shrinkage)^2 ) 15 | } 16 | df <- data.frame('mseMle'=mseMle,'mseShr'=mseShr) 17 | write.table(df,sep=',',col.names = TRUE, row.names = FALSE, file = 'mse.tex') 18 | } 19 | 20 | 21 | getActuals <- function (){ 22 | 23 | actualFileName <- 'csvResults/actualAccFriedman1.csv' 24 | actualAccFriedman1 <- read.csv(actualFileName) 25 | actualFileName <- 'csvResults/actualAccFriedman2.csv' 26 | actualAccFriedman2 <- read.csv(actualFileName) 27 | actualFileName <- 'csvResults/actualAccFriedman3.csv' 28 | actualAccFriedman3 <- read.csv(actualFileName) 29 | actualAccFriedman <- rbind (actualAccFriedman1, actualAccFriedman2, actualAccFriedman3) 30 | ropeMin <- - 0.01 31 | ropeMax <- 0.01 32 | 33 | diff <- actualAccFriedman$ldaAccuracy - actualAccFriedman$cartAccuracy 34 | actuals <- list (delta0 = mean(diff)) 35 | actuals$pDeltaRight <- length(which(diff>ropeMax)) / length(diff) 36 | actuals$pDeltaLeft <- length(which(diffropeMin & diff 1) { 61 | sampledDeltas[r] <- (rt (1, nu[r]) * std[r] + mu[r]) * fittedModel$stdX 62 | } 63 | } 64 | } 65 | if (sampling=='gaussian'){ 66 | for (r in 1:postSamples){ 67 | sampledDeltas[r] <- 1000 68 | while (abs(sampledDeltas[r]) > 1) { 69 | sampledDeltas[r] <- rnorm (1, sd= std[r], mean= mu[r]) * fittedModel$stdX 70 
| } 71 | } 72 | } 73 | pSampleRight <- length(which(sampledDeltas>ropeMax)) / postSamples 74 | pSampleLeft <- length(which(sampledDeltasropeMin & sampledDeltas 1) { 114 | sampleDelta[r] <- (rt (1, nu[r]) * std[r] + mu[r]) * hierPosterior$stdX 115 | redone <- redone + 1 116 | } 117 | } 118 | 119 | sampleDeltaSens <- vector (length = postSamples) 120 | std <- hierPosteriorSens$stanResults$std0 121 | mu <- hierPosteriorSens$stanResults$delta0 122 | nu <- hierPosteriorSens$stanResults$nu 123 | 124 | for (r in 1:postSamples){ 125 | sampleDeltaSens[r] <- 1000 126 | while (abs(sampleDeltaSens[r]) > 1) { 127 | sampleDeltaSens[r] <- (rt (1, nu[r]) * std[r] + mu[r]) * hierPosteriorSens$stdX 128 | redoneSens <- redoneSens + 1 129 | } 130 | } 131 | 132 | d1 <- density(deltaShr, from = 2*min(deltaShr), to=2*max(deltaShr), n = 512 ) 133 | d2 <- density(sampleDelta, from = 2*min(deltaShr), to=2*max(deltaShr), n = 512 ) 134 | d3<- density(sampleDeltaSens, from = 2*min(deltaShr), to=2*max(deltaShr), n = 512 ) 135 | 136 | filename <- paste('plotPost',class1,class2,'.pdf',sep = '') 137 | plot(d1, col=1, xlim = c(-.1,.1)) 138 | lines(d2,col=2) 139 | lines(d3,col=3) 140 | legend(0.02,10,legend=c('shr','hier','hierSens'), lty=c(1,1,1),col=c(1,2,3)) 141 | # dev.off() 142 | } 143 | 144 | #this function parses the list of hierarchical models and return relevant facts about the estimated probabilities. 145 | postEstimatedDelta <- function (hierModels){ 146 | #gets a list of hierarchical models and estimates via sampling the probability of the next delta being left, rope and right. 
147 | reps <- length(hierModels) 148 | EstimPLeft <- vector (length = reps) 149 | EstimPRope <- vector (length = reps) 150 | EstimPRight <- vector (length = reps) 151 | EstimPLeftBias <- vector (length = reps) 152 | EstimPRopeBias <- vector (length = reps) 153 | EstimPRightBias <- vector (length = reps) 154 | 155 | postSamples <- length(hierModels[[1]]$stanResults$delta0) 156 | sampledDeltas <- vector (length = postSamples) 157 | 158 | for (i in 1:reps){ 159 | fittedModel <- hierModels[[i]] 160 | std <- fittedModel$stanResults$std0 161 | mu <- fittedModel$stanResults$delta0 162 | nu <- fittedModel$stanResults$nu 163 | for (r in 1:postSamples){ 164 | sampledDeltas[r] <- 1000 165 | while (abs(sampledDeltas[r]) > 1) { 166 | sampledDeltas[r] <- (rt (1, nu[r]) * std[r] + mu[r]) * fittedModel$stdX 167 | } 168 | } 169 | #probabilities of the sampled deltas to be left rigth rope 170 | EstimPLeft[i] <- mean (sampledDeltas < -0.01) 171 | EstimPRight[i] <- mean (sampledDeltas > 0.01) 172 | EstimPRope[i] <- mean (sampledDeltas < 0.01 & sampledDeltas > -0.01) 173 | EstimPLeftBias[i] <- hierModels[[i]]$nextDelta$left 174 | EstimPRopeBias[i] <- hierModels[[i]]$nextDelta$rope 175 | EstimPRightBias[i] <- hierModels[[i]]$nextDelta$right 176 | 177 | } 178 | actuals <- getActuals() 179 | #probability of delta belonging to left, rope and right 180 | estimatedDelta <- data.frame('EstimPLeft'=EstimPLeft, 'EstimPRight'= EstimPRight, 'EstimPRope' = EstimPRope) 181 | #probability that we return as inference 182 | estimatedDeltaBias <- data.frame('pLeft'=EstimPLeftBias, 'pRight'= EstimPRightBias, 'pRope' = EstimPRopeBias) 183 | 184 | return (list('actuals'=actuals, 'estimatedDelta'=estimatedDelta, 'estimatedDeltaBias'=estimatedDeltaBias)) 185 | } 186 | 187 | 188 | 189 | 190 | #computes the Kl div between the posterior and the shrinkage estimates 191 | KLPostShrinkage <- function (fittedModel,shrinkageEstimates){ 192 | library('flexmix') 193 | #from the posterior t or the posterior gaussian 
194 | postSamples <- length(fittedModel$stanResults$delta0) 195 | sampledDeltas <- vector (length = postSamples) 196 | 197 | std <- fittedModel$stanResults$std0 198 | mu <- fittedModel$stanResults$delta0 199 | 200 | 201 | #if the model is Student 202 | if (!is.null(fittedModel$nu)){ 203 | nu <- fittedModel$stanResults$nu 204 | for (r in 1:postSamples){ 205 | sampledDeltas[r] <- 1000 206 | while (abs(sampledDeltas[r]) > 1) { 207 | sampledDeltas[r] <- (rt (1, nu[r]) * std[r] + mu[r]) * fittedModel$stdX 208 | } 209 | } 210 | } 211 | #otherwise gaussian sampling 212 | else{ 213 | for (r in 1:postSamples){ 214 | sampledDeltas[r] <- 1000 215 | while (abs(sampledDeltas[r]) > 1) { 216 | sampledDeltas[r] <- rnorm (1, sd= std[r], mean= mu[r]) * fittedModel$stdX 217 | } 218 | } 219 | } 220 | d1 <- density(shrinkageEstimates, from = 2*min(shrinkageEstimates), to=2*max(shrinkageEstimates), n = 512 ) 221 | d2 <- density(sampledDeltas, from = 2*min(shrinkageEstimates), to=2*max(shrinkageEstimates), n = 512 ) 222 | estimatedKL <- KLdiv(cbind(shrink=d1$y,fitted=d2$y)) 223 | return(estimatedKL) 224 | } 225 | 226 | 227 | 228 | #produces boxplot and scatter plots for 229 | #comparing MLE and shrunken estimates 230 | plotShrinkageMle <- function (){ 231 | library(tikzDevice) 232 | load("~/Documents/devel/tutorialML/hierarchical/Rdata/cvalFriedmanPredictive.Rdata") 233 | tikz("bplotHierShr.tex", width= 6.5, height=4.5) 234 | boxplot( hierModels[[2]]$delta_each_dset, trainData[[2]]$currentMleDiffLdaCart, 235 | xlab=c('hier','mle'), ylab="Estimate of $delta_i$") 236 | dev.off() 237 | 238 | tikz("scatterHierActual.tex", width= 6.5, height=4.5) 239 | plot(dset$averageTime ~ dset$AthleteName, xlab="", ylab="Execution time") 240 | dev.off() 241 | 242 | tikz("scatterMleActual.tex", width= 6.5, height=4.5) 243 | plot(dset$averageTime ~ dset$AthleteName, xlab="", ylab="Execution time") 244 | dev.off() 245 | 246 | } 247 | 248 | plotPosteriorGGplot2 <- function (hierPosterior, hierPosteriorKru, 
hierPosteriorJua, suffix) { 249 | # library(tikzDevice) 250 | library(ggplot2) 251 | postSamples <- length(hierPosterior$stanResults$delta0) 252 | sampleDelta <- vector (length = postSamples) 253 | std <- hierPosterior$stanResults$std0 254 | mu <- hierPosterior$stanResults$delta0 255 | nu <- hierPosterior$stanResults$nu 256 | redone <- -1 * postSamples 257 | redoneSens <- -1 * postSamples 258 | deltaShr <- hierPosterior$delta_each_dset 259 | for (r in 1:postSamples){ 260 | sampleDelta[r] <- 1000 261 | while (abs(sampleDelta[r]) > 1) { 262 | sampleDelta[r] <- (rt (1, nu[r]) * std[r] + mu[r]) * hierPosterior$stdX 263 | redone <- redone + 1 264 | } 265 | } 266 | 267 | sampleDeltaKru <- vector (length = postSamples) 268 | std <- hierPosteriorKru$stanResults$std0 269 | mu <- hierPosteriorKru$stanResults$delta0 270 | nu <- hierPosteriorKru$stanResults$nu 271 | redoneKru <- -1 * postSamples 272 | for (r in 1:postSamples){ 273 | sampleDeltaKru[r] <- 1000 274 | while (abs(sampleDeltaKru[r]) > 1) { 275 | sampleDeltaKru[r] <- (rt (1, nu[r]) * std[r] + mu[r]) * hierPosteriorKru$stdX 276 | redoneKru <- redoneKru + 1 277 | } 278 | } 279 | 280 | sampleDeltaJua <- vector (length = postSamples) 281 | std <- hierPosteriorJua$stanResults$std0 282 | mu <- hierPosteriorJua$stanResults$delta0 283 | nu <- hierPosteriorJua$stanResults$nu 284 | for (r in 1:postSamples){ 285 | sampleDeltaJua[r] <- 1000 286 | while (abs(sampleDeltaJua[r]) > 1) { 287 | sampleDeltaJua[r] <- (rt (1, nu[r]) * std[r] + mu[r]) * hierPosteriorJua$stdX 288 | } 289 | } 290 | 291 | limit<-max(abs(min(deltaShr)),abs(max(deltaShr))) 292 | d1 <- density(deltaShr, from = -2*limit, to=2*limit, n = 512 ) 293 | d2 <- density(sampleDelta, from = -2*limit, to=2*limit, n = 512 ) 294 | d3 <- density(sampleDeltaKru, from = -2*limit, to=2*limit, n = 512 ) 295 | d4 <- density(sampleDeltaJua, from = -2*limit, to=2*limit, n = 512 ) 296 | 297 | #save the density to file 298 | stopifnot(mean(d1$x==d2$x)==1) 299 | 
stopifnot(mean(d1$x==d3$x)==1) 300 | df<- data.frame(x=d1$x,shr=d1$y,hier=d2$y,kru=d3$y,jua=d4$y) 301 | fileName <- paste('csvResults/density',suffix,'.tex',sep='') 302 | write.table(df,file=fileName,sep=',',row.names = FALSE, quote = FALSE) 303 | 304 | 305 | filename <- paste('plotPostHierKru',suffix,'.pdf',sep = '') 306 | pdf(file=filename) 307 | plot(d1, col='black', xlim = c(-.1,.1), main = '' ) 308 | lines(d2,col='blue') 309 | lines(d3,col='green') 310 | # lines(d3,col='red') 311 | legend('topright',c('shr','hier','jua'),lty=c(1,1,1), 312 | col=c('black','blue', 'green')) 313 | # legend('topright',c('shr','hier','kru','jua'),lty=c(1,1,1), 314 | # col=c('black','blue', 'green','red')) 315 | 316 | 317 | 318 | dev.off() 319 | 320 | # deltaShrNA<- c(deltaShr,rep(NA,length(sampleDelta) - length(deltaShr))) 321 | # m <- data.frame(hier=sampleDelta,kru=sampleDeltaKru,shrinkage=deltaShrNA) 322 | # dfs <- stack(m) 323 | # outFile <- paste('densityPlot',suffix,'.pdf',sep='') 324 | # pdf(outFile) 325 | # tikz(outFile, width= 6.5, height=4.5) 326 | # p <- ggplot(dfs, aes(x=values)) + geom_density(aes(group=ind, colour=ind, fill=ind), alpha=0.3) + xlim(-0.1, 0.1) 327 | 328 | # dev.off() 329 | } 330 | -------------------------------------------------------------------------------- /hierarchical/actualFriedmanAccuracy.R: -------------------------------------------------------------------------------- 1 | actualFriedmanAccuracy <- function(friedmanType=1, reps=500) { 2 | #estimates actual accuracy of real classifiers on data sets of the Friedman family 3 | #friedmanType is the friedman function (1,2,3) while reps is the number of repetitions asked for the assessment. 
4 | library(mlbench) 5 | library(caret) 6 | source('generateFriedmanData.R') 7 | source('genFriedmanSettings.R') 8 | set.seed(42) 9 | 10 | 11 | settings <- genFriedmanSettings(friedmanType) 12 | testSettings <- settings 13 | #every test set will have size 5000 14 | testSettings$sampleSize <- rep(5000,length(testSettings$sampleSize)) 15 | settings$ldaAccuracy <- vector (length = length(testSettings$sampleSize)) 16 | settings$cartAccuracy <- vector (length = length(testSettings$sampleSize)) 17 | settings$knnAccuracy <- vector (length = length(testSettings$sampleSize)) 18 | 19 | 20 | #making sure that no cross-validation is performed while fitting the models. 21 | control <- trainControl(method="none") 22 | 23 | 24 | for (currentSetting in 1:dim(settings)[1]){ 25 | 26 | cat('setting: ',currentSetting,'/',dim(settings)[1],'\n') 27 | ldaAccuracy <- vector(length = reps) 28 | cartAccuracy <- vector(length = reps) 29 | knnAccuracy <- vector(length = reps) 30 | 31 | for (currentRep in 1:reps){ 32 | cat(currentRep,'\n') 33 | #the following functions should generate the data according to the given settings and discretize according to 34 | #the supplied threshold 35 | trainingData <- generateFriedmanData(friedmanType,settings[currentSetting,]) 36 | testData <- generateFriedmanData(friedmanType,testSettings[currentSetting,]) 37 | 38 | 39 | #fit lda 40 | fit.lda <- train(trainingData$x, trainingData$class, method="lda", trControl=control, tuneLength = 1) 41 | ldaPredictions <- predict(fit.lda,testData$x) 42 | ldaAccuracy[currentRep] <- mean (ldaPredictions == testData$class) 43 | 44 | fit.cart <- train(trainingData$x, trainingData$class, method="rpart1SE", trControl=control, tuneLength = 1) 45 | cartPredictions <- predict (fit.cart,testData$x) 46 | cartAccuracy[currentRep] <- mean (cartPredictions == testData$class) 47 | 48 | fit.knn <- train(trainingData$x, trainingData$class, method="knn", trControl=control, tuneLength = 1) 49 | knnPredictions <- predict (fit.knn,testData$x) 
50 | knnAccuracy[currentRep] <- mean (knnPredictions == testData$class) 51 | } 52 | #at this point all the simulation for the given settings have been done. 53 | #let's store the results 54 | settings$ldaAccuracy[currentSetting] <- mean (ldaAccuracy) 55 | settings$cartAccuracy[currentSetting] <- mean (cartAccuracy) 56 | settings$knnAccuracy[currentSetting] <- mean (knnAccuracy) 57 | } 58 | settings$repetitions <- rep(reps,length(testSettings$sampleSize)) 59 | settings$FriedmanType <- rep(friedmanType,length(testSettings$sampleSize)) 60 | #at this points we save the result to file 61 | csvFilename <- paste('csvResults/actualAccFriedman',friedmanType,".csv",sep='') 62 | write.matrix(settings,file=csvFilename, sep=",") 63 | 64 | 65 | 66 | } -------------------------------------------------------------------------------- /hierarchical/analyzeFriedmanResults.R: -------------------------------------------------------------------------------- 1 | analyzeFriedmanResults <- function (friedmanType=1) { 2 | 3 | #the case in which individual families have to be analyzed 4 | if (friedmanType < 4){ 5 | actualFileName <- paste('csvResults/actualAccFriedman',friedmanType,'.csv',sep = '') 6 | actualAccFriedman <- read.csv(actualFileName) 7 | 8 | cvalFileName <- paste('csvResults/cvalAccFriedman',friedmanType,'.csv',sep = '') 9 | cvalAccFriedman <- read.csv(cvalFileName) 10 | } 11 | 12 | #the case in which the three families are jointly experimented 13 | #following code is a bit rough but works 14 | if (friedmanType== 4){ 15 | actualFileName <- 'csvResults/actualAccFriedman1.csv' 16 | actualAccFriedman1 <- read.csv(actualFileName) 17 | actualFileName <- 'csvResults/actualAccFriedman2.csv' 18 | actualAccFriedman2 <- read.csv(actualFileName) 19 | actualFileName <- 'csvResults/actualAccFriedman3.csv' 20 | actualAccFriedman3 <- read.csv(actualFileName) 21 | actualAccFriedman <- rbind (actualAccFriedman1, actualAccFriedman2, actualAccFriedman3) 22 | 23 | cvalFileName <- 
paste('csvResults/cvalAccFriedman',friedmanType,'.csv',sep = '') 24 | cvalAccFriedman <- read.csv(cvalFileName) 25 | } 26 | 27 | #mean estimation error on the whole family, averaged over the repetitions 28 | #check how many repetitions: how many times the first dset has been run 29 | idx <- 1 #which setting we use as references 30 | repetitions <- sum( 31 | cvalAccFriedman$redundantFeats == actualAccFriedman$redundantFeats[1] & 32 | cvalAccFriedman$sampleSize == actualAccFriedman$sampleSize[1] & 33 | cvalAccFriedman$friedmanSd == actualAccFriedman$friedmanSd[1]) 34 | #proportion of times hier leads lower error than MLE (out of all the dsets of the family), averaged over repetitions 35 | 36 | maeHier <- vector(length = repetitions) 37 | maeHierGauss <- vector(length = repetitions) 38 | maeHierKru <- vector(length = repetitions) 39 | maeHierJua <- vector(length = repetitions) 40 | maeMle <- vector(length = repetitions) 41 | proportionDsetsHierBeatsMLe <- vector(length = repetitions) 42 | HierBeatsMLeJointMae <- vector(length = repetitions) 43 | HierBeatsKruJointMae <- vector(length = repetitions) 44 | KruBeatsGaussJointMae <- vector(length = repetitions) 45 | KruBeatsJuaJointMae <- vector(length = repetitions) 46 | 47 | counter <- 1 48 | howManySettings <- dim(actualAccFriedman)[1] 49 | for (currentRep in 1:repetitions){ 50 | #those two variables will store the Mae (aggregated over all data sets for the current iteration) 51 | currentMaeHier <- vector (length = howManySettings) 52 | currentMaeMle <- vector (length = howManySettings) 53 | currentMaeGauss <- vector (length = howManySettings) 54 | currentMaeKru <- vector (length = howManySettings) 55 | currentMaeJua <- vector (length = howManySettings) 56 | 57 | #now loop over the data sets (each data set is also a setting) 58 | for (currentSetting in 1:howManySettings){ 59 | currentMleEstim <- cvalAccFriedman$mleDiffLdaCart[counter] 60 | currentHierEstim <- cvalAccFriedman$hierDiffLdaCart[counter] 61 | 
currentGaussEstim <- cvalAccFriedman$gaussDiffLdaCart[counter] 62 | currentKruEstim <- cvalAccFriedman$kruDiffLdaCart[counter] 63 | currentJuaEstim <- cvalAccFriedman$juaDiffLdaCart[counter] 64 | 65 | idx = which (actualAccFriedman$redundantFeats == cvalAccFriedman$redundantFeats[counter] & 66 | actualAccFriedman$sampleSize == cvalAccFriedman$sampleSize[counter] & 67 | actualAccFriedman$friedmanSd == cvalAccFriedman$friedmanSd[counter] 68 | ) 69 | actualDifference <- actualAccFriedman$ldaAccuracy[idx] - actualAccFriedman$cartAccuracy[idx] 70 | currentMaeHier[currentSetting] <- abs (currentHierEstim - actualDifference) 71 | currentMaeMle[currentSetting] <- abs (currentMleEstim - actualDifference) 72 | currentMaeGauss[currentSetting] <- abs (currentGaussEstim - actualDifference) 73 | currentMaeKru[currentSetting] <- abs (currentKruEstim - actualDifference) 74 | currentMaeJua[currentSetting] <- abs (currentJuaEstim - actualDifference) 75 | 76 | counter <- counter + 1 77 | } 78 | maeHier[currentRep] <- mean(currentMaeHier) 79 | maeMle[currentRep] <- mean(currentMaeMle) 80 | maeHierKru[currentRep] <- mean(currentMaeKru) 81 | maeHierJua[currentRep] <- mean(currentMaeJua) 82 | maeHierGauss[currentRep] <- mean(currentMaeGauss) 83 | proportionDsetsHierBeatsMLe[currentRep] <- mean (currentMaeHier < currentMaeMle) 84 | HierBeatsMLeJointMae[currentRep] <- ifelse ( maeHier[currentRep] < maeMle[currentRep], 1, 0) 85 | HierBeatsKruJointMae[currentRep] <- ifelse ( maeHier[currentRep] < maeHierKru[currentRep], 1, 0) 86 | KruBeatsGaussJointMae[currentRep] <- ifelse ( maeHierKru[currentRep] < maeHierGauss[currentRep], 1, 0) 87 | KruBeatsJuaJointMae[currentRep] <- ifelse ( maeHierKru[currentRep] < maeHierJua[currentRep], 1, 0) 88 | } 89 | #at this points we save the result to file 90 | csvFilename <- paste('csvResults/comparisonMleHierFriedman',friedmanType,".csv",sep='') 91 | dsetResults <- data.frame(maeMle, maeHierGauss, maeHier, maeHierKru, maeHierJua, HierBeatsMLeJointMae, 
HierBeatsKruJointMae, 92 | KruBeatsGaussJointMae,KruBeatsJuaJointMae) 93 | write.matrix(dsetResults,file=csvFilename, sep=",") 94 | 95 | results <- list ('dsetResults'=dsetResults, 'HierBeatsMLeJointMae' = HierBeatsMLeJointMae) 96 | return (results) 97 | } -------------------------------------------------------------------------------- /hierarchical/analyze_uci_with_rope.R: -------------------------------------------------------------------------------- 1 | analyze_uci_with_rope <- function() { 2 | #load from file "uci_data.RData" the cross-validation results of different classifiers on different data sets. 3 | #the relevant information is all included in the self-explaining list called uci_classification. 4 | #It contains accuracy results referring to 10 runs of 10-folds cross-validation for each classifier on 54 data sets. 5 | #The considered classifiers are ('naive Bayes','aode','hnb','j48','j48_grafted'), coded as 1,2,3,4,5. 6 | #It performs all the pairwise comparisons among such classifiers, on this collection of data sets. 
7 | #As a result it returns the p-value of the signed rank, and the posterior probabilities computed by 8 | #the hierarchical test (posterior probability of the next delta being positive/negative; of the next delta 9 | #lying within rope, at the left and at the right of the rope) 10 | library(MASS) 11 | library(matrixStats) 12 | library(rstan) 13 | rstan_options(auto_write = TRUE) 14 | 15 | options(mc.cores = parallel::detectCores()) 16 | source ("hierarchical_test.R") 17 | 18 | std_upper_bound <- 1000 19 | standardization <- 1 #we run on standardized data 20 | 21 | #utility function which gets the average score of the given classifier on each data set 22 | getAvgScore <- function (currentClassifier,score) { 23 | avg_score <- vector(length = how_many_dsets, mode = "double"); 24 | for ( ii in 1:how_many_dsets ){ 25 | avg_score[ii] <- mean ( score [classifierId==currentClassifier & dsetId==dsetsList[ii]] ) 26 | } 27 | return (avg_score) 28 | } 29 | 30 | load("uci_data.RData") 31 | chains = 4 32 | 33 | 34 | #check arguments 35 | if (std_upper_bound<=1){ 36 | stop("std_std_upper_bound should be larger than 1") 37 | } 38 | 39 | samplingType="student" #default choice 40 | if (!
( (samplingType=="normal") | (samplingType=="student") )){ 41 | stop("wrong samplingType") 42 | } 43 | 44 | #for each couple of classifiers: p_value_sign_rank,prob_classANextDelta,prob_ropeNextDelta,prob_classBNextDelta 45 | 46 | dsetId<- uci_classification$DatasetID 47 | classifierId <- uci_classification$ClassifierID 48 | #we run on accuracy 49 | score <- uci_classification$Percent.correct 50 | 51 | foldID <- uci_classification$Key.Fold 52 | nFolds <- max (foldID) 53 | rho=1/nFolds 54 | 55 | filename <- paste("uciResults_StdUpper",as.character(std_upper_bound),"samplingType",samplingType, sep="-") 56 | if (rho==0) { 57 | filename <- paste(filename,"noCorrel",sep="-") 58 | } 59 | 60 | Rdata_filename <- paste (filename,"Rdata",sep=".") 61 | 62 | 63 | runID <- uci_classification$Key.Run 64 | #the fold ID discriminates between different folds and different runs 65 | foldID <- runID*10+foldID-10 66 | 67 | rope_min <- -0.01 68 | rope_max <- 0.01 69 | 70 | how_many_classifiers <- max(unique(classifierId)) 71 | 72 | classifier_names <- c('naive Bayes','aode','hnb','j48','j48_grafted') #hard-coded 73 | how_many_comparisons <- how_many_classifiers*(how_many_classifiers-1)/2 #number of pairwise comparisons 74 | 75 | #fields to be filled during the multiple comparisons 76 | p_value_sign_rank <- vector(length = how_many_comparisons, mode = "double") 77 | p_value_t_test <- vector(length = how_many_comparisons, mode = "double") 78 | median_difference <- vector(length = how_many_comparisons, mode = "double") 79 | prob_classANextDelta <- vector(length = how_many_comparisons, mode = "double") 80 | prob_ropeNextDelta <- vector(length = how_many_comparisons, mode = "double") 81 | prob_classBNextDelta <- vector(length = how_many_comparisons, mode = "double") 82 | prob_positiveNextDelta<- vector(length = how_many_comparisons, mode = "double") 83 | prob_negativeNextDelta<- vector(length = how_many_comparisons, mode = "double") 84 | classifierI <- vector(length = how_many_comparisons, 
mode = "integer") 85 | classifierJ <- vector(length = how_many_comparisons, mode = "integer") 86 | 87 | 88 | dsetsList <- unique(dsetId); 89 | 90 | how_many_dsets <- length(dsetsList) 91 | 92 | hierarchicalResults <- list() 93 | counter <- 1 94 | for (i in 1: (how_many_classifiers-1) ) { 95 | for (j in (i+1) : how_many_classifiers){ 96 | chains <-4 97 | show(c(i,j)) 98 | 99 | classifierI[counter] <- i 100 | classifierJ[counter] <- j 101 | 102 | #run the signed rank 103 | avgScoreI <- getAvgScore (i,score) 104 | avgScoreJ <- getAvgScore (j,score) 105 | median_difference[counter] <- median(avgScoreI-avgScoreJ) 106 | wilcoxonStat <- wilcox.test (avgScoreI-avgScoreJ,alternative = "two.sided"); 107 | p_value_sign_rank[counter] <- wilcoxonStat$p.value 108 | tTestStat <- t.test (avgScoreI-avgScoreJ,alternative = "two.sided"); 109 | p_value_t_test[counter] <- tTestStat$p.value 110 | 111 | #prepare the data for the hierarchical test 112 | results <- cbind (classifierId, dsetId, score, foldID) 113 | resultsI <- results[classifierId==i,] 114 | resultsJ <- results[classifierId==j,] 115 | 116 | #sort by dataset and then by fold 117 | #resultsI<-mat.sort(resultsI, c(2,4)) 118 | #resultsJ<-mat.sort(resultsJ, c(2,4)) 119 | 120 | #check results are consistently paired as for dataset and foldID 121 | #data are already properly sorted, so this control passes 122 | #otherwise we should sort the matrices 123 | stopifnot( mean (resultsI[,c(2,4)]==resultsJ[,c(2,4)]) == 1) 124 | diffIJ <- cbind (resultsI[,2] , resultsI[,3]-resultsJ[,3]) 125 | 126 | 127 | #build matrix of results to be parsed by hierarchical test 128 | x<-matrix(ncol = max(foldID), nrow = how_many_dsets ) 129 | for (dsetIdx in 1:how_many_dsets) { 130 | tmp <- diffIJ [diffIJ[,1] == dsetIdx,] 131 | x[dsetIdx,] <- t (tmp [,2]) 132 | } 133 | 134 | 135 | #run the hierarchical test 136 | #we do not provide a simulation ID as this is run locally 137 | startTime<-Sys.time() 138 | simulationID <- paste(as.character(i*10 + 
j),as.character(std_upper_bound),samplingType,".dat",sep = "-") 139 | 140 | hierarchicalResults[[counter]] <- hierarchical.test (x=x, rope_min=rope_min, rope_max=rope_max, sample_file=simulationID, 141 | chains = chains, 142 | samplingType = 'student') 143 | 144 | stopTime<-Sys.time() 145 | show(startTime-stopTime) 146 | 147 | #classA is the first classifier, classB the second classifier 148 | #delta = acc(classA) - acc(classB) 149 | #thus in the posterior the right tail of delta is the tail in favor 150 | #of classA 151 | 152 | prob_classANextDelta[counter] <- hierarchicalResults[[counter]]$nextDelta$right 153 | prob_ropeNextDelta[counter] <- hierarchicalResults[[counter]]$nextDelta$rope 154 | prob_classBNextDelta[counter] <- hierarchicalResults[[counter]]$nextDelta$left 155 | 156 | prob_positiveNextDelta[counter] <- hierarchicalResults[[counter]]$nextDelta$positive 157 | prob_negativeNextDelta[counter] <- hierarchicalResults[[counter]]$nextDelta$negative 158 | 159 | counter <- counter + 1 160 | } 161 | } 162 | 163 | classifierIString <-vector(length = how_many_comparisons, mode = "character") 164 | classifierJString <-vector(length = how_many_comparisons, mode = "character") 165 | 166 | classifier_names <- c('naive Bayes','aode','hnb','j48','j48_grafted'); 167 | 168 | results_matrix<-data.frame( 169 | classifierI=classifierI, 170 | classifierJ=classifierJ, 171 | median_difference=median_difference, 172 | p_value_sign_rank=p_value_sign_rank, 173 | p_value_t_test=p_value_t_test, 174 | prob_classANextDelta=prob_classANextDelta, 175 | prob_ropeNextDelta=prob_ropeNextDelta, 176 | prob_classBNextDelta=prob_classBNextDelta, 177 | prob_positiveNextDelta=prob_positiveNextDelta, 178 | prob_negativeNextDelta=prob_negativeNextDelta, 179 | rope_min=rope_min, 180 | rope_max=rope_max 181 | ) 182 | 183 | csv_filename <- paste (filename,"csv",sep=".") 184 | 185 | write.matrix(results_matrix, file = csv_filename, sep = ",") 186 | 187 | results <- list() 188 | results[[1]] <- 
list('classifierI'=classifierI, 189 | 'classifierJ'=classifierJ, 190 | 'p_value_sign_rank'=p_value_sign_rank, 191 | 'median_difference'=median_difference, 192 | 'prob_classANextDelta'=prob_classANextDelta, 193 | 'prob_ropeNextDelta'=prob_ropeNextDelta, 194 | 'prob_classBNextDelta'=prob_classBNextDelta 195 | ) 196 | 197 | 198 | save(hierarchicalResults, file = Rdata_filename) 199 | 200 | return (results) 201 | 202 | } 203 | 204 | -------------------------------------------------------------------------------- /hierarchical/batchSensivity.R: -------------------------------------------------------------------------------- 1 | batchSensitivity <- function (){ 2 | library('MASS') 3 | source ('sensitivityNormalStudent.R') 4 | for (i in 1:4) { 5 | for (j in (i+1) : 5){ 6 | cat (i,j) 7 | sensitivityNormalStudent(i,j) 8 | } 9 | } 10 | } -------------------------------------------------------------------------------- /hierarchical/compTable.R: -------------------------------------------------------------------------------- 1 | compTable <- function() { 2 | source('Utils.R') 3 | suffix <- c('12','13','14','15','23','24','25','34','35','45') 4 | KLmatrix <- matrix(ncol=4,nrow=10) 5 | colnames(KLmatrix)<-c('klGauss','klJua','klHier','klKru') 6 | idx <- c(2,3,1)#to pick left, rope, right probabilities from the structures 7 | 8 | for (i in 1:length(suffix)){ 9 | # fileName <- paste('sensitivityNormalStudent',suffix[i],'.Rdata',sep='') 10 | # fileName <- paste('sensitivityStudentAlphaBeta',suffix[i],'.Rdata',sep='') 11 | fileName <- paste('Rdata/sensitivityNovelAlphaBeta',suffix[i],'.Rdata',sep='') 12 | load(file = fileName) 13 | currentHierDataFrame <- as.numeric(hierPosteriorNovel$nextDelta)[idx] 14 | # currentKruDataFrame <- as.numeric(hierPosteriorKru$nextDelta)[idx] 15 | # currentJuaDataFrame <- as.numeric(hierPosteriorJua$nextDelta)[idx] 16 | 17 | 18 | # tmp <- matrix(nrow=10,ncol=4) 19 | # for (j in 1:10){ 20 | # 
tmp[j,1]=KLPostShrinkage(hierPosteriorGauss,hierPosteriorGauss$delta_each_dset)[1,2] 21 | # tmp[j,2]=KLPostShrinkage(hierPosteriorJua,hierPosteriorJua$delta_each_dset)[1,2] 22 | # tmp[j,3]=KLPostShrinkage(hierPosterior,hierPosterior$delta_each_dset)[1,2] 23 | # tmp[j,4]=KLPostShrinkage(hierPosteriorKru,hierPosteriorKru$delta_each_dset)[1,2] 24 | # } 25 | # KLmatrix[i,] <- apply(tmp,MARGIN = 2, median) 26 | 27 | # plotPosteriorGGplot2(hierPosterior, hierPosteriorKru, hierPosteriorJua, suffix[i]) 28 | 29 | # currentDataFrame <- 30 | # as.data.frame(cbind(halfPosteriorGauss$nextDelta, halfPosteriorJua$nextDelta,halfPosterior$nextDelta, 31 | # halfPosteriorKru$nextDelta)) 32 | # logPredictive <- c (sum(halfPosteriorGauss$logPredictiveEachDset),sum(halfPosteriorJua$logPredictiveEachDset), 33 | # sum(halfPosterior$logPredictiveEachDset), sum(halfPosteriorKru$logPredictiveEachDset)) 34 | # currentDataFrame <- rbind(logPredictive,currentDataFrame) 35 | if (i==1){ 36 | hierDataFrame <- currentHierDataFrame 37 | # kruDataFrame <- currentKruDataFrame 38 | # juaDataFrame <- currentJuaDataFrame 39 | } 40 | else{ 41 | hierDataFrame <- rbind(hierDataFrame,currentHierDataFrame) 42 | # kruDataFrame <- rbind(kruDataFrame,currentKruDataFrame) 43 | # juaDataFrame <- rbind(juaDataFrame,currentJuaDataFrame) 44 | } 45 | } 46 | # colnames (dataFrame) <- c('gauss','jua','gc','kru') 47 | colnames(hierDataFrame) <- c('left','rope','right') 48 | # colnames(juaDataFrame) <- c('left','rope','right') 49 | # colnames(kruDataFrame) <- c('left','rope','right') 50 | rownames(hierDataFrame)<- suffix 51 | # rownames(juaDataFrame)<- suffix 52 | # rownames(kruDataFrame)<- suffix 53 | 54 | # df <- as.data.frame(cbind(hierDataFrame,kruDataFrame,juaDataFrame)) 55 | df <- as.data.frame(cbind(hierDataFrame)) 56 | roundedDF <- round(df,digits=2) 57 | # write.table(roundedDF, file='csvResults/PredictionsHierKruJua.csv', sep=',') 58 | write.table(roundedDF, 
file='csvResults/PredictionsHierAlpha05-5-Beta05-015.csv', sep=',') 59 | # write.table(KLmatrix, file='csvResults/KLNormalJuaKruHier.csv', sep=',', col.names = TRUE) 60 | 61 | } -------------------------------------------------------------------------------- /hierarchical/cv_nbc.R: -------------------------------------------------------------------------------- 1 | cv_competing_nbc <- function(nruns,nfolds,n,theta_diff,theta_star,seed=0) { 2 | # [results]=cv_competing_nbc(nruns,nfolds,n,theta_diff,theta_star) 3 | # implements cross-validation, given the number or runs, folds, 4 | # data set size (n) and theta. Compares 2 nbc which uses two different feature for the prediction . 5 | # Theta determines the conditional prob of the feature while 6 | # generating the data. As a consequence the expected accuracy of naive Bayes is 7 | # theta1 and theta2 respectively for the two features. 8 | # theta1=1, namely the first feature is a copty of the class and yields a 9 | # perfect classifier. 10 | # theta2=1-theta_diff; theta2 is typically high and allows to correclty 11 | # learnt the bias. 12 | # Both classifiers are random guessers and detected significance are type I 13 | # errors. 
14 | # cv_results contains: 15 | # -nbc1 accuracy 16 | # -nbc2 accuracy 17 | # -delta = nbc1 accuracy - nbc2 accuracy 18 | # -var=variance (nbc1 accuracy - nbc2 accuracy) 19 | 20 | 21 | c_val <- function(nfolds) { 22 | cv <-list() 23 | cv$nbc1 <- matrix(1,nfolds); 24 | cv$nbc2 <- cv$nbc1; 25 | cv$delta <- cv$nbc1; 26 | cv$var <- cv$nbc1; 27 | #column of the feature in the data set for nbc1 and nbc2 28 | feat_idx_nbc1 <- 2; 29 | feat_idx_nbc2 <- 3; 30 | boring_idx <- rep(1:nfolds,ceiling(dim(data)[1]/nfolds)); 31 | boring_idx <- boring_idx[1:dim(data)[1]]; 32 | permuted_idx <- sample(dim(data)[1]); 33 | permuted_sorted_data <- data[permuted_idx,]; 34 | permuted_sorted_data <- permuted_sorted_data[order(permuted_sorted_data[,1]),]; 35 | 36 | for (j in 1:nfolds) { 37 | train <- permuted_sorted_data[which(boring_idx!=j),]; 38 | test <- permuted_sorted_data[which(boring_idx==j),]; 39 | #with the perfect feature 40 | nbc1 <- learn_nbc(train[,c(1,2)]); 41 | #with the stochastic feature 42 | nbc2 <- learn_nbc(train[,c(1,3)]); 43 | preds_nbc1 <- test_nbc(nbc1,test,feat_idx_nbc1); 44 | preds_nbc2 <- test_nbc(nbc2,test,feat_idx_nbc2); 45 | #acc_nbc 46 | nbc1_correct <- preds_nbc1==test[,1]; 47 | cv$nbc1[j] <- mean(nbc1_correct); 48 | #acc_zero 49 | nbc2_correct <- preds_nbc2==test[,1]; 50 | cv$nbc2[j] <- mean(nbc2_correct); 51 | cv$delta[j] <- mean(nbc1_correct-nbc2_correct); 52 | #var_delta_acc 53 | cv$var[j] <- var(nbc1_correct-nbc2_correct); 54 | } 55 | cv 56 | } 57 | 58 | 59 | theta1 <- theta_star; 60 | theta2 <- theta1-theta_diff; 61 | data <- generate_data_with_two_feats(n,theta1,theta2); 62 | 63 | 64 | for (i in 1:nruns) { 65 | if (i==1) { 66 | cv_results <- c_val(nfolds) 67 | } 68 | else { 69 | tmp <- c_val(nfolds); 70 | cv_results$nbc1 <- c(cv_results$nbc1, tmp$nbc1); 71 | cv_results$nbc2 <- c(cv_results$nbc2, tmp$nbc2); 72 | cv_results$delta <- c(cv_results$delta, tmp$delta); 73 | cv_results$var <- c(cv_results$var, tmp$var); 74 | } 75 | } 76 | 77 | return 
(cv_results) 78 | 79 | } 80 | 81 | generate_data_with_two_feats <- function(n,theta1,theta2) { 82 | ##function data=generate_data(n,theta1,theta2) 83 | #generate n artificial instances with binary class and two binary features. 84 | #The bias of feature1 and feature2 is theta1 and theta2, namely P(f1|c1)=theta1; 85 | #P(~f1|~c1)=theta1; P(f2|c1)=theta2, P(~f2|~c1)=theta2. 86 | 87 | generate_feature <- function(theta) { 88 | #feature is obtained by flipping randomly (1-theta)# of class labels 89 | 90 | feature <- class-1; 91 | 92 | tmp <- runif(n)>theta; 93 | rand_idx <- tmp>theta; 94 | feature[tmp] <- 1-feature[tmp]; 95 | 96 | feature+1 97 | } 98 | 99 | class <- 1*(runif(n)>0.5)+1; 100 | 101 | feature1 <- generate_feature(theta1); 102 | feature2 <- generate_feature(theta2); 103 | 104 | data <- cbind(class,feature1,feature2); 105 | 106 | data 107 | 108 | } 109 | 110 | learn_nbc <- function(train) { 111 | ##nbc=learn_nbc(train) 112 | #learns naive Bayes (BDeu prior) from train data, assuming the class to be in first 113 | #column and the feature to be in second column. 114 | class_idx <- 1; 115 | feat_idx <- 2; 116 | 117 | marg <- (sum(train[,class_idx]==1)+.5)/(dim(train)[1]+1); 118 | marg <- c(marg,1-marg); 119 | nbc <- list('marg'=marg); 120 | 121 | cond <- matrix(0,2,2) 122 | cond[1,1] <- (sum(train[,class_idx]==1 & train[,feat_idx]==1) + .25) / (sum(train[,feat_idx]==1)+.5); 123 | cond[2,1] <- 1-cond[1,1]; 124 | cond[1,2] <- (sum(train[,class_idx]==2 & train[,feat_idx]==1) + .25) / ( sum(train[,feat_idx]==2) +.5); 125 | cond[2,2] <- 1-cond[1,2]; 126 | nbc$cond <- cond; 127 | 128 | nbc 129 | } 130 | 131 | 132 | test_nbc <- function(nbc,test,feat_idx) { 133 | ##preds_nbc=test_nbc(nbc,test,feat_idx) 134 | #Returns the most probable class predicted by naiveBayes for each instance of the test set. 135 | #The test set is assumed to contain a binary class and a 136 | #a binary feature. 
137 | 138 | nargin <- length(as.list(match.call()))-1 139 | if (nargin<3) feat_idx <- 2; 140 | 141 | preds_nbc <- matrix(1,dim(test)[1]); 142 | for (ii in 1:dim(test)[1]) { 143 | current_feat <- test[ii,feat_idx]; 144 | prob <- c(nbc$marg[1]*nbc$cond[current_feat,1], nbc$marg[2]*nbc$cond[current_feat,2]); 145 | if (prob[2]>prob[1]) { 146 | preds_nbc[ii] <- 2; 147 | } 148 | } 149 | 150 | preds_nbc 151 | } 152 | -------------------------------------------------------------------------------- /hierarchical/cvalFriedmanAccuracy.R: -------------------------------------------------------------------------------- 1 | cvalFriedmanAccuracy <- function(friedmanType=1, reps=250) { 2 | #estimates accuracy of classifiers on data sets of the Friedman family via cross-validation 3 | #(10 runs of 10 folds) 4 | #friedmanType is the friedman function (1,2,3) while reps is the number of repetitions 5 | #of the process data generation/cross-validation. 6 | #the family contains right onw 18 data sets. 7 | library(mlbench) 8 | library(caret) 9 | source('generateFriedmanData.R') 10 | source('genFriedmanSettings.R') 11 | source('hierarchical_test.R') 12 | 13 | 14 | if (friedmanType < 4){ 15 | settings <- genFriedmanSettings(friedmanType) 16 | friedmanTypeVec <- rep (friedmanType,nrow(settings)) 17 | } 18 | 19 | if (friedmanType == 4){ 20 | settings <- rbind(genFriedmanSettings(1),genFriedmanSettings(2),genFriedmanSettings(3)) 21 | friedmanTypeVec <- 22 | cbind ( t(rep(1,nrow(genFriedmanSettings(1)))), t(rep(2,nrow(genFriedmanSettings(2)))), 23 | t(rep(3,nrow(genFriedmanSettings(3)))) ) 24 | #friedman Type is a vector, containing the friedman Type of each setting 25 | } 26 | 27 | totExperiments <- dim(settings)[1] * reps 28 | mleDiffLdaCart <- vector (length = totExperiments) 29 | hierDiffLdaCart <- vector (length = totExperiments) 30 | gaussDiffLdaCart <- vector (length = totExperiments) 31 | kruDiffLdaCart <- vector (length = totExperiments) 32 | juaDiffLdaCart <- vector (length = 
totExperiments) 33 | #knnAccuracy <- vector (length = totExperiments) 34 | redundantFeats <- vector (length = totExperiments) 35 | sampleSize <- vector (length = totExperiments) 36 | friedmanSd <- vector (length = totExperiments) 37 | 38 | #10 runs of 10-folds cross-validation, hard-coded 39 | nRuns <- 10 40 | nFolds <- 10 41 | control <- trainControl(method = "repeatedcv", number=nFolds, repeats=nRuns) 42 | 43 | for (currentRep in 1:reps){ 44 | cat('Repetition:', currentRep,'\n') 45 | crossValResults <- matrix(nrow=dim(settings)[1], ncol = nRuns * nFolds); 46 | 47 | for (currentSetting in 1:dim(settings)[1]){ 48 | 49 | #the following functions should generate the data according to the given setting 50 | data <- generateFriedmanData(friedmanTypeVec[currentSetting],settings[currentSetting,]) 51 | 52 | #for simplicity we focus on the difference between lda and cart 53 | #we need to set the seed in order to pair the folds 54 | #we also need the seed to be different between each execution 55 | currentSeed <- as.numeric(Sys.time()) 56 | set.seed(currentSeed) 57 | 58 | fit.lda <- train(data$x, data$class, method="lda", trControl=control, tuneLength = 1) 59 | set.seed(currentSeed) 60 | fit.cart <- train(data$x, data$class, method="rpart1SE", trControl=control, tuneLength = 1) 61 | crossValResults[currentSetting,] <- fit.lda$resample$Accuracy - fit.cart$resample$Accuracy 62 | 63 | #we exploit the fact that results vectors are initially filled with FALSE 64 | #and we save the settings of the experiments (equal for all data sets) 65 | firstAvailable <- min (which (sampleSize == FALSE)) 66 | redundantFeats[firstAvailable] <- settings[currentSetting,]$redundantFeats 67 | sampleSize[firstAvailable] <- settings[currentSetting,]$sampleSize 68 | friedmanSd[firstAvailable] <- settings[currentSetting,]$friedmanSd 69 | 70 | 71 | } 72 | currentMleDiffLdaCart <- apply (crossValResults, 1, mean ) 73 | 74 | #at this point all the simulation for the given setting and repetitions have been 
done. 75 | stanFileName <- paste ('Stan',friedmanType, sep='') 76 | stanResults <- hierarchical.test(x = crossValResults, rho = 1/nFolds, sample_file = stanFileName, chains=4) 77 | currentHierDiffLdaCart <- stanResults$delta_each_dset 78 | 79 | # #Gaussian hierarchical model. 80 | # stanGaussianResults <- hierarchical.test(x = crossValResults, rho = 1/nFolds, sample_file = stanFileName, chains=4, samplingType = "gaussian") 81 | # currentHierGaussDiffLdaCart <- stanGaussianResults$delta_each_dset 82 | # 83 | # #Kru prior 84 | # kruResults <- hierarchical.test(x = crossValResults, rho = 1/nFolds, sample_file = stanFileName, chains=4, samplingType = "studentKruschke") 85 | # currentHierKruDiffLdaCart <- kruResults$delta_each_dset 86 | # 87 | # #Jua prior 88 | # juaResults <- hierarchical.test(x = crossValResults, rho = 1/nFolds, sample_file = stanFileName, chains=4, samplingType = "studentJuanez") 89 | # currentHierJuaDiffLdaCart <- juaResults$delta_each_dset 90 | 91 | #we exploit the fact that both vectors are initially filled with FALSE 92 | firstAvailable <- min (which (mleDiffLdaCart == FALSE)) 93 | mleDiffLdaCart [ firstAvailable : (firstAvailable + length(currentMleDiffLdaCart) -1 ) ] <- currentMleDiffLdaCart 94 | hierDiffLdaCart [ firstAvailable : (firstAvailable + length(currentHierDiffLdaCart) -1) ] <- currentHierDiffLdaCart 95 | # gaussDiffLdaCart [ firstAvailable : (firstAvailable + length(currentHierGaussDiffLdaCart) -1) ] <- currentHierGaussDiffLdaCart 96 | # kruDiffLdaCart [ firstAvailable : (firstAvailable + length(currentHierKruDiffLdaCart) -1) ] <- currentHierKruDiffLdaCart 97 | # juaDiffLdaCart [ firstAvailable : (firstAvailable + length(currentHierJuaDiffLdaCart) -1) ] <- currentHierJuaDiffLdaCart 98 | 99 | } 100 | 101 | #at this points we save the result to file 102 | csvFilename <- paste('csvResults/cvalAccFriedman',friedmanType,".csv",sep='') 103 | # results <- data.frame(redundantFeats, sampleSize, friedmanSd, mleDiffLdaCart, 
cvalFriedmanPredictive <- function(reps=50) {
  #Estimates accuracy of classifiers on data sets of the Friedman family via
  #cross-validation (10 runs of 10 folds), jointly over the three Friedman families.
  #Tracks the MAE of the MLE and of the hierarchical model on the delta_i,
  #the MAE on delta0 and the KL on P(delta) belonging to left, rope, right.
  #In the current version only the hierarchical (Student) variant is run;
  #the Gaussian/Kruschke/Juanez variants were dead code and have been removed.
  #reps: number of simulation repetitions; results are saved to Rdata/.
  library(mlbench)
  library(caret)
  source('generateFriedmanData.R')
  source('genFriedmanSettings.R')
  source('hierarchical_test.R')
  source('selectTrainSettings.R')
  source('Utils.R')

  settings <- rbind(genFriedmanSettings(1),genFriedmanSettings(2),genFriedmanSettings(3))
  friedmanTypeVec <-
    cbind ( t(rep(1,nrow(genFriedmanSettings(1)))), t(rep(2,nrow(genFriedmanSettings(2)))),
            t(rep(3,nrow(genFriedmanSettings(3)))) )
  actuals <- getActuals()

  #per-repetition error trackers (one slot per repetition)
  mleMaeTrainDeltaI <- vector (length = reps)
  hierMaeTrainDeltaI <- vector (length = reps)
  hierMaeDelta0 <- vector (length = reps)
  hierKLDeltaI <- vector (length = reps)
  hierModels <- list()
  trainData <- list()

  #10 runs of 10-folds cross-validation, hard-coded
  nRuns <- 10
  nFolds <- 10
  control <- trainControl(method = "repeatedcv", number=nFolds, repeats=nRuns)

  for (currentRep in 1:reps){
    set.seed(currentRep)
    cat('Repetition:', currentRep,'/',reps,'\n')
    trainIdx <- selectTrainSettings(friedmanTypeVec)
    crossValResults <- matrix(nrow=length(trainIdx), ncol = nRuns * nFolds)
    trainSettings <- settings[trainIdx,]
    trainFriedmanTypeVec <- friedmanTypeVec[trainIdx]

    for (currentSetting in 1:nrow(trainSettings)){
      #generate the data according to the given setting
      data <- generateFriedmanData(trainFriedmanTypeVec[currentSetting],trainSettings[currentSetting,])

      #for simplicity we focus on the difference between lda and cart;
      #the seed is re-set before each train() call so that both classifiers
      #see exactly the same cross-validation folds (paired comparison)
      set.seed(currentRep)
      fit.lda <- train(data$x, data$class, method="lda", trControl=control, tuneLength = 1)
      set.seed(currentRep)
      fit.cart <- train(data$x, data$class, method="rpart1SE", trControl=control, tuneLength = 1)
      crossValResults[currentSetting,] <- fit.lda$resample$Accuracy - fit.cart$resample$Accuracy
    }
    currentMleDiffLdaCart <- apply (crossValResults, 1, mean )

    #all the simulations for this repetition are done: infer the hierarchical model
    stanFileName <- 'StanHier'
    alphaBeta = list('lowerAlpha' =0.5,'upperAlpha'= 3,'lowerBeta' = 0.005,'upperBeta' = 0.05)
    hierModel <- hierarchical.test (x = crossValResults, sample_file = stanFileName, chains=4, samplingType = "student", alphaBeta = alphaBeta)

    #track the error on Delta0 (delta0 samples are on the standardized scale,
    #hence the multiplication by stdX)
    stdX <- hierModel$stdX
    hierMaeDelta0[currentRep]<- abs(actuals$delta0 - mean (hierModel$stanResults$delta0 *stdX))

    #track the KL on prob of DeltaI
    hierKLDeltaI[currentRep] <- KL(hierModel,actuals)

    #track the mae on DeltaI of the dsets analyzed via CV
    actualTrainDeltaI <- getActualTrainDeltaI (actuals, trainSettings)
    mleMaeTrainDeltaI[currentRep] <- mean (abs (actualTrainDeltaI - currentMleDiffLdaCart) )
    hierMaeTrainDeltaI[currentRep] <- mean (abs (actualTrainDeltaI - hierModel$delta_each_dset) )

    #store results for later analysis
    currentTrainList <- list('crossValResults'=crossValResults,'currentMleDiffLdaCart'=currentMleDiffLdaCart,'actualTrainDeltaI'=actualTrainDeltaI,'trainIdx'=trainIdx)
    trainData[[currentRep]] <- as.list(currentTrainList)
    hierModel$stanResults$delta <- NULL #to avoid storing too much stuff
    hierModels[[currentRep]] <- as.list(hierModel)

    #save tmp results
    tmpFilename <- paste('Rdata/cvalFriedmanPredictiveTMP',currentRep,'.Rdata',sep = '')
    save (trainData, hierModels, mleMaeTrainDeltaI, hierMaeTrainDeltaI,
          hierMaeDelta0, hierKLDeltaI, file = tmpFilename)
  }

  maeTrainDeltaI <- list(
    mleMaeTrainDeltaI=mleMaeTrainDeltaI,
    hierMaeTrainDeltaI=hierMaeTrainDeltaI
  )
  KLDelta <- list(hierKLDeltaI=hierKLDeltaI)
  maeDelta0 <- list(hierMaeDelta0=hierMaeDelta0)

  #aggregate statistics over the stored models
  powerNextDelta<-0
  powerSignedRank<-0
  lengthCIdelta0 <- 0
  calibrationDelta0<-0
  probNextDeltaLeft<-0
  probNextDeltaRope<-0
  probNextDeltaRight<-0
  for (i in 1:length(hierModels)){
    #NOTE(review): fields delta0$HDPlower/HDPupper and nextDelta are assumed to
    #be provided by hierarchical.test -- confirm against hierarchical_test.R
    calibrationDelta0 = calibrationDelta0 + as.numeric((actuals$delta0 > hierModels[[i]]$delta0$HDPlower) & (actuals$delta0 < hierModels[[i]]$delta0$HDPupper))
    lengthCIdelta0 <- lengthCIdelta0 + hierModels[[i]]$delta0$HDPupper - hierModels[[i]]$delta0$HDPlower
    powerNextDelta = powerNextDelta + as.numeric(hierModels[[i]]$nextDelta$right > .95)
    probNextDeltaLeft <- probNextDeltaLeft + hierModels[[i]]$nextDelta$left
    probNextDeltaRope <- probNextDeltaRope + hierModels[[i]]$nextDelta$rope
    probNextDeltaRight <- probNextDeltaRight + hierModels[[i]]$nextDelta$right
    powerSignedRank <- powerSignedRank + as.numeric(wilcox.test(trainData[[i]]$currentMleDiffLdaCart)$p.value < 0.05)
  }

  averageResults <- list ('calibration'=calibrationDelta0/reps,
                          'delta0CI'=lengthCIdelta0/reps,
                          'powerNextDelta'=powerNextDelta/reps,
                          'probNextDeltaLeft'=probNextDeltaLeft/reps,
                          'probNextDeltaRope'=probNextDeltaRope/reps,
                          'probNextDeltaRight'=probNextDeltaRight/reps,
                          'powerSignedRank'=powerSignedRank/reps)

  #at this point we save the result to file
  filename <- 'Rdata/cvalFriedmanPredictive.Rdata'
  save (trainData, hierModels, maeTrainDeltaI,KLDelta,maeDelta0, averageResults, actuals, file = filename)
}
cvalFriedmanPredictivePlots <- function(reps=1) {
  #Runs single repetitions of cross-validation, producing scatterplot data of
  #the MLE and hierarchical estimates, and a comparison of the shrinkage
  #estimates of delta_i against samples from the posterior predictive.
  #Same simulation scheme as cvalFriedmanPredictive: 10 runs of 10-folds CV,
  #jointly over the three Friedman families.
  #reps: number of repetitions (default 1, as the purpose is plotting).
  library(mlbench)
  library(caret)
  library(tikzDevice)
  library(MASS) #needed for write.matrix (was missing and caused an error)
  source('generateFriedmanData.R')
  source('genFriedmanSettings.R')
  source('hierarchical_test.R')
  source('selectTrainSettings.R')
  source('Utils.R')

  settings <- rbind(genFriedmanSettings(1),genFriedmanSettings(2),genFriedmanSettings(3))
  friedmanTypeVec <-
    cbind ( t(rep(1,nrow(genFriedmanSettings(1)))), t(rep(2,nrow(genFriedmanSettings(2)))),
            t(rep(3,nrow(genFriedmanSettings(3)))) )
  actuals <- getActuals()

  mleMaeTrainDeltaI <- vector (length = reps)
  hierMaeTrainDeltaI <- vector (length = reps)
  hierMaeDelta0 <- vector (length = reps)
  hierKLDeltaI <- vector (length = reps)
  hierModels <- list()
  trainData <- list()
  set.seed(42)

  #10 runs of 10-folds cross-validation, hard-coded
  nRuns <- 10
  nFolds <- 10
  control <- trainControl(method = "repeatedcv", number=nFolds, repeats=nRuns)

  for (currentRep in 1:reps){
    set.seed(currentRep)
    cat('Repetition:', currentRep,'/',reps,'\n')
    trainIdx <- selectTrainSettings(friedmanTypeVec)
    crossValResults <- matrix(nrow=length(trainIdx), ncol = nRuns * nFolds)
    trainSettings <- settings[trainIdx,]
    trainFriedmanTypeVec <- friedmanTypeVec[trainIdx]

    for (currentSetting in 1:nrow(trainSettings)){
      #generate the data according to the given setting
      data <- generateFriedmanData(trainFriedmanTypeVec[currentSetting],trainSettings[currentSetting,])

      #we focus on the difference between lda and cart; re-setting the seed
      #before each train() call pairs the folds of the two classifiers
      set.seed(currentRep)
      fit.lda <- train(data$x, data$class, method="lda", trControl=control, tuneLength = 1)
      set.seed(currentRep)
      fit.cart <- train(data$x, data$class, method="rpart1SE", trControl=control, tuneLength = 1)
      crossValResults[currentSetting,] <- fit.lda$resample$Accuracy - fit.cart$resample$Accuracy
    }
    currentMleDiffLdaCart <- apply (crossValResults, 1, mean )

    #infer the hierarchical model
    stanFileName <- 'StanHier'
    alphaBeta = list('lowerAlpha' =0.5,'upperAlpha'= 3,'lowerBeta' = 0.005,'upperBeta' = 0.05)
    hierModel <- hierarchical.test (x = crossValResults, sample_file = stanFileName, chains=4, samplingType = "student", alphaBeta = alphaBeta)

    #delta0 samples are on the standardized scale, hence the stdX rescaling
    stdX <- hierModel$stdX
    hierMaeDelta0[currentRep]<- abs(actuals$delta0 - mean (hierModel$stanResults$delta0 *stdX))
    hierKLDeltaI[currentRep] <- KL(hierModel,actuals)

    #track the mae on DeltaI of the dsets analyzed via CV
    actualTrainDeltaI <- getActualTrainDeltaI (actuals, trainSettings)
    mleMaeTrainDeltaI[currentRep] <- mean (abs (actualTrainDeltaI - currentMleDiffLdaCart) )
    hierMaeTrainDeltaI[currentRep] <- mean (abs (actualTrainDeltaI - hierModel$delta_each_dset) )

    #store results for later analysis; indexed assignment keeps one flat entry
    #per repetition (the previous list(trainData, ...) pattern built an
    #unusable recursively nested structure)
    currentTrainList <- list('crossValResults'=crossValResults,'currentMleDiffLdaCart'=currentMleDiffLdaCart,'actualTrainDeltaI'=actualTrainDeltaI)
    trainData[[currentRep]] <- currentTrainList
    hierModels[[currentRep]] <- hierModel

    df<- data.frame('actual'=actualTrainDeltaI,'MLE'=currentMleDiffLdaCart,
                    'Hier'=hierModel$delta_each_dset)
    fileName <- 'plots/scatterHierMle.csv'
    write.matrix(df,file = fileName)

    #save results to file (sep='' : the previous sep=',' produced file names
    #containing literal commas, e.g. "cvalFriedmanPredictivePlot,1,.Rdata")
    tmpFilename <- paste('cvalFriedmanPredictivePlot',currentRep,'.Rdata',sep = '')
    save (trainData, hierModels,
          mleMaeTrainDeltaI, hierMaeTrainDeltaI,
          hierMaeDelta0, hierKLDeltaI,
          file = tmpFilename)

    #now plot the posterior predictive of the next delta: sample from the
    #fitted Student t, rescale by stdX, and reject samples outside [-1,1]
    postSamples <- length(hierModel$stanResults$delta0)
    sampleDelta <- vector (length = postSamples)
    std <- hierModel$stanResults$std0
    mu <- hierModel$stanResults$delta0
    nu <- hierModel$stanResults$nu
    for (r in 1:postSamples){
      sampleDelta[r] <- 1000 #sentinel forcing at least one draw
      while (abs(sampleDelta[r]) > 1) {
        sampleDelta[r] <- (rt (1, nu[r]) * std[r] + mu[r]) * hierModel$stdX
      }
    }
    deltaShr <- hierModel$delta_each_dset
    d1 <- density(deltaShr, from = 2*min(deltaShr), to=2*max(deltaShr), n = 512 )
    d2 <- density(sampleDelta, from = 2*min(deltaShr), to=2*max(deltaShr), n = 512 )

    filename <- paste('plots/densityShrHier.pdf')
    pdf(file=filename)
    plot(d1, col=1, xlim = c(-.1,.1))
    lines(d2,col=2)
    legend(0.02,10,legend=c('shr','hier'), lty=c(1,1),col=c(1,2))
    dev.off()
  }

  #only the mle and hier variants are run here: the kru*/gauss* vectors
  #referenced by the original code were never defined and caused an error
  maeTrainDeltaI <- list(
    mleMaeTrainDeltaI=mleMaeTrainDeltaI,
    hierMaeTrainDeltaI=hierMaeTrainDeltaI)
  KLDelta <- list(hierKLDeltaI=hierKLDeltaI)
  maeDelta0 <- list(hierMaeDelta0=hierMaeDelta0)

  #at this point we save the result to file
  filename <- 'Rdata/cvalFriedmanPredictive.Rdata'
  save (trainData, hierModels, maeTrainDeltaI,KLDelta,maeDelta0,file = filename)
}
genFriedmanSettings <- function (friedmanType=1) {
  #Generates the experimental settings for one Friedman family:
  #levels of sample size, noise std dev, number of redundant features and the
  #discretization threshold used to turn the regression target into a class.
  #friedmanType: 1, 2 or 3, selecting the mlbench Friedman benchmark function.
  #Returns a data frame with one row per experimental setting.
  source('generateFriedmanData.R')
  redundantFeats <- c (0,20)
  sampleSize <- c(30, 100, 1000)

  #noise levels are family-specific because the three Friedman functions
  #produce outputs on very different scales
  if (friedmanType==1) {
    friedmanSd <- c(0.5, 1, 2)
  } else if (friedmanType==2) {
    friedmanSd <- c(62.5, 125, 250)
  } else if (friedmanType==3) {
    friedmanSd <- c(0.05, 0.1, 0.2)
  } else {
    #previously an invalid type fell through, leaving friedmanSd undefined
    stop('friedmanType must be 1, 2 or 3')
  }

  #data frame containing all the experimental setups
  settings <- expand.grid(redundantFeats=redundantFeats,sampleSize=sampleSize,friedmanSd=friedmanSd)

  #accurately estimate the discretization threshold from 10000 samples.
  #The threshold is equal for all settings of the same Friedman function: it is
  #the median of y and hence not affected by sample size, redundant features or
  #noise. Freezing the seed is necessary to produce the same threshold for the
  #cross-validation and actualFriedmanAccuracy experiments.
  set.seed(42)
  tmpSettings <- settings[1,]
  tmpSettings$sampleSize <- 10000
  data <- generateFriedmanData(friedmanType,tmpSettings)
  threshold <- median (data$y)
  settings$threshold <- rep(threshold,dim(settings)[1])
  return (settings)
}

generateFriedmanData <- function (friedmanType,settings){
  #Generates one classification data set of the given Friedman family.
  #friedmanType: 1, 2 or 3, selecting the mlbench Friedman benchmark function.
  #settings: one row of the data frame built by genFriedmanSettings; must
  #  provide sampleSize, friedmanSd, redundantFeats and threshold.
  #Returns the mlbench object augmented with a binary factor data$class.
  library(mlbench)

  if (friedmanType==1) {
    data <- mlbench.friedman1(settings$sampleSize, sd=settings$friedmanSd)
  } else if (friedmanType==2) {
    data <- mlbench.friedman2(settings$sampleSize, sd=settings$friedmanSd)
  } else if (friedmanType==3) {
    data <- mlbench.friedman3(settings$sampleSize, sd=settings$friedmanSd)
  } else {
    #previously an invalid type fell through with 'data' undefined
    stop('friedmanType must be 1, 2 or 3')
  }

  #add noisy features, generating first random values from a standard normal
  #and then reshaping them into a matrix
  if (settings$redundantFeats > 0) {
    d <- rnorm(settings$sampleSize * settings$redundantFeats)
    dMatrix <- matrix(d, nrow=settings$sampleSize, ncol = settings$redundantFeats)
    data$x <- cbind( data$x, dMatrix)
  }

  #generate the class variable by discretizing y on the threshold
  data$class <- as.factor ( data$y > settings$threshold)

  return ( data )
}
hierUciAnalysis <- function (class1, class2){
  #Infers the hierarchical model on the UCI results of two classifiers.
  #class1 and class2 identify the classifiers being compared:
  #'naive Bayes','aode','hnb','j48','j48_grafted' are coded as 1,2,3,4,5.
  #E.g. hierUciAnalysis(1,2) compares naive Bayes and aode.
  #The inferred posterior is saved to Rdata/.

  library(rstan)
  rstan_options(auto_write = TRUE)
  options(mc.cores = parallel::detectCores())
  source ("hierarchical_test.R")
  source ("logPredictive.R")
  source ("Utils.R")

  #this workspace needs to be there
  load("uci_data.RData")
  nFolds <- max (uci_classification$Key.Fold)
  rho <- 1/nFolds
  #this foldID goes between 1 and 100
  foldID <- uci_classification$Key.Run*10+uci_classification$Key.Fold-10
  rope_min <- -0.01
  rope_max <- 0.01

  #prepare the data for the hierarchical test
  results <- data.frame (classifierID = uci_classification$ClassifierID,
                         dsetID=uci_classification$DatasetID,
                         accuracy=uci_classification$Percent.correct,
                         fold=foldID)

  #slice out the two classifiers; the rows must be aligned data set by data set
  diffResults <- results[results$classifierID==class1,]
  results2 <- results[results$classifierID==class2,]
  stopifnot( mean (diffResults$dsetID==results2$dsetID) ==1)
  diffResults$diff <- diffResults$accuracy - results2$accuracy

  #build the matrix of results parsed by the hierarchical test:
  #one row per data set, one column per fold
  howManyDsets <- max(diffResults$dsetID)
  x <- matrix(ncol = max(foldID), nrow = howManyDsets)
  for (dsetIdx in 1:howManyDsets) {
    x[dsetIdx,] <- t (diffResults$diff[diffResults$dsetID == dsetIdx])
  }

  simulationID <- paste('class',class1,'class',class2,"GCsens",sep ='')

  #novel setup: these parameters work well in most cases, providing generally
  #better fit than a single gamma prior on nu
  alphaBeta = list('lowerAlpha' =0.5,'upperAlpha'= 5,'lowerBeta' = .05,'upperBeta' = .15)
  hierPosterior <- hierarchical.test (x = x,sample_file = simulationID,samplingType = "student", alphaBeta = alphaBeta)

  fileName <- paste('Rdata/hierUciAnalysis',class1,class2,'.Rdata', sep='')
  save (hierPosterior, file = fileName)
}
We obtained the best results with the Student distribution, 14 | #setting a prior on its degrees of freedom (\nu). This correspond to the default choice "student". 15 | # 16 | #alphaBeta is a lista containing upper and lower bounds for the values of alpha and beta, which are the parameters 17 | #of the prior on the degrees of freedom (\nu): \vu ~ Gamma (alpha, beta); alpha ~ unif (lowerAlpha,upperAlpha); 18 | #beta ~ unif (lowerBeta,upperBeta); 19 | # 20 | #rho is the correlation between the different results of cross-validation. In the literature, it is recommended to be set 21 | #to rho=1/(number of folds), hence the default of 1/10 22 | # 23 | #rope_min and rope_max are the lower and upper bounds of the rope. Assuming the accuracy to lie between 0 and 1, the default values 24 | #of rope we recommend are +- 0.01 25 | # 26 | #std_upper_bound is a large number to ensure that the prior on sigma has a high upper bound. 27 | # 28 | #chains is the number of MCMC chains to be run by Stan in parallel 29 | 30 | 31 | { 32 | 33 | 34 | rstan_options(auto_write = TRUE) 35 | options(mc.cores = parallel::detectCores()) 36 | library(matrixcalc) 37 | library(matrixStats) 38 | library(rstan) 39 | #for sampling from non-standardized t distribution 40 | library(metRology) 41 | 42 | #------------------------------------------------------------------------------- 43 | if ((max(x))>1 & rope_max < 0.02) { 44 | stop('value of rope_max not compatible with scale of provided x') 45 | } 46 | sample_file <- paste('stanOut/',sample_file,sep='') 47 | Nsamples <- dim(x)[2] 48 | q <- dim(x)[1] 49 | sample_file <- paste(sample_file,".StanOut",sep='') 50 | 51 | 52 | #data rescaling, to have homogenous scale among all dsets 53 | stdX <- mean(rowSds(x)) #we scale all the data by the mean of the standard deviation of data sets 54 | x <- x/stdX 55 | rope_min <- rope_min/stdX 56 | rope_max <- rope_max/stdX 57 | 58 | #search for data sets with 0 variance, which sometimes are there. 
59 | zeroVarIdx <- which (rowSds(x) == 0) 60 | if ( length(zeroVarIdx) > 0) { 61 | #to each dset with zero variance we add a gausian noise with mean 0 and very small std dev 62 | #this way we preserve the mean of the original data while obtaining a positive std dev 63 | for (i in 1:length(zeroVarIdx)){ 64 | noise <- runif(Nsamples/2,rope_min,rope_max) 65 | x[zeroVarIdx[i],1:(Nsamples/2)] <- x[zeroVarIdx[i],1:(Nsamples/2)] + noise; 66 | x[zeroVarIdx[i],(Nsamples/2+1):Nsamples] <- x[zeroVarIdx[i],(Nsamples/2+1):Nsamples] - noise; 67 | } 68 | } 69 | 70 | if (q>1) { 71 | std_among = sd(rowMeans(x)) 72 | } else { 73 | #to manage the particular case q=1 74 | std_among = mean(rowSds(x)) 75 | } 76 | 77 | std_within <- mean(rowSds(x)) 78 | 79 | dataList = list( 80 | deltaLow = -max(abs(x)), 81 | deltaHi = max(abs(x)), 82 | stdLow = 0, 83 | stdHi = std_within*std_upper_bound, 84 | std0Low = 0, 85 | std0Hi = std_among*std_upper_bound, 86 | Nsamples = Nsamples, 87 | q = q , 88 | x = x , 89 | rho = rho, 90 | upperAlpha = alphaBeta$upperAlpha, 91 | lowerAlpha = alphaBeta$lowerAlpha, 92 | upperBeta = alphaBeta$upperBeta, 93 | lowerBeta = alphaBeta$lowerBeta 94 | ) 95 | 96 | 97 | #this calls the Student with learnable dofs, which is the default 98 | if (samplingType=="student") { 99 | stanfit <- stan(file = 'stan/hierarchical-t-test.stan', data = dataList,sample_file=sample_file, chains=chains) 100 | } 101 | 102 | #this calls the Student with priors on the dof as in Kruschke, Gamma(1,0.0345). This is referred to a shifted exponential in his paper, 103 | #this Gamma is however very close to it. 
104 | else if (samplingType=="studentKruschke") { 105 | stanfit <- stan(file = 'stan/hierarchical-t-test_nuKru.stan', data = dataList,sample_file=sample_file, chains=chains) 106 | } 107 | #this calls the Student with priors Ga (2,0.1) on the dof as in Juanez and Steel 108 | else if (samplingType=="studentJuanez") { 109 | stanfit <- stan(file = 'stan/hierarchical-t-test_nuJuaSteel.stan', data = dataList,sample_file=sample_file, chains=chains) 110 | } 111 | 112 | #this calls the Gaussian 113 | else if (samplingType=="gaussian") { 114 | stanfit <- stan(file = 'stan/hierarchical-t-testGaussian.stan', data = dataList,sample_file=sample_file, chains=chains) 115 | } 116 | 117 | stanResults<- extract(stanfit, permuted = TRUE) 118 | 119 | 120 | #get for each data set the probability of left, rope and right 121 | prob_right_each_dset<-vector(length = q, mode = "double") 122 | prob_rope_each_dset<-vector(length = q, mode = "double") 123 | prob_left_each_dset<-vector(length = q, mode = "double") 124 | delta_each_dset<-vector(length = q, mode = "double") 125 | 126 | #results on non-std data, but the computation is ok because rope_min and rope_max are already scaled. 
127 | sampled_delta_each_dset<-stanResults$delta 128 | for (j in 1:q){ 129 | prob_right_each_dset[j] <- mean(sampled_delta_each_dset[,j]>rope_max) 130 | prob_rope_each_dset[j] <- mean(sampled_delta_each_dset[,j]>rope_min & sampled_delta_each_dset[,j] cumulativeLeft[r] & cumulativeRope[r] > cumulativeRight[r]){ 171 | sampledRopeWins <- sampledRopeWins + 1 172 | } 173 | else if (cumulativeLeft[r] > cumulativeRope[r] & cumulativeLeft[r] > cumulativeRight[r]){ 174 | sampledLeftWins <- sampledLeftWins + 1 175 | } 176 | else { 177 | sampledRigthWins <- sampledRigthWins +1 178 | } 179 | if (mu[r]>0){ 180 | sampledPositiveWins <- sampledPositiveWins + 1 181 | } 182 | else { 183 | sampledNegativeWins <- sampledNegativeWins + 1 184 | } 185 | } 186 | 187 | probRightNextDelta <- sampledRigthWins/(sampledRigthWins+sampledLeftWins+sampledRopeWins) 188 | probLeftNextDelta <- sampledLeftWins/(sampledRigthWins+sampledLeftWins+sampledRopeWins) 189 | probRopeNextDelta <- sampledRopeWins/(sampledRigthWins+sampledLeftWins+sampledRopeWins) 190 | probPositiveNextDelta <- sampledPositiveWins/(sampledPositiveWins+sampledNegativeWins) 191 | probNegativeNextDelta <- sampledNegativeWins /(sampledPositiveWins+sampledNegativeWins) 192 | 193 | 194 | 195 | results = list ( 196 | "nextDelta"=list("right"=probRightNextDelta, "left"=probLeftNextDelta, "rope"=probRopeNextDelta, "positive"=probPositiveNextDelta,"negative"=probNegativeNextDelta), 197 | "meanDeltaEachDset"=delta_each_dset, 198 | "probEachDset"=list("left"=prob_left_each_dset, "rope"=prob_rope_each_dset, 199 | "right"=prob_right_each_dset),"rawResults" = stanResults, "x"=x, "stdX"=stdX) 200 | 201 | return (results) 202 | 203 | } 204 | 205 | -------------------------------------------------------------------------------- /hierarchical/logPredictive.R: -------------------------------------------------------------------------------- 1 | logPredictive <- function (fittedModel,testX, rho){ 2 | #computes the log-predictive of the fitted model 
logPredictive <- function (fittedModel,testX, rho){
  #Computes the per-data-set average posterior log-predictive of the fitted
  #hierarchical model on the test results testX.
  #fittedModel: result of hierarchical.test; stanResults$delta and
  #  stanResults$sigma are assumed indexed [sample, dset] -- TODO confirm.
  #testX: matrix of test differences, one row per data set.
  #rho: correlation between cross-validation results on the same data set.
  #Returns a vector with one average log-density per data set.
  library('mvtnorm')

  buildCovarMatrix <- function (sigma, n){
    #equicorrelated covariance: rho*sigma^2 off-diagonal, sigma^2 on the diagonal
    covarMatrix <- matrix (rho * sigma^2, nrow=n, ncol=n)
    diag(covarMatrix) <- sigma^2
    return (covarMatrix)
  }

  dsets <- nrow(testX)
  samples <- dim(fittedModel$stanResults$delta)[1]
  #initialized as a vector of 0s, one for each dset
  postLogPredictive <- rep (0,dsets)
  #test instances available on each dset, for which to compute the log predictive
  instancesEachDset <- ncol(testX)

  for (currentDset in 1:dsets){
    for (currentSample in 1:samples){
      currentMu <- rep(fittedModel$stanResults$delta[currentSample,currentDset],
                       instancesEachDset)
      currentSigma <- fittedModel$stanResults$sigma[currentSample,currentDset]
      covarMatrix <- buildCovarMatrix(currentSigma, instancesEachDset)
      #evaluate the density once and reuse it (the original called dmvnorm
      #twice per iteration: once for the log, once for the infinity check)
      dens <- dmvnorm( x=testX[currentDset,], mean=currentMu, sigma = covarMatrix)
      if (is.infinite (dens)) {
        browser() #debugging hook for degenerate densities
      }
      postLogPredictive[currentDset] <- postLogPredictive[currentDset] + log (dens)
    }
  }
  postLogPredictive <- postLogPredictive / samples
  return (postLogPredictive)
}
delta_acc_sampling!='cauchy') && (delta_acc_sampling!='gaussian') && (delta_acc_sampling!='mixture') ) { 13 | stop('wrong delta_acc_sampling') 14 | } 15 | 16 | #set the seed for reproducible results 17 | set.seed(simulation_ID) 18 | 19 | # this control the variance of the simulation. 20 | # the first classifier has accuracy theta_star, the second theta_star+delta_acc. 21 | # with theta_star=1, we have zero variance. 22 | # with theta_star=0.5, we have maximum variance 23 | theta_star <- 0.9 24 | n_folds <- 10 25 | n_runs <- 10 26 | 27 | file_str0 <- paste('csvResults/Delta0',delta0,'Std0',std0,'_','sample_sizes',sample_sizes,'_',delta_acc_sampling,'_simulID_',simulation_ID,sep = "") 28 | filename <- paste(file_str0,'.csv',sep = "") 29 | log_filename <- paste('log_',file_str0,'.csv',sep = "") 30 | 31 | rope_min <- -0.01 32 | rope_max <- 0.01 33 | 34 | 35 | #point estimator of the correlation 36 | rho <- 1/n_folds; 37 | sign_rank_p_value <- matrix(0,reps) 38 | probLeftNextDelta <- matrix(0,reps) 39 | probRopeNextDelta <- matrix(0,reps) 40 | probRightNextDelta <- matrix(0,reps) 41 | rmseMleDelta_i <- matrix(0,reps) 42 | rmseHierDelta_i <- matrix(0,reps) 43 | settings <- list() 44 | results <- list() 45 | counter <- 1 46 | 47 | #originally the function was supposed to handle multiple value of parameters. 48 | #in reality we always used on a cluster running a single setting. 
49 | for (k in 1:length(how_many_dsets)) { 50 | for (i in 1:length(sample_sizes)) { 51 | settings[[counter]] <- list('dsets'=how_many_dsets[k],'sample_size'=sample_sizes[i]); ##ok 52 | counter=counter+1; 53 | } 54 | } 55 | 56 | #simulation 57 | for (j in 1:length(settings)) { 58 | cat('setting',j,'\n'); 59 | current_many_dsets <- settings[[j]]$dsets; 60 | current_dset_size <- settings[[j]]$sample_size; 61 | test_set_size <- current_dset_size/n_folds; 62 | #to be appended to various created files 63 | file_str <- paste('delta0',delta0,'Std0',std0,'_dsets_',current_many_dsets,'_delta_acc_',delta_acc_sampling,'_samples_sizes_',sample_sizes,'simulationID_',simulation_ID,sep=""); 64 | 65 | 66 | for (k in 1:reps) { 67 | 68 | if (k%%100==0) cat ('reps',k,'\n'); 69 | 70 | #Gaussian or cauchy sampling 71 | if (delta_acc_sampling=='gaussian'){ 72 | #we use the Gaussian only to compare with the mixture, hence the hard coding of the parameters 73 | delta_mean <- (0.005 + 0.02) / 2 74 | std <- 0.01 75 | std_mixture <- sqrt( std^2 + (0.005^2)/2 + (0.02^2)/2 - (0.5*(0.005+0.02))^2 ) 76 | delta_acc_each_dset <- rnorm(current_many_dsets, mean = delta_mean, sd = std_mixture) 77 | } 78 | else if (delta_acc_sampling=='cauchy'){ 79 | delta_acc_each_dset <- rt(current_many_dsets,1)*std0 + delta0; 80 | } 81 | else if (delta_acc_sampling=='mixture'){ 82 | #random number from the uniform and sorted 83 | idx <- sort(runif(current_many_dsets)) 84 | count1 <- max(1,sum (idx>0.5)) 85 | count1 <- min(count1,current_many_dsets-1) 86 | count2 <- how_many_dsets - count1 87 | std <- 0.01 88 | mean1 <- 0.005 89 | mean2 <- 0.02 90 | delta_acc_each_dset <- vector(length = current_many_dsets) 91 | delta_acc_each_dset[1:count1] <- rnorm(count1, mean = mean1, sd = std); 92 | delta_acc_each_dset[(count1+1):(count1+count2)] <- rnorm(count2, mean = mean2, sd = std); 93 | } 94 | 95 | sample_diff_acc_a_b_each_dset <- matrix(0,current_many_dsets); 96 | diffMatrix <- matrix(0,n_runs*n_folds,current_many_dsets); 
97 | 98 | for (i in 1:current_many_dsets){ 99 | 100 | current_delta_acc <- delta_acc_each_dset[i]; 101 | cv_results <- list('delta'=rep(0,n_runs*n_folds)) 102 | while (var(cv_results$delta)==0 && mean(cv_results$delta)==0) { 103 | cv_results <- cv_competing_nbc(n_runs,n_folds,current_dset_size,current_delta_acc,theta_star); 104 | currentDiff <- cv_results$delta; 105 | } 106 | sample_diff_acc_a_b_each_dset[i] <- mean(currentDiff); 107 | diffMatrix[,i]=currentDiff; 108 | } 109 | 110 | #frequentist Wilcoxon signed rank 111 | sign_rank_p_value[k] <- wilcox.test(sample_diff_acc_a_b_each_dset,alternative = "two.sided",exact=FALSE)$p.value; 112 | 113 | #running Stan 114 | #we adopt here a reduced number of chains because artificial data are smooth and convergence is easy 115 | stanModel <- hierarchical.test(x=t(diffMatrix), sample_file = file_str,samplingType = 'student', 116 | chains=4) 117 | 118 | #probability of left, right and rope being the most probable outcome on the next data set. 119 | probLeftNextDelta[k] <- stanModel$nextDelta$left 120 | probRopeNextDelta[k] <- stanModel$nextDelta$rope 121 | probRightNextDelta[k] <- stanModel$nextDelta$right 122 | 123 | 124 | #comparing MLE and hierarchical estimates of Delta_i 125 | maxLikMeans <- colMeans(diffMatrix) 126 | rmseMleDelta_i[k] <- sqrt(mean( (delta_acc_each_dset- maxLikMeans)^2 )) 127 | rmseHierDelta_i[k] <- sqrt(mean( (delta_acc_each_dset- stanModel$meanDeltaEachDset)^2 )) 128 | 129 | }#closes the loop on the repetitions 130 | 131 | 132 | 133 | results[[j]] <- list('how_many_dsets'=current_many_dsets, 134 | 'sample_size'=current_dset_size, 135 | 'delta0'=delta0, 136 | 'std0'=std0, 137 | 'sign_rank_p_value'=sign_rank_p_value, 138 | 'probLeftNextDelta'=probLeftNextDelta, 139 | 'probRopeNextDelta'=probRopeNextDelta, 140 | 'probRightNextDelta'=probRightNextDelta, 141 | 'rmseHierDelta_i'=rmseHierDelta_i, 142 | 'rmseMleDelta_i'=rmseMleDelta_i 143 | ) 144 | } 145 | 146 | save_results <- function(results,filename) { 147 
| 148 | mystring <- paste('delta_acc_sampling,delta0,std0,num_experiments,', 149 | 'how_many_dsets,sample_size,', 150 | 'signRankPower,signRankPValue,', 151 | 'hier_left_95,hier_rope_95,hier_right95,', 152 | 'medianHierLeft,medianHierRope,medianHierRight,', 153 | 'rmse_hier,rmse_mle',sep="") 154 | 155 | write(mystring, filename, append = FALSE) 156 | 157 | for (ii in 1:length(results)){ 158 | 159 | tmp_vector <- c( 160 | results[[ii]]$delta0, 161 | results[[ii]]$std0, 162 | length(results[[ii]]$sign_rank_p_value), 163 | results[[ii]]$how_many_dsets, 164 | results[[ii]]$sample_size, 165 | mean(results[[ii]]$sign_rank_p_value<.05), 166 | median(results[[ii]]$sign_rank_p_value), 167 | mean(results[[ii]]$probLeftNextDelta>.95), 168 | mean(results[[ii]]$probRopeNextDelta>.95), 169 | mean(results[[ii]]$probRightNextDelta>.95), 170 | median(results[[ii]]$probLeftNextDelta), 171 | median(results[[ii]]$probRopeNextDelta), 172 | median(results[[ii]]$probRightNextDelta), 173 | mean(results[[ii]]$rmseHierDelta_i), 174 | mean(results[[ii]]$rmseMleDelta_i) 175 | ) 176 | 177 | mystring <- paste(delta_acc_sampling,',',paste(tmp_vector,collapse=",")) 178 | write(mystring, filename, append = TRUE) 179 | } 180 | } 181 | 182 | #save csv file 183 | save_results(results, filename) 184 | 185 | #save the Rdata file 186 | rdata_filename <- paste(file_str0,'.Rdata',sep=""); 187 | save(results, file = rdata_filename) 188 | 189 | } 190 | 191 | ttest_Bayesian <- function(diff_a_b,rho,rope_min,rope_max) { 192 | delta <- mean(diff_a_b) 193 | n <- length(diff_a_b) 194 | df <- n-1 195 | stdX <- sd(diff_a_b) 196 | sp <- sd(diff_a_b)*sqrt(1/n + rho/(1-rho)) 197 | p.left <- pt((rope_min - delta)/sp, df) 198 | p.rope <- pt((rope_max - delta)/sp, df)-p.left 199 | indep_p_each_dset <- list('left'=p.left,'rope'=p.rope,'right'=1-p.left-p.rope) 200 | } 201 | 202 | 203 | -------------------------------------------------------------------------------- /hierarchical/selectTrainSettings.R: 
-------------------------------------------------------------------------------- 1 | selectTrainSettings <- function (friedmanTypeVec){ 2 | #selects 2/3 of the provided settings, stratified over the 3 Friedman families 3 | 4 | origIdx <- 1:length(friedmanTypeVec) 5 | 6 | idx1 <- sample(which(friedmanTypeVec==1)) 7 | idx2 <- sample(which(friedmanTypeVec==2)) 8 | idx3 <- sample(which(friedmanTypeVec==3)) 9 | 10 | stopifnot(length(idx1)==length(idx2)) 11 | stopifnot(length(idx3)==length(idx2)) 12 | reducedLength= round (2/3 * length(idx1)) 13 | 14 | idx <- c ( idx1[1:reducedLength], idx2[1:reducedLength], idx3[1:reducedLength]) 15 | return(idx) 16 | } -------------------------------------------------------------------------------- /hierarchical/sensitivityNormalStudent.R: -------------------------------------------------------------------------------- 1 | sensitivityNormalStudent <- function (class1, class2){ 2 | #class1 and class2 are the two classifier being compared 3 | #'naive Bayes','aode','hnb','j48','j48_grafted' are coded as 1,2,3,4,5 respectively 4 | #'e.g. to compare naive Bayes and aode: sensitivityNormalStudent(1,2) 5 | #infers the hierarchical model on the results of classifiers class1 and class2, as loaded from data. 
6 | 7 | library(rstan) 8 | rstan_options(auto_write = TRUE) 9 | options(mc.cores = parallel::detectCores()) 10 | source ("hierarchical_test.R") 11 | source ("logPredictive.R") 12 | source ("Utils.R") 13 | 14 | #this workspace needs to be there 15 | load("uci_data.RData") 16 | nFolds <- max (uci_classification$Key.Fold) 17 | rho=1/nFolds 18 | #this foldID goes between 1 and 100 19 | foldID <- uci_classification$Key.Run*10+uci_classification$Key.Fold-10 20 | rope_min <- -0.01 21 | rope_max <- 0.01 22 | 23 | #prepare the data for the hierarchical test 24 | results <- data.frame (classifierID = uci_classification$ClassifierID, 25 | dsetID=uci_classification$DatasetID, 26 | accuracy=uci_classification$Percent.correct, 27 | fold=foldID) 28 | 29 | 30 | 31 | diffResults <- results[results$classifierID==class1,] 32 | results2 <- results[results$classifierID==class2,] 33 | stopifnot( mean (diffResults$dsetID==results2$dsetID) ==1) 34 | diffResults$diff <- diffResults$accuracy - results2$accuracy 35 | 36 | 37 | #build matrix of results to be parsed by hierarchical test 38 | howManyDsets <- max(diffResults$dsetID) 39 | x<-matrix(ncol = max(foldID), nrow = howManyDsets) 40 | 41 | for (dsetIdx in 1:howManyDsets) { 42 | tmp <- diffResults$diff[diffResults$dsetID == dsetIdx] 43 | x[dsetIdx,] <- t (tmp) 44 | } 45 | 46 | 47 | 48 | #those lines if you want to infer the hierarchical model using Gamma prior on the degrees of freedom, 49 | #as sensitivity analysis 50 | 51 | # simulationID <- paste('class',class1,'class',class2,"Kruschke",sep ='') 52 | # hierPosteriorKru <- hierarchical.test (x=x,rho=rho,samplingType = "studentKruschke",rope_min = rope_min, 53 | # rope_max = rope_max,std_upper_bound = stdUpperBound,chains = chains,sample_file = simulationID) 54 | 55 | # simulationID <- paste('class',class1,'class',class2,"Juanez",sep ='') 56 | # hierPosteriorJua <- hierarchical.test (x=x,rho=rho,samplingType = "studentJuanez",rope_min = rope_min, 57 | # rope_max = 
rope_max,std_upper_bound = stdUpperBound,chains = chains,sample_file = simulationID) 58 | 59 | simulationID <- paste('class',class1,'class',class2,"GCsens",sep ='') 60 | 61 | 62 | #novel setup 63 | #this setup of parameters works well in most cases, providing generally better fit than 64 | #a single gamma prior on nu 65 | alphaBeta = list('lowerAlpha' =0.5,'upperAlpha'= 5,'lowerBeta' = .05,'upperBeta' = .15) 66 | hierPosterior <- hierarchical.test (x = x,sample_file = simulationID,samplingType = "student", alphaBeta = alphaBeta,chains=chains) 67 | 68 | fileName <- paste('Rdata/hierModel',class1,class2,'.Rdata', sep='') 69 | save (hierPosteriorNovel, file = fileName) 70 | 71 | } -------------------------------------------------------------------------------- /hierarchical/stan/hierarchical-t-test: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayesianTestsML/tutorial/0dbb29392052ea0ebc3d9a0922cb5052eb0e4e6d/hierarchical/stan/hierarchical-t-test -------------------------------------------------------------------------------- /hierarchical/stan/hierarchical-t-test.stan: -------------------------------------------------------------------------------- 1 | /*Hierarchical Bayesian model for the analysis of competing cross-validated classifiers on multiple data sets. 2 | */ 3 | 4 | data { 5 | 6 | real deltaLow; 7 | real deltaHi; 8 | 9 | //bounds of the sigma of the higher-level distribution 10 | real std0Low; 11 | real std0Hi; 12 | 13 | //bounds on the domain of the sigma of each data set 14 | real stdLow; 15 | real stdHi; 16 | 17 | 18 | //number of results for each data set. Typically 100 (10 runs of 10-folds cv) 19 | int Nsamples; 20 | 21 | //number of data sets. 22 | int q; 23 | 24 | //difference of accuracy between the two classifier, on each fold of each data set. 
25 | matrix[q,Nsamples] x; 26 | 27 | //correlation (1/(number of folds)) 28 | real rho; 29 | 30 | /*upper and lower bound for alpha and beta, which are the parameters of the Gamma distribution 31 | used as a prior for the degress of freedom. 32 | As a default we suggest: lowerAlpha=0.5; upperAlpha= 5; lowerBeta=0.05; upperBeta = .15 33 | */ 34 | real upperAlpha; 35 | real lowerAlpha; 36 | real upperBeta; 37 | real lowerBeta; 38 | 39 | } 40 | 41 | 42 | transformed data { 43 | 44 | //vector of 1s appearing in the likelihood 45 | vector[Nsamples] H; 46 | 47 | //vector of 0s: the mean of the mvn noise 48 | vector[Nsamples] zeroMeanVec; 49 | 50 | /* M is the correlation matrix of the mvn noise. 51 | invM is its inverse, detM its determinant */ 52 | matrix[Nsamples,Nsamples] invM; 53 | real detM; 54 | 55 | //The determinant of M is analytically known 56 | detM <- (1+(Nsamples-1)*rho)*(1-rho)^(Nsamples-1); 57 | 58 | //build H and invM. They do not depend on the data. 59 | for (j in 1:Nsamples){ 60 | zeroMeanVec[j]<-0; 61 | H[j]<-1; 62 | for (i in 1:Nsamples){ 63 | if (j==i) 64 | invM[j,i]<- (1 + (Nsamples-2)*rho)*pow((1-rho),Nsamples-2); 65 | else 66 | invM[j,i]<- -rho * pow((1-rho),Nsamples-2); 67 | } 68 | } 69 | /*at this point invM contains the adjugate of M. 70 | we divide it by det(M) to obtain the inverse of M.*/ 71 | invM <-invM/detM; 72 | } 73 | 74 | parameters { 75 | //mean of the hyperprior from which we sample the delta_i 76 | real delta0; 77 | 78 | //std of the hyperprior from which we sample the delta_i 79 | real std0; 80 | 81 | //delta_i of each data set: vector of lenght q. 82 | vector[q] delta; 83 | 84 | //sigma of each data set: : vector of lenght q. 
85 | vector[q] sigma; 86 | 87 | /* the domain of (nu - 1) starts from 0 88 | and can be given a gamma prior*/ 89 | real nuMinusOne; 90 | 91 | //parameters of the Gamma prior on nuMinusOne 92 | real gammaAlpha; 93 | real gammaBeta; 94 | 95 | } 96 | 97 | transformed parameters { 98 | //degrees of freedom 99 | real nu ; 100 | 101 | /*difference between the data (x matrix) and 102 | the vector of the q means.*/ 103 | matrix[q,Nsamples] diff; 104 | 105 | vector[q] diagQuad; 106 | 107 | /*vector of length q: 108 | 1 over the variance of each data set*/ 109 | vector[q] oneOverSigma2; 110 | 111 | vector[q] logDetSigma; 112 | 113 | vector[q] logLik; 114 | 115 | //degrees of freedom 116 | nu <- nuMinusOne + 1 ; 117 | 118 | //1 over the variance of each data set 119 | oneOverSigma2 <- rep_vector(1, q) ./ sigma; 120 | oneOverSigma2 <- oneOverSigma2 ./ sigma; 121 | 122 | /*the data (x) minus a matrix done as follows: 123 | the delta vector (of lenght q) pasted side by side Nsamples times*/ 124 | diff <- x - rep_matrix(delta,Nsamples); 125 | 126 | //efficient matrix computation of the likelihood. 127 | diagQuad <- diagonal (quad_form (invM,diff')); 128 | logDetSigma <- 2*Nsamples*log(sigma) + log(detM) ; 129 | logLik <- -0.5 * logDetSigma - 0.5*Nsamples*log(6.283); 130 | logLik <- logLik - 0.5 * oneOverSigma2 .* diagQuad; 131 | 132 | } 133 | 134 | model { 135 | /*mu0 and std0 are not explicitly sampled here. 
136 | Stan automatically samples them: mu0 as uniform and std0 as 137 | uniform over its domain (std0Low,std0Hi).*/ 138 | 139 | //sampling the degrees of freedom 140 | nuMinusOne ~ gamma ( gammaAlpha, gammaBeta); 141 | 142 | //vectorial sampling of the delta_i of each data set 143 | delta ~ student_t(nu, delta0, std0); 144 | 145 | //logLik is computed in the previous block 146 | increment_log_prob(sum(logLik)); 147 | } 148 | 149 | 150 | -------------------------------------------------------------------------------- /hierarchical/stan/hierarchical-t-testGaussian.stan: -------------------------------------------------------------------------------- 1 | /*this version implements a Gaussian hyper-prior 2 | */ 3 | 4 | data { 5 | 6 | real deltaLow; 7 | real deltaHi; 8 | 9 | //bounds of the sigma of the higher-level distribution 10 | real std0Low; 11 | real std0Hi; 12 | 13 | //bounds on the domain of the sigma of each data set 14 | real stdLow; 15 | real stdHi; 16 | 17 | 18 | //number of results for each data set. Typically 100 (10 runs of 10-folds cv) 19 | int Nsamples; 20 | 21 | //number of data sets. 22 | int q; 23 | 24 | //difference of accuracy between the two classifier, on each fold of each data set. 25 | matrix[q,Nsamples] x; 26 | 27 | //correlation (1/(number of folds)) 28 | real rho; 29 | } 30 | 31 | 32 | transformed data { 33 | 34 | //vector of 1s appearing in the likelihood 35 | vector[Nsamples] H; 36 | 37 | //vector of 0s: the mean of the mvn noise 38 | vector[Nsamples] zeroMeanVec; 39 | 40 | /* M is the correlation matrix of the mvn noise. 41 | invM is its inverse, detM its determinant */ 42 | matrix[Nsamples,Nsamples] invM; 43 | real detM; 44 | 45 | //The determinant of M is analytically known 46 | detM <- (1+(Nsamples-1)*rho)*(1-rho)^(Nsamples-1); 47 | 48 | //build H and invM. They do not depend on the data. 
49 | for (j in 1:Nsamples){ 50 | zeroMeanVec[j]<-0; 51 | H[j]<-1; 52 | for (i in 1:Nsamples){ 53 | if (j==i) 54 | invM[j,i]<- (1 + (Nsamples-2)*rho)*pow((1-rho),Nsamples-2); 55 | else 56 | invM[j,i]<- -rho * pow((1-rho),Nsamples-2); 57 | } 58 | } 59 | /*at this point invM contains the adjugate of M. 60 | we divide it by det(M) to obtain the inverse of M.*/ 61 | invM <-invM/detM; 62 | } 63 | 64 | parameters { 65 | //mean of the hyperprior from which we sample the delta_i 66 | real delta0; 67 | 68 | //std of the hyperprior from which we sample the delta_i 69 | real std0; 70 | 71 | //delta_i of each data set: vector of lenght q. 72 | vector[q] delta; 73 | 74 | //sigma of each data set: : vector of lenght q. 75 | vector[q] sigma; 76 | 77 | 78 | } 79 | 80 | transformed parameters { 81 | 82 | /*difference between the data (x matrix) and 83 | the vector of the q means.*/ 84 | matrix[q,Nsamples] diff; 85 | 86 | vector[q] diagQuad; 87 | 88 | /*vector of length q: 89 | 1 over the variance of each data set*/ 90 | vector[q] oneOverSigma2; 91 | 92 | vector[q] logDetSigma; 93 | 94 | vector[q] logLik; 95 | 96 | 97 | //1 over the variance of each data set 98 | oneOverSigma2 <- rep_vector(1, q) ./ sigma; 99 | oneOverSigma2 <- oneOverSigma2 ./ sigma; 100 | 101 | /*the data (x) minus a matrix done as follows: 102 | the delta vector (of lenght q) pasted side by side Nsamples times*/ 103 | diff <- x - rep_matrix(delta,Nsamples); 104 | 105 | //efficient matrix computation of the likelihood. 106 | diagQuad <- diagonal (quad_form (invM,diff')); 107 | logDetSigma <- 2*Nsamples*log(sigma) + log(detM) ; 108 | logLik <- -0.5 * logDetSigma - 0.5*Nsamples*log(6.283); 109 | logLik <- logLik - 0.5 * oneOverSigma2 .* diagQuad; 110 | 111 | } 112 | 113 | model { 114 | /*delta0 and std0 are not explicitly sampled here. 
115 | Stan automatically samples them: mu0 as uniform and std0 as 116 | uniform over its domain (std0Low,std0Hi).*/ 117 | 118 | 119 | //vectorial sampling of the delta_i of each data set 120 | delta ~ normal (delta0, std0); 121 | 122 | //logLik is computed in the previous block 123 | increment_log_prob(sum(logLik)); 124 | } 125 | -------------------------------------------------------------------------------- /hierarchical/stan/hierarchical-t-test_nuJuaSteel: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayesianTestsML/tutorial/0dbb29392052ea0ebc3d9a0922cb5052eb0e4e6d/hierarchical/stan/hierarchical-t-test_nuJuaSteel -------------------------------------------------------------------------------- /hierarchical/stan/hierarchical-t-test_nuJuaSteel.stan: -------------------------------------------------------------------------------- 1 | /*Hierarchical Bayesian model for the analysis of competing cross-validated classifiers on multiple data sets. 2 | Adopts Juarez-Steel prior as for the degrees of freedom, Gamma(2,0.1) 3 | */ 4 | 5 | data { 6 | 7 | real deltaLow; 8 | real deltaHi; 9 | 10 | //bounds of the sigma of the higher-level distribution 11 | real std0Low; 12 | real std0Hi; 13 | 14 | //bounds on the domain of the sigma of each data set 15 | real stdLow; 16 | real stdHi; 17 | 18 | 19 | //number of results for each data set. Typically 100 (10 runs of 10-folds cv) 20 | int Nsamples; 21 | 22 | //number of data sets. 23 | int q; 24 | 25 | //difference of accuracy between the two classifier, on each fold of each data set. 26 | matrix[q,Nsamples] x; 27 | 28 | //correlation (1/(number of folds)) 29 | real rho; 30 | } 31 | 32 | 33 | transformed data { 34 | 35 | //vector of 1s appearing in the likelihood 36 | vector[Nsamples] H; 37 | 38 | //vector of 0s: the mean of the mvn noise 39 | vector[Nsamples] zeroMeanVec; 40 | 41 | /* M is the correlation matrix of the mvn noise. 
42 | invM is its inverse, detM its determinant */ 43 | matrix[Nsamples,Nsamples] invM; 44 | real detM; 45 | 46 | //The determinant of M is analytically known 47 | detM <- (1+(Nsamples-1)*rho)*(1-rho)^(Nsamples-1); 48 | 49 | //build H and invM. They do not depend on the data. 50 | for (j in 1:Nsamples){ 51 | zeroMeanVec[j]<-0; 52 | H[j]<-1; 53 | for (i in 1:Nsamples){ 54 | if (j==i) 55 | invM[j,i]<- (1 + (Nsamples-2)*rho)*pow((1-rho),Nsamples-2); 56 | else 57 | invM[j,i]<- -rho * pow((1-rho),Nsamples-2); 58 | } 59 | } 60 | /*at this point invM contains the adjugate of M. 61 | we divide it by det(M) to obtain the inverse of M.*/ 62 | invM <-invM/detM; 63 | } 64 | 65 | parameters { 66 | //mean of the hyperprior from which we sample the delta_i 67 | real delta0; 68 | 69 | //std of the hyperprior from which we sample the delta_i 70 | real std0; 71 | 72 | //delta_i of each data set: vector of lenght q. 73 | vector[q] delta; 74 | 75 | //sigma of each data set: : vector of lenght q. 76 | vector[q] sigma; 77 | 78 | /* the domain of (nu - 1) starts from 0 79 | and can be given a gamma prior*/ 80 | real nuMinusOne; 81 | 82 | //parameters of the Gamma prior on nuMinusOne 83 | real gammaAlpha; 84 | real gammaBeta; 85 | 86 | } 87 | 88 | transformed parameters { 89 | //degrees of freedom 90 | real nu ; 91 | 92 | /*difference between the data (x matrix) and 93 | the vector of the q means.*/ 94 | matrix[q,Nsamples] diff; 95 | 96 | vector[q] diagQuad; 97 | 98 | /*vector of length q: 99 | 1 over the variance of each data set*/ 100 | vector[q] oneOverSigma2; 101 | 102 | vector[q] logDetSigma; 103 | 104 | vector[q] logLik; 105 | 106 | //degrees of freedom 107 | nu <- nuMinusOne + 1 ; 108 | 109 | //1 over the variance of each data set 110 | oneOverSigma2 <- rep_vector(1, q) ./ sigma; 111 | oneOverSigma2 <- oneOverSigma2 ./ sigma; 112 | 113 | /*the data (x) minus a matrix done as follows: 114 | the delta vector (of lenght q) pasted side by side Nsamples times*/ 115 | diff <- x - 
rep_matrix(delta,Nsamples); 116 | 117 | //efficient matrix computation of the likelihood. 118 | diagQuad <- diagonal (quad_form (invM,diff')); 119 | logDetSigma <- 2*Nsamples*log(sigma) + log(detM) ; 120 | logLik <- -0.5 * logDetSigma - 0.5*Nsamples*log(6.283); 121 | logLik <- logLik - 0.5 * oneOverSigma2 .* diagQuad; 122 | 123 | } 124 | 125 | model { 126 | /*mu0 and std0 are not explicitly sampled here. 127 | Stan automatically samples them: mu0 as uniform and std0 as 128 | uniform over its domain (std0Low,std0Hi).*/ 129 | 130 | //sampling the degrees of freedom according to Kruschke prior 131 | nuMinusOne ~ gamma ( 2, 0.1); 132 | 133 | 134 | //vectorial sampling of the delta_i of each data set 135 | delta ~ student_t(nu, delta0, std0); 136 | 137 | //logLik is computed in the previous block 138 | increment_log_prob(sum(logLik)); 139 | } 140 | 141 | 142 | -------------------------------------------------------------------------------- /hierarchical/stan/hierarchical-t-test_nuKru: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayesianTestsML/tutorial/0dbb29392052ea0ebc3d9a0922cb5052eb0e4e6d/hierarchical/stan/hierarchical-t-test_nuKru -------------------------------------------------------------------------------- /hierarchical/stan/hierarchical-t-test_nuKru.stan: -------------------------------------------------------------------------------- 1 | /*Hierarchical Bayesian model for the analysis of competing cross-validated classifiers on multiple data sets. 2 | Adopts Kruschke prior as for the degrees of freedom, Gamma(1,0.0345) 3 | */ 4 | 5 | data { 6 | 7 | real deltaLow; 8 | real deltaHi; 9 | 10 | //bounds of the sigma of the higher-level distribution 11 | real std0Low; 12 | real std0Hi; 13 | 14 | //bounds on the domain of the sigma of each data set 15 | real stdLow; 16 | real stdHi; 17 | 18 | 19 | //number of results for each data set. 
Typically 100 (10 runs of 10-folds cv) 20 | int Nsamples; 21 | 22 | //number of data sets. 23 | int q; 24 | 25 | //difference of accuracy between the two classifier, on each fold of each data set. 26 | matrix[q,Nsamples] x; 27 | 28 | //correlation (1/(number of folds)) 29 | real rho; 30 | } 31 | 32 | 33 | transformed data { 34 | 35 | //vector of 1s appearing in the likelihood 36 | vector[Nsamples] H; 37 | 38 | //vector of 0s: the mean of the mvn noise 39 | vector[Nsamples] zeroMeanVec; 40 | 41 | /* M is the correlation matrix of the mvn noise. 42 | invM is its inverse, detM its determinant */ 43 | matrix[Nsamples,Nsamples] invM; 44 | real detM; 45 | 46 | //The determinant of M is analytically known 47 | detM <- (1+(Nsamples-1)*rho)*(1-rho)^(Nsamples-1); 48 | 49 | //build H and invM. They do not depend on the data. 50 | for (j in 1:Nsamples){ 51 | zeroMeanVec[j]<-0; 52 | H[j]<-1; 53 | for (i in 1:Nsamples){ 54 | if (j==i) 55 | invM[j,i]<- (1 + (Nsamples-2)*rho)*pow((1-rho),Nsamples-2); 56 | else 57 | invM[j,i]<- -rho * pow((1-rho),Nsamples-2); 58 | } 59 | } 60 | /*at this point invM contains the adjugate of M. 61 | we divide it by det(M) to obtain the inverse of M.*/ 62 | invM <-invM/detM; 63 | } 64 | 65 | parameters { 66 | //mean of the hyperprior from which we sample the delta_i 67 | real delta0; 68 | 69 | //std of the hyperprior from which we sample the delta_i 70 | real std0; 71 | 72 | //delta_i of each data set: vector of lenght q. 73 | vector[q] delta; 74 | 75 | //sigma of each data set: : vector of lenght q. 
76 | vector[q] sigma; 77 | 78 | /* the domain of (nu - 1) starts from 0 79 | and can be given a gamma prior*/ 80 | real nuMinusOne; 81 | 82 | //parameters of the Gamma prior on nuMinusOne 83 | real gammaAlpha; 84 | real gammaBeta; 85 | 86 | } 87 | 88 | transformed parameters { 89 | //degrees of freedom 90 | real nu ; 91 | 92 | /*difference between the data (x matrix) and 93 | the vector of the q means.*/ 94 | matrix[q,Nsamples] diff; 95 | 96 | vector[q] diagQuad; 97 | 98 | /*vector of length q: 99 | 1 over the variance of each data set*/ 100 | vector[q] oneOverSigma2; 101 | 102 | vector[q] logDetSigma; 103 | 104 | vector[q] logLik; 105 | 106 | //degrees of freedom 107 | nu <- nuMinusOne + 1 ; 108 | 109 | //1 over the variance of each data set 110 | oneOverSigma2 <- rep_vector(1, q) ./ sigma; 111 | oneOverSigma2 <- oneOverSigma2 ./ sigma; 112 | 113 | /*the data (x) minus a matrix done as follows: 114 | the delta vector (of lenght q) pasted side by side Nsamples times*/ 115 | diff <- x - rep_matrix(delta,Nsamples); 116 | 117 | //efficient matrix computation of the likelihood. 118 | diagQuad <- diagonal (quad_form (invM,diff')); 119 | logDetSigma <- 2*Nsamples*log(sigma) + log(detM) ; 120 | logLik <- -0.5 * logDetSigma - 0.5*Nsamples*log(6.283); 121 | logLik <- logLik - 0.5 * oneOverSigma2 .* diagQuad; 122 | 123 | } 124 | 125 | model { 126 | /*mu0 and std0 are not explicitly sampled here. 
127 | Stan automatically samples them: mu0 as uniform and std0 as 128 | uniform over its domain (std0Low,std0Hi).*/ 129 | 130 | //sampling the degrees of freedom according to Kruschke prior 131 | nuMinusOne ~ gamma ( 1, 0.0345); 132 | 133 | 134 | //vectorial sampling of the delta_i of each data set 135 | delta ~ student_t(nu, delta0, std0); 136 | 137 | //logLik is computed in the previous block 138 | increment_log_prob(sum(logLik)); 139 | } 140 | 141 | 142 | -------------------------------------------------------------------------------- /hierarchical/uci_data.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayesianTestsML/tutorial/0dbb29392052ea0ebc3d9a0922cb5052eb0e4e6d/hierarchical/uci_data.RData -------------------------------------------------------------------------------- /slides/parametricBayesianComparison.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayesianTestsML/tutorial/0dbb29392052ea0ebc3d9a0922cb5052eb0e4e6d/slides/parametricBayesianComparison.pdf -------------------------------------------------------------------------------- /slides/plots/Bsigntest1.jl: -------------------------------------------------------------------------------- 1 | function Bsigntest(y,x,rope,priorvec=[1 1 1]/3) 2 | 3 | 4 | s=1 5 | nsamples=100000 6 | diff=y-x 7 | 8 | if rope>0 9 | #Compute counts 10 | nright=length(find(z->z> rope,diff[:])) 11 | nleft=length(find(z->(z<-rope),diff[:])) 12 | nrope=length(find(z->(z<=rope && z>=-rope),diff[:])) 13 | 14 | data = rand(Dirichlet([nleft nrope nright][:]+priorvec*s),nsamples) 15 | 16 | else 17 | #Compute counts 18 | nright=length(find(z->z> 0,diff[:])) 19 | nleft=length(find(z->(z<=0),diff[:])) 20 | 21 | 22 | data = rand(Dirichlet([nleft nright][:]+priorvec*s),nsamples) 23 | 24 | 25 | 26 | end 27 | return data 28 | 29 | end 30 | 31 | 32 | 
-------------------------------------------------------------------------------- /slides/plots/CONVERTING.txt: -------------------------------------------------------------------------------- 1 | convert -density 300 file.pdf -quality 90 file.png 2 | -------------------------------------------------------------------------------- /slides/plots/Comic1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayesianTestsML/tutorial/0dbb29392052ea0ebc3d9a0922cb5052eb0e4e6d/slides/plots/Comic1.jpg -------------------------------------------------------------------------------- /slides/plots/bayes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayesianTestsML/tutorial/0dbb29392052ea0ebc3d9a0922cb5052eb0e4e6d/slides/plots/bayes.png -------------------------------------------------------------------------------- /slides/plots/canvas.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayesianTestsML/tutorial/0dbb29392052ea0ebc3d9a0922cb5052eb0e4e6d/slides/plots/canvas.png -------------------------------------------------------------------------------- /slides/plots/canvas1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayesianTestsML/tutorial/0dbb29392052ea0ebc3d9a0922cb5052eb0e4e6d/slides/plots/canvas1.png -------------------------------------------------------------------------------- /slides/plots/confused.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayesianTestsML/tutorial/0dbb29392052ea0ebc3d9a0922cb5052eb0e4e6d/slides/plots/confused.png -------------------------------------------------------------------------------- /slides/plots/densplot.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayesianTestsML/tutorial/0dbb29392052ea0ebc3d9a0922cb5052eb0e4e6d/slides/plots/densplot.png -------------------------------------------------------------------------------- /slides/plots/densplotpost.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayesianTestsML/tutorial/0dbb29392052ea0ebc3d9a0922cb5052eb0e4e6d/slides/plots/densplotpost.png -------------------------------------------------------------------------------- /slides/plots/eye.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayesianTestsML/tutorial/0dbb29392052ea0ebc3d9a0922cb5052eb0e4e6d/slides/plots/eye.jpg -------------------------------------------------------------------------------- /slides/plots/fig1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayesianTestsML/tutorial/0dbb29392052ea0ebc3d9a0922cb5052eb0e4e6d/slides/plots/fig1.pdf -------------------------------------------------------------------------------- /slides/plots/fig1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayesianTestsML/tutorial/0dbb29392052ea0ebc3d9a0922cb5052eb0e4e6d/slides/plots/fig1.png -------------------------------------------------------------------------------- /slides/plots/fig1.tex: -------------------------------------------------------------------------------- 1 | \documentclass[tikz]{standalone} 2 | 3 | 4 | 5 | \usepackage{tikz} 6 | \usetikzlibrary{matrix} 7 | % \usepackage{etex,etoolbox} 8 | \usepackage{hyperref} % use for hypertext links, including those to external documents and URLs 9 | % don't add extra vertical space 10 | 11 | \usepackage{pgfplots} 12 | 13 | \usepackage{tikz} 14 | \usepackage{tabu} 15 | 
\usepackage{colortbl} 16 | \usepackage{pifont} 17 | \usepackage{eurosym} 18 | \usepackage{bm} 19 | \usepackage{bbding} 20 | 21 | 22 | 23 | \usepackage{pgfplots} 24 | 25 | \pgfmathdeclarefunction{gauss}{2}{% 26 | \pgfmathparse{1/(#2*sqrt(2*pi))*exp(-((x-#1)^2)/(2*#2^2))}% 27 | } 28 | 29 | \usetikzlibrary{positioning,calc,fit,shapes.geometric,patterns} 30 | \pgfdeclarelayer{background} 31 | \pgfdeclarelayer{foreground} 32 | \pgfsetlayers{background,main,foreground} 33 | \tikzstyle{vec}=[circle,inner sep=1pt,outer sep=-1pt,fill] 34 | \tikzstyle{border}=[thick] 35 | \tikzstyle{favborder}=[border,dotted] 36 | \tikzstyle{exclborder}=[border,dashed] 37 | \usepackage{pdfsync} 38 | \usepackage{wrapfig} 39 | 40 | \begin{document} 41 | \begin{tikzpicture}[framed,thick,scale=0.6, every node/.style={transform shape},font=\Large] 42 | \begin{axis}[ 43 | no markers, domain=0:10, samples=100, 44 | axis lines=none, xlabel=$~$, ylabel=$\mu$, 45 | every axis y label/.style={at=(current axis.above origin),anchor=south}, 46 | every axis x label/.style={at=(current axis.right of origin),anchor=west}, 47 | height=5cm, width=12cm, 48 | xtick={4,6.5}, ytick=\empty, 49 | enlargelimits=false, clip=false, axis on top, 50 | grid = major 51 | ] 52 | \addplot [fill=cyan!20, draw=black, domain=-1.8:1.8] {gauss(0,0.6)} ; 53 | \addplot [fill=red!20, draw=black, domain=3.2:9.2] {gauss(6,0.6)}; 54 | % \addplot [very thick,cyan!50!black] {gauss(6.5,1)}; 55 | \node (mu) at (axis cs:0,0.76) {$\theta$}; 56 | \node (pt) at (axis cs:6,0.76) {$p(\theta)$}; 57 | \node (c) at (axis cs:-1.3,0) {}; 58 | \node (x) at (axis cs:-4.1,0.35) {~~~~$x_1,x_2,\dots,x_n$}; 59 | \node (g) at (axis cs:0,0.05) {$\mathcal{N}(x;\theta,1)$}; 60 | % \draw [<-, right=of x] (x)--(c) {}; 61 | 62 | \end{axis} 63 | \end{tikzpicture} 64 | } 65 | \end{document} -------------------------------------------------------------------------------- /slides/plots/fig2.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayesianTestsML/tutorial/0dbb29392052ea0ebc3d9a0922cb5052eb0e4e6d/slides/plots/fig2.pdf -------------------------------------------------------------------------------- /slides/plots/fig2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayesianTestsML/tutorial/0dbb29392052ea0ebc3d9a0922cb5052eb0e4e6d/slides/plots/fig2.png -------------------------------------------------------------------------------- /slides/plots/fig2.tex: -------------------------------------------------------------------------------- 1 | \documentclass[tikz]{standalone} 2 | 3 | 4 | 5 | \usepackage{tikz} 6 | \usetikzlibrary{matrix} 7 | % \usepackage{etex,etoolbox} 8 | \usepackage{hyperref} % use for hypertext links, including those to external documents and URLs 9 | % don't add extra vertical space 10 | 11 | \usepackage{pgfplots} 12 | 13 | \usepackage{tikz} 14 | \usepackage{tabu} 15 | \usepackage{colortbl} 16 | \usepackage{pifont} 17 | \usepackage{eurosym} 18 | \usepackage{bm} 19 | \usepackage{bbding} 20 | 21 | 22 | 23 | \usepackage{pgfplots} 24 | 25 | \pgfmathdeclarefunction{gauss}{2}{% 26 | \pgfmathparse{1/(#2*sqrt(2*pi))*exp(-((x-#1)^2)/(2*#2^2))}% 27 | } 28 | 29 | \usetikzlibrary{positioning,calc,fit,shapes.geometric,patterns} 30 | \pgfdeclarelayer{background} 31 | \pgfdeclarelayer{foreground} 32 | \pgfsetlayers{background,main,foreground} 33 | \tikzstyle{vec}=[circle,inner sep=1pt,outer sep=-1pt,fill] 34 | \tikzstyle{border}=[thick] 35 | \tikzstyle{favborder}=[border,dotted] 36 | \tikzstyle{exclborder}=[border,dashed] 37 | \usepackage{pdfsync} 38 | \usepackage{wrapfig} 39 | 40 | \begin{document} 41 | \begin{tikzpicture}[framed,thick,scale=0.6, every node/.style={transform shape},font=\Large] 42 | \begin{axis}[ 43 | no markers, domain=0:10, samples=100, 44 | axis lines=none, xlabel=$~$, ylabel=$\mu$, 45 | 
every axis y label/.style={at=(current axis.above origin),anchor=south}, 46 | every axis x label/.style={at=(current axis.right of origin),anchor=west}, 47 | height=5cm, width=12cm, 48 | xtick={4,6.5}, ytick=\empty, 49 | enlargelimits=false, clip=false, axis on top, 50 | grid = major 51 | ] 52 | \addplot [fill=cyan!20, draw=black, domain=-1.8:1.8] {gauss(0,0.6)} ; 53 | \addplot [fill=red!20, draw=black, domain=3.2:9.2] {gauss(6,0.6)}; 54 | % \addplot [very thick,cyan!50!black] {gauss(6.5,1)}; 55 | \node (mu) at (axis cs:0,0.76) {$F$}; 56 | \node (pt) at (axis cs:6,0.76) {$\mathcal{P}(F)$}; 57 | \node (c) at (axis cs:-1.3,0) {}; 58 | \node (x) at (axis cs:-4.1,0.35) {~~~~$x_1,x_2,\dots,x_n$}; 59 | % \node (g) at (axis cs:0,0.05) {$\mathcal{N}(x;\theta,1)$}; 60 | % \draw [<-, right=of x] (x)--(c) {}; 61 | 62 | \end{axis} 63 | \end{tikzpicture} 64 | \end{document} -------------------------------------------------------------------------------- /slides/plots/manytriangles.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayesianTestsML/tutorial/0dbb29392052ea0ebc3d9a0922cb5052eb0e4e6d/slides/plots/manytriangles.png -------------------------------------------------------------------------------- /slides/plots/plot_data.jl: -------------------------------------------------------------------------------- 1 | function plot_data(i,j,d,x,marginleft,marginright) 2 | 3 | 4 | 5 | df = DataFrame(DeltaAcc=x) 6 | 7 | #Geom.histogram(bincount=20 8 | p=plot(df, x=:DeltaAcc, xintercept=[-0.01, 0.01],Geom.vline(color="orange", size=1mm), Geom.density,Coord.Cartesian(xmin=marginleft,xmax=marginright),Theme(major_label_font_size=13pt,minor_label_font_size=12pt,key_label_font_size=11pt)) 9 | 10 | 11 | draw(PDF("Plots/hist$i$j$d.pdf", 6inch, 3inch), p) 12 | 13 | return p 14 | 15 | end 16 | -------------------------------------------------------------------------------- /slides/plots/real.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayesianTestsML/tutorial/0dbb29392052ea0ebc3d9a0922cb5052eb0e4e6d/slides/plots/real.png -------------------------------------------------------------------------------- /slides/plots/realpost.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayesianTestsML/tutorial/0dbb29392052ea0ebc3d9a0922cb5052eb0e4e6d/slides/plots/realpost.png -------------------------------------------------------------------------------- /slides/plots/realpost1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayesianTestsML/tutorial/0dbb29392052ea0ebc3d9a0922cb5052eb0e4e6d/slides/plots/realpost1.png -------------------------------------------------------------------------------- /slides/plots/realpost1b.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayesianTestsML/tutorial/0dbb29392052ea0ebc3d9a0922cb5052eb0e4e6d/slides/plots/realpost1b.png -------------------------------------------------------------------------------- /slides/plots/realv.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayesianTestsML/tutorial/0dbb29392052ea0ebc3d9a0922cb5052eb0e4e6d/slides/plots/realv.png -------------------------------------------------------------------------------- /slides/plots/simplex.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayesianTestsML/tutorial/0dbb29392052ea0ebc3d9a0922cb5052eb0e4e6d/slides/plots/simplex.png -------------------------------------------------------------------------------- /slides/plots/summing.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/BayesianTestsML/tutorial/0dbb29392052ea0ebc3d9a0922cb5052eb0e4e6d/slides/plots/summing.png -------------------------------------------------------------------------------- /slides/plots/tab0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayesianTestsML/tutorial/0dbb29392052ea0ebc3d9a0922cb5052eb0e4e6d/slides/plots/tab0.png -------------------------------------------------------------------------------- /slides/plots/tab1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayesianTestsML/tutorial/0dbb29392052ea0ebc3d9a0922cb5052eb0e4e6d/slides/plots/tab1.png -------------------------------------------------------------------------------- /slides/plots/table.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayesianTestsML/tutorial/0dbb29392052ea0ebc3d9a0922cb5052eb0e4e6d/slides/plots/table.png -------------------------------------------------------------------------------- /slides/plots/table0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayesianTestsML/tutorial/0dbb29392052ea0ebc3d9a0922cb5052eb0e4e6d/slides/plots/table0.png -------------------------------------------------------------------------------- /slides/plots/tablecomp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayesianTestsML/tutorial/0dbb29392052ea0ebc3d9a0922cb5052eb0e4e6d/slides/plots/tablecomp.png -------------------------------------------------------------------------------- /slides/plots/tablepvalue.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/BayesianTestsML/tutorial/0dbb29392052ea0ebc3d9a0922cb5052eb0e4e6d/slides/plots/tablepvalue.png -------------------------------------------------------------------------------- /slides/plots/trianglenbc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayesianTestsML/tutorial/0dbb29392052ea0ebc3d9a0922cb5052eb0e4e6d/slides/plots/trianglenbc.png -------------------------------------------------------------------------------- /slides/plots/tvb&w.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayesianTestsML/tutorial/0dbb29392052ea0ebc3d9a0922cb5052eb0e4e6d/slides/plots/tvb&w.png -------------------------------------------------------------------------------- /slides/plots/tvcol.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayesianTestsML/tutorial/0dbb29392052ea0ebc3d9a0922cb5052eb0e4e6d/slides/plots/tvcol.png --------------------------------------------------------------------------------