├── mc.pdf ├── xmmpn.arf ├── xmmpn.rmf ├── source.pha ├── sim.xcm ├── LICENSE ├── groupall ├── sim-rand.xcm ├── fit.xcm └── README.md /mc.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SimonVaughanDataAndCode/xspec/HEAD/mc.pdf -------------------------------------------------------------------------------- /xmmpn.arf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SimonVaughanDataAndCode/xspec/HEAD/xmmpn.arf -------------------------------------------------------------------------------- /xmmpn.rmf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SimonVaughanDataAndCode/xspec/HEAD/xmmpn.rmf -------------------------------------------------------------------------------- /source.pha: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SimonVaughanDataAndCode/xspec/HEAD/source.pha -------------------------------------------------------------------------------- /sim.xcm: -------------------------------------------------------------------------------- 1 | # Generate N fake spectra from the same model 2 | # with no background file 3 | 4 | # Return TCL results for XSPEC commands. 5 | set xs_return_result 1 6 | 7 | # Keep going until fit converges. 8 | query yes 9 | 10 | # open plot device 11 | cpd /xs 12 | 13 | setplot en 14 | 15 | # Define a procedure called FAKER which actually 16 | # fakes the data 17 | proc faker {par1 par2} { 18 | fakeit none & xmmpn.rmf & xmmpn.arf & y & sim & $par1 & $par2 & /* 19 | } 20 | 21 | # Loop through all data 22 | for {set i 1} {$i < 501} {incr i} { 23 | 24 | # Set up the model. 25 | model wabs*(powerlaw) & /* 26 | newpar 1 0.123324 0.01 1.0E-4 1.0E-4 10 10 /* 27 | newpar 2 2.66865 0.01 0 0 1E+24 1E+24 /* 28 | newpar 3 1.861259E-04 1e-6 0 0 1 1 /* 29 | show 30 | 31 | # Fake the data (par1=filename par2=exposure time) 32 | faker sim_$i\.fak 5000 33 | 34 | # Plot the fake 35 | ignore **-0.3 36 | ignore 10.0-** 37 | setplot rebin 1000 25 38 | plot ldata del 39 | 40 | # Reset everything 41 | data none 42 | } 43 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Simon Vaughan 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /groupall: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 'Bash' script to group a whole bunch of spectra 4 | # using GRPPHA. Input parameter is the root name of the 5 | # spectral files. 6 | # Simon Vaughan, Leicester 2006 7 | # 8 | # Input files <root>_<n>.fak 9 | # Output files <root>_<n>.fak.g 10 | # 11 | # v1.0 -- 09/12/2004 -- Simon Vaughan 12 | # v1.1 -- 23/01/2006 -- fixed bug if <root>_<n>.fak.g 13 | # already exists. 14 | 15 | # Check command-line input 16 | 17 | if [ -z "$1" ] ; then 18 | parm1="<rootname>" 19 | echo "Usage: $0 $parm1" 20 | exit 1 21 | else 22 | root=$1 23 | fi 24 | 25 | # make sure FTOOLS are initialised 26 | 27 | which grppha 2> ftools.nothere > ftools.here 28 | if [ ! -s ftools.here ] ; then 29 | echo groupall: Make sure HEAsoft is initialised 30 | exit 1 31 | fi 32 | 33 | # Make a list of all the appropriate files 34 | 35 | rm -f file.list 36 | ls -1 $root\_*.fak > file.list 2> errs.list 37 | 38 | # Check if there are zero files 39 | 40 | if [ ! -s file.list ] ; then 41 | echo groupall: Missing files $root 42 | exit 1 43 | fi 44 | 45 | # Main loop: for each file... 46 | 47 | n=0 48 | for fname in $( cat file.list ); do 49 | 50 | let n=n+1 51 | 52 | # Set output filename 53 | 54 | outp=$fname\.g 55 | 56 | # remove file if it's already there 57 | 58 | if [ -e $outp ] ; then 59 | rm -f $outp 60 | fi 61 | 62 | # Let user know what's happening 63 | 64 | echo Grouping input file: $fname output file $outp 65 | 66 | # bin spectrum (until N >= 20 ct/bin) 67 | 68 | grppha infile=$fname outfile=$outp chatter=1 \ 69 | comm="group min 20 & exit" >& grppha.log 70 | 71 | # end of loop 72 | 73 | done 74 | 75 | # Delete the listing files 76 | 77 | rm -f file.list 78 | rm ftools.nothere 79 | rm ftools.here 80 | 81 | # Finished 82 | 83 | echo All Done 84 | exit 0 85 | 86 | -------------------------------------------------------------------------------- /sim-rand.xcm: -------------------------------------------------------------------------------- 1 | # Generate fake spectra from the model that 2 | # is randomised based on the original data fit 3 | # (with no background file) 4 | # Simon Vaughan, Leicester (2006) 5 | # 6 | # Two-part algorithm: 7 | # 1. Load in data. Fit. 8 | # Loop i=1,2,3,...,nn { 9 | # - draw random parameter values from current fit 10 | # - save to ASCII file } 11 | # 12 | # 2. Loop i=1,2,3,...,nn { 13 | # - Read in the ith parameter values 14 | # - set current model to these values 15 | # - simulate data from this model } 16 | # 17 | # v1.0 based on SIM.XCM 18 | # 23/01/2006 -- v1.1 adapted for XSPEC v12 19 | # modified FAKER procedure for v12 20 | # added SIMPARS to randomise the model 21 | # 24/01/2006 -- v1.2 removed "data none" command, causes errors 22 | # due to memory leak in XSPEC v12.2.1 23 | # 27/01/2006 -- v1.3 Moved all parameters to top of script 24 | # Added parameters for Emin/Emax 25 | # 25/04/2006 -- v1.4 Added line 'data none' before call to 'faker' 26 | # to make sure the channel groupings are forgotten 27 | # prior to generating the fake spectrum. 
28 | 29 | # Parameters: 30 | # nn = number of simulations 31 | # Emin = minimum energy to fit 32 | # Emax = maximum energy to fit 33 | 34 | set nn 10 35 | set Emin 0.5 36 | set Emax 10.0 37 | 38 | # Open the file to put the results in. 39 | 40 | set fileid [open sim_result.dat w] 41 | 42 | # Keep going until fit converges. 43 | 44 | query yes 45 | 46 | # open plot device 47 | 48 | cpd /xs 49 | 50 | # Define plotting details 51 | 52 | setplot en 53 | setplot command re x 0.4 10.0 54 | setplot command re y 1e-4 0.5 55 | 56 | # Define a procedure called FAKER which actually 57 | # fakes the data 58 | 59 | proc faker {fname Texp} { 60 | fakeit none & xmmpn.rmf & xmmpn.arf & y & & ${fname} & ${Texp} & /* 61 | } 62 | 63 | # Load the ORIGINAL dataset 64 | 65 | data source.pha 66 | 67 | # Find the exposure time 68 | 69 | tclout expos 70 | set Texp $xspec_tclout 71 | 72 | # Ignore the low/high energies 73 | 74 | ignore **-$Emin 75 | ignore $Emax-** 76 | ignore bad 77 | 78 | # Set up the null hypothesis model. 79 | 80 | model wabs*(powerlaw) & /* 81 | newpar 1 0.123324 0.01 1.0E-4 1.0E-4 10 10 82 | newpar 2 2.66865 0.01 0 0 1E+24 1E+24 83 | newpar 3 1.861259E-04 1e-6 0 0 1 1 84 | show 85 | 86 | # run the FIT command to calculate covariance matrix 87 | 88 | fit 89 | 90 | # plot the data 91 | 92 | setplot command la f Original data 93 | plot ld del 94 | 95 | # ----------------------------------- 96 | # Loop through iterations i = 1,2,3, ...,nn 97 | 98 | for {set i 1} {$i <= $nn} {incr i} { 99 | 100 | # draw at random new parameter values using 101 | # the covariance matrix to define the distribution 102 | 103 | tclout simpars 104 | 105 | # save the randomised parameter values into the file 106 | 107 | puts $fileid "$xspec_tclout" 108 | 109 | # end of this loop 110 | 111 | } 112 | 113 | # Close the file. 
114 | 115 | close $fileid 116 | 117 | # ----------------------------------- 118 | # open the list of randomised parameters 119 | 120 | set fileid [open sim_result.dat r] 121 | 122 | # Loop through iterations i = 1,2,3, ...,nn 123 | 124 | for {set i 1} {$i <= $nn} {incr i} { 125 | 126 | # read in one set of randomised parameters from file 127 | 128 | set parms [gets $fileid] 129 | set par1 [lindex $parms 0] 130 | set par2 [lindex $parms 1] 131 | set par3 [lindex $parms 2] 132 | 133 | # define a new model using randomised parameters 134 | 135 | newpar 1 $par1 136 | newpar 2 $par2 137 | newpar 3 $par3 138 | 139 | # show the randomised model 140 | 141 | show 142 | 143 | # remove the data (to forget channel groupings) 144 | 145 | data none 146 | 147 | # Fake the data (parameters: filename, exposure time) 148 | 149 | faker sim_$i\.fak $Texp 150 | 151 | # Plot the fake data 152 | 153 | ignore **-$Emin 154 | ignore $Emax-** 155 | setplot rebin 5 250 156 | 157 | setplot command la f Simulation number: $i 158 | plot ldata del 159 | 160 | # Reset everything 161 | 162 | data none 163 | 164 | # end of this loop 165 | 166 | } 167 | 168 | # end of script 169 | 170 | exit 171 | 172 | -------------------------------------------------------------------------------- /fit.xcm: -------------------------------------------------------------------------------- 1 | # Fit fake spectra with the same model and 2 | # output results to ASCII file called 'fit_result.dat' 3 | # Simon Vaughan, Leicester (2006) 4 | # 5 | # Data to be fitted are called sim_<i>.fak.g 6 | # where <i> = 1, 2, 3, ..., nn 7 | # 8 | # Fits with two models - model 1 is simpler; 9 | # model 2 is more complex - and outputs the 10 | # fit statistic (chi-squared) for each fit 11 | # 12 | # 23/01/2006 -- v1.1 adapted for XSPEC v12 13 | # changed error command syntax 14 | # using only 1 error call per fit 15 | # 26/01/2006 -- v1.2 added check that DOF > 1 16 | # 27/01/2006 -- v1.3 Moved all parameters to top of script 17 | # Added parameters for Emin/Emax 18 | # 31/01/2006 -- v1.4 Added routine to step through trial 19 | # values of new parameter and use value 20 | # giving min[chi^2] as first guess for fitting 21 | # 02/02/2006 -- v1.5 Minor improvements. Set nE=100 which 22 | # gives better results. Added check for 23 | # non-monotonicity in proc 'shakefit'. 24 | # Output F-test results for each spectrum. 25 | # 05/06/2006 -- v1.6 Revised 'shakefit' procedure to check for 26 | # parameters hitting hard limits and bail out 27 | # after 100 iterations (prevent infinite loop). 28 | # 29 | # WARNING: Bug in XSPEC prior to v12.2.1w gives dodgy F-test probabilities! 30 | # ---------------------------------------------------------- 31 | 32 | # Parameters: 33 | 34 | set nn 10 ;# nn = number of simulations 35 | set Emin 0.5 ;# Emin = minimum energy to fit 36 | set Emax 10.0 ;# Emax = maximum energy to fit 37 | set nE 100 ;# nE = number of energies to step through 38 | 39 | # ---------------------------------------------------------- 40 | # Define a TCL procedure to find the minimum element of a list 41 | # input is 'list'; output is the position of 42 | # the minimum value of 'list'. 
43 | # Internally: $i is a counter 44 | # $value is the value of the ith element of list 45 | # $minval is the minimum value found so far 46 | # $minpos is the position of current minimum 47 | 48 | proc min { list } { 49 | set n [llength $list] 50 | set minpos 0 51 | set minval [lindex $list 0] 52 | for {set i 0} {$i < $n} {incr i} { 53 | set value [lindex $list $i] 54 | if {$value < $minval} { 55 | set minval $value 56 | set minpos $i 57 | } 58 | } 59 | return $minpos 60 | } 61 | 62 | # ---------------------------------------------------------- 63 | # Define a TCL procedure to refine fitting results 64 | # by repeated use of 'error' and 'fit' 65 | # The main loop runs over all parameters. For each free 66 | # parameter perform at least one 'error' command to 67 | # 'shake' it out of local minima. Keep fitting the parameter 68 | # until 'error' does not find a new minimum. 69 | # Finish once all free parameters have been shaken. 70 | # 71 | # $errout comprises nine T/F flags 72 | # If the first flag is TRUE then a new minimum was 73 | # found during the last error command 74 | # 75 | # error stopat <ntrial> <toler> max <max delta-chi^2> <delta-chi^2> <param> 76 | 77 | proc shakefit {} { 78 | tclout modpar ;# find number of parameters 79 | set nopar $xspec_tclout 80 | for {set j 1} {$j <= $nopar} {incr j} { 81 | tclout param $j 82 | set pdel [lindex $xspec_tclout 1] ;# get parameter delta 83 | if {$pdel < 0} continue ;# if frozen goto next param 84 | set doerror 1 85 | set delchi 2.706 ;# delta-chi^2 to probe 86 | set counter 0 87 | while {$doerror == 1 && $counter < 100} { 88 | incr counter 89 | error stopat 10 0.1 max 50.0 $delchi $j 90 | tclout error $j 91 | set errout [lindex $xspec_tclout 2] 92 | if {[string match ???T????? $errout] || [string match ????T???? $errout]} { 93 | set doerror 0 ;# Hit lower/upper limits 94 | } 95 | if [string match F???????? $errout] { 96 | set doerror 0 ;# Not found better fit 97 | } else { 98 | fit 100 0.01 ;# Found better fit 99 | if [string match ?T??????? $errout] { 100 | set delchi [expr $delchi + 2] ;# increase if non-monotonic 101 | } ;# End IF (?T) 102 | } ;# End IF (F?) 103 | } ;# End WHILE 104 | } ;# End FOR 105 | } ;# End PROC 106 | 107 | # ---------------------------------------------------------- 108 | 109 | query no 110 | 111 | # fitting method: leven/migrad 112 | 113 | method leven 114 | 115 | # open plot device 116 | 117 | cpd /xs 118 | 119 | # Define plotting details 120 | 121 | setplot energy 122 | setplot add 123 | setplot command re x 0.4 5.0 124 | setplot command re y 1e-4 0.5 125 | 126 | # Open the file to put the results in. 127 | 128 | set fileout [open fit_result.dat w] 129 | 130 | # ---------------------------------------------------------- 131 | # Loop through all data, i = 1, 2, ..., nn 132 | 133 | for {set i 1} {$i <= $nn} {incr i} { 134 | 135 | # load the grouped spectral file 136 | # called sim_<i>.fak.g (where i=1,2,...,nn) 137 | 138 | data sim_$i.fak.g 139 | 140 | # Ignore the low/high energies - exactly as real data 141 | 142 | ignore **-$Emin 143 | ignore $Emax-** 144 | ignore bad 145 | 146 | # Set up the initial null hypothesis (simple) model. 147 | 148 | model wabs*(powerlaw) & /* 149 | newpar 1 0.123324 0.01 1.0E-4 1.0E-4 10 10 150 | newpar 2 2.66865 0.01 0 0 1E+24 1E+24 151 | newpar 3 1.861259E-04 1e-6 0 0 1 1 152 | 153 | # Check there are enough degrees of freedom to fit 154 | 155 | tclout dof 156 | set tdof [lindex $xspec_tclout 0] 157 | if {$tdof < 3} { 158 | puts "** Not enough degrees of freedom" 159 | continue 160 | } 161 | 162 | # Fit it to the model. 
163 | 164 | fit 100 0.01 165 | 166 | # Make sure there's no better minimum (using 'shakefit' procedure) 167 | 168 | shakefit 169 | 170 | # Get the fit statistic and DOF 171 | 172 | tclout stat 173 | set chi1 $xspec_tclout 174 | tclout dof 175 | set dof1 [lindex $xspec_tclout 0] 176 | 177 | # Plot the final fit 178 | 179 | setplot command la t Model 1 spectrum: $i 180 | setplot command la f chi-squared $chi1 / $dof1 dof 181 | plot ldata 182 | 183 | # ---------------------------------------------------------- 184 | # ----------- INSERT BB ------------------------------------ 185 | # add the extra component being tested for (e.g. BB) 186 | # using EDITMOD command to insert the new component 187 | # 188 | # editmod wabs*(powerlaw+bb) & /* 189 | # newpar 4 0.222110 0.01 0 0 1 1 190 | # newpar 5 2.634640E-06 1e-7 0 0 1 1 191 | # show 192 | 193 | # ----------- INSERT LINE AT LARGEST DELTA-CHI^2 ----------- 194 | 195 | # Find lowest/highest energy data actually used 196 | # nchan = number of channels; E/dE = binenergy/width 197 | 198 | tclout dof 199 | set nchan [expr [lindex $xspec_tclout 1] - 1] 200 | tclout plot ldata x 201 | set E $xspec_tclout 202 | tclout plot ldata xerr 203 | set dE $xspec_tclout 204 | set Ehi [expr [lindex $E $nchan] + [lindex $dE $nchan]] 205 | set nchan 0 206 | set Elo [expr [lindex $E $nchan] - [lindex $dE $nchan]] 207 | 208 | # Insert zero-flux line in spectral model 209 | 210 | editmod wabs*(powerlaw+gaussian) & /* 211 | newpar 4,$Elo,0.01,$Emin,$Elo,$Ehi,$Emax 212 | newpar 5,0.00,0.01 0.0 0.0 1.0 1.0 213 | newpar 6,0.00,1e-7 0.0 0.0 1.0 1.0 214 | freeze 5 ;# Ensure line stays narrow 215 | 216 | # Step through nE energies finding chi-squared at each one 217 | # use logarithmic energy steps from lowest to highest energy 218 | # resetting model each step ('best' rather than 'current') 219 | 220 | steppar best log 4 $Elo $Ehi $nE 221 | 222 | # put steppar output into lists 223 | 224 | tclout steppar statistic ;# chi-squared values 225 | set chisq $xspec_tclout 226 | tclout steppar 4 ;# corresponding parameter values 227 | set trialE $xspec_tclout 228 | 229 | # Put line at energy that gave minimum chi-squared 230 | 231 | set minindex [min $chisq] 232 | set Epeak [lindex $trialE $minindex] 233 | newpar 4,$Epeak,0.01,$Emin,$Emin,$Emax,$Emax 234 | newpar 6,0.00,1e-7 0.0 0.0 1.0 1.0 235 | 236 | # Before fitting, adjust line norm at current energy 237 | 238 | freeze 4 239 | fit 100 0.01 240 | thaw 4 241 | 242 | # Now fit it to the model to improve energy and norm 243 | 244 | fit 100 0.01 245 | 246 | # Make sure there's no better minimum (using 'shakefit' procedure) 247 | 248 | shakefit 249 | 250 | # Get the fit statistic and DOF 251 | 252 | tclout stat 253 | set chi2 $xspec_tclout 254 | tclout dof 255 | set dof2 [lindex $xspec_tclout 0] 256 | 257 | # Plot the final fit 258 | 259 | setplot command la t Model 2 spectrum: $i 260 | setplot command la f chi-squared $chi2 / $dof2 dof 261 | plot ldata 262 | 263 | # Perform an F-test (for the hell of it!) 264 | 265 | ftest $chi2 $dof2 $chi1 $dof1 266 | tclout ftest 267 | set fprob $xspec_tclout 268 | 269 | # Put the chi-square and DOF of each fit into file 270 | 271 | puts $fileout "$i $chi1 $dof1 $chi2 $dof2 $fprob" 272 | 273 | # Reset everything 274 | 275 | data none 276 | model none 277 | 278 | # end of loop 279 | 280 | } 281 | # ---------------------------------------------------------- 282 | 283 | # Close the file. 
284 | 285 | close $fileout 286 | 287 | # end of script 288 | 289 | exit 290 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # xspec 2 | This page gives XSPEC (and shell) scripts for performing Monte Carlo tests within XSPEC. 3 | 4 | * source.pha - The "real" dataset for the worked example 5 | * xmmpn.rmf - The response matrix for the data 6 | * xmmpn.arf - The ancillary response file for the data 7 | * sim-rand.xcm - XSPEC script to produce simulated spectra 8 | * groupall - Bash script to run GRPPHA over all simulated spectra 9 | * fit.xcm - XSPEC script to fit all (grouped) simulated spectra 10 | 11 | Read the [mc.pdf](mc.pdf) file for my (circa 2006) explanation. 12 | 13 | ## Warning 14 | 15 | These scripts were written in 2005-2006 and tested using XSPEC v12.2.1ao running under Scientific Linux 4.2. They have not been maintained since then. I offer no guarantee they will work on other systems. 16 | 17 | ## Referencing the scripts 18 | 19 | If you make use of this code in your work, please do cite the following paper for which these scripts were originally developed. 20 | 21 | [Hurkett, C., Vaughan S., et al. 2008, ApJ, v679, p587](http://adsabs.harvard.edu/abs/2008ApJ...679..587H) 22 | 23 | ## Explanation 24 | 25 | ### Monte Carlo methods 26 | 27 | Monte Carlo methods use random (or quasi-random) data to solve 28 | problems. In the context of hypothesis testing, one 29 | generates randomised data based on the null hypothesis model, 30 | taking care to make the fake data as realistic as possible, 31 | and uses them as a 'control' sample with which to calibrate the 32 | test statistic. Take source detection as an example: one would simulate 33 | a large ensemble of images by randomising the background level, 34 | and measure the flux at the source position in each one. 35 | The frequency distribution (histogram) of the fluxes produced from 36 | the fake data 37 | is a Monte Carlo estimate of the reference PDF. 38 | As well as making sure the data are simulated as accurately as 39 | possible, one must also simulate a large number of datasets 40 | so that the histogram of the test statistics from the simulations 41 | converges on the true PDF. Even if there is 42 | no analytical expression for the reference distribution, it is 43 | always possible to find it using the Monte Carlo method so long 44 | as one can simulate a sufficient number of (realistic) fake datasets. 45 | 46 | Monte Carlo methods are extremely powerful and conceptually simple. 47 | The drawback is that they may require a large amount of computer 48 | processing time to generate and analyse a large quantity of simulated 49 | data. 50 | 51 | ### Application to the F-test 52 | 53 | Maybe you are trying to test for an emission line in a spectrum; 54 | adding the line to the model improves the fit a bit, but you don't 55 | know whether the improvement should be considered significant or 56 | not. 57 | You could use the F-test, but one of the 58 | assumptions behind the F-test is not valid in this case [1]. Normally you 59 | would measure the F-statistic and compare this with 60 | a reference distribution 61 | -- this tells you how unexpected your value of 62 | F is. (In this case the reference distribution for the F-test is the 63 | Fisher-Snedecor distribution.) 64 | The reference distribution gives you a probability, or p-value, for 65 | the given value of F. 
66 | If the probability is small (let's say 67 | p=0.001) then you conclude this result is unlikely to have occurred by 68 | chance, so it must be a significant detection (people often quote this 69 | by inverting the false alarm probability: 100*[1-p] = 99.9 per 70 | cent confidence). But, as we just said, the case of adding a line 71 | violates one of the fundamental assumptions behind the F-test and so 72 | you cannot use the textbook reference 73 | distribution to go from an F-value (what you measure from the data) to 74 | a p-value (how significant it is). But we can solve the problem 75 | using a Monte Carlo approach. 76 | 77 | ### An overview of the method 78 | 79 | The general idea is as follows. First you need to define exactly what 80 | it is you want to test. If you want a clear answer you need a clear 81 | question! In the case of line detection, perhaps you are comparing 82 | a power law to a power law plus an emission line. 83 | The null 84 | hypothesis is that the simpler of these two models is true -- 85 | in this example the 86 | null hypothesis is that the spectrum is just a power law. The 87 | alternative hypothesis is that the spectrum is a power law plus an 88 | emission line. The way you go about making a hypothesis test is to 89 | measure some test statistic from the data. Maybe you used the F-test 90 | and measured an F-value. (The F-value comes from the decrease in 91 | chi-square when you add a line to the model, and the number of 92 | degrees of freedom.) But you don't know the reference distribution to 93 | turn this into a probability. What you need to do is make a large 94 | batch of fake data for which the null hypothesis is true, and measure 95 | the same test statistic for each of the fake datasets. So you make a 96 | fake dataset, measure the test statistic, make another one, measure 97 | the test statistic, etc. etc. If you keep doing this you will build up 98 | the distribution of the test statistic assuming the null hypothesis is 99 | true (because all your fake data are produced using the null 100 | hypothesis). In the 'power law vs. power law plus line' example what 101 | we do is simulate a spectrum of a power law, then fit the data using a 102 | power law with and without a line, and then measure the F-value. Over 103 | and over again. As you perform more simulations you build up a clearer 104 | picture of the distribution of F-values (if the null hypothesis is 105 | true). 106 | 107 | You can then 108 | ask the question: how many simulations show a larger test 109 | statistic (e.g. F-value) than the one I got for my real data? Did 110 | the value I got 111 | for my test statistic appear in many of the simulations or only 112 | very rarely? Maybe the F-value was 6.71 from the real data. And when 113 | we ran the 1,000 simulations we found only 3 out of the 1,000 had a 114 | value bigger than this. 115 | We could conclude there is a 3/1000 chance of getting an F-value like 116 | the one observed if the null hypothesis is true. 117 | This is the false alarm probability, and since it is quite small 118 | we may interpret it as indicating the 119 | null hypothesis is false, and we therefore favour the 120 | alternative hypothesis. 121 | In other words we could say the line is 122 | detected at 99.7 per cent confidence (because 997/1000 simulations 123 | showed a smaller F-value). 124 | 125 | ### Outline of a simple MC method 126 | 127 | A simple Monte Carlo significance test works along the following lines: 128 | 129 | 1. 
Define the null and alternative hypotheses 130 | 131 | 2. Choose a test statistic: call it T 132 | 133 | 3. Measure the test statistic of the real data: call it T_0 134 | 135 | 4. Loop over the simulations. For each i=1,2,...,N: 136 | 137 | a. Produce a simulated data set: D_i 138 | 139 | b. Measure the test statistic from the simulated data: T_i 140 | 141 | 5. Calculate where T_0 falls in the distribution of T_i 142 | 143 | The p-value is the fraction of the T_i values that exceed the measured 144 | T_0 value: p = n[ T_i >= T_0 ]/N. Inverting this, the significance 145 | is 1-p = n[ T_0 > T_i ]/N. Ensure 146 | N is large, or this will not be a very accurate estimate (the 147 | error on the p value is sqrt[p(1-p)/N], which comes from the 148 | binomial formula). 149 | 150 | It is vital that you make a *fair* measurement of the 151 | test statistic from the simulated data -- you must be careful not to 152 | bias this measurement 153 | based on your prior experience of the real data. 154 | Whatever you did to the real data, you must also do to the 155 | simulated ('control') data, otherwise you are not performing 156 | a fair like-for-like test. 157 | 158 | ### Script files for XSPEC 159 | 160 | That's the theory dealt with. Now for a worked example of 161 | running a Monte Carlo test using XSPEC. 162 | 163 | XSPEC will allow you to run a sequence of commands from a script. This 164 | means you can automate the process of generating and fitting a 165 | large sequence of fake data. If you are trying to examine 1,000 spectral 166 | simulations, this really is the only way to do it. Basically all you do 167 | is write the appropriate commands into an XSPEC script file '\*.xcm' 168 | and then you can run it from within XSPEC. XSPEC uses 169 | TCL (Tool Command Language) to control its operation, so you can also 170 | add basic 171 | control structures (like loops) and input/output commands to your 172 | script. In this way an XSPEC script with a few TCL commands is 173 | quite a powerful tool. 174 | 175 | A slight technical problem is that you may need more than just 176 | XSPEC. If your real data were grouped to have 20 counts per bin, 177 | then you must do this to your fake data too. But this needs 178 | to be done by GRPPHA -- outside of XSPEC. What I do is break down the 179 | process into a set of basic tasks, each of which has its own script. 180 | 181 | 1. I use one 182 | XSPEC script to produce N fake datasets. 183 | 2. Then I use a shell script 184 | (from the UNIX command line, outside of XSPEC) to run GRPPHA on each 185 | of the fake data files. 186 | 3. Then I have another XSPEC script to load each 187 | of the (now grouped!) fake data files in turn, fit them, and save the 188 | results to a text file. 189 | 4. Then I use an R script, or a Fortran program 190 | (or whatever) to examine the results and see how the simulated 191 | distribution of the test statistic compares to the 'real' number (a sketch of this last step is given at the end of this section). 192 | 193 | There are a number of ways to run an XSPEC script like this. One is 194 | to simply use the '@' symbol, like you would a normal XSPEC '\*.xcm' file. 195 | ``` 196 | xspec> @script.xcm 197 | ``` 198 | A better way is to use the following from the UNIX command line: 199 | ``` 200 | unix> xspec - script.xcm 201 | ``` 202 | This will start XSPEC and run the script. If you end the script 203 | with an *exit* command it will then return you to the UNIX command line. 
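
As a concrete (hypothetical) sketch of step 4 above, the short TCL script below reads the `fit_result.dat` file written by `fit.xcm` (columns: i chi1 dof1 chi2 dof2 fprob), rebuilds an F-value for each simulation from the two chi-squared fits, and reports the Monte Carlo p-value. The file name `mc-pvalue.tcl`, the observed value `F0 = 6.71` (the example number used earlier) and the reconstruction of F from the two fits are illustrative assumptions, not part of the original scripts; in practice you could equally use R, Fortran, or whatever you prefer.

```
# mc-pvalue.tcl -- illustrative sketch, not part of the original scripts.
# Reads fit_result.dat (columns: i chi1 dof1 chi2 dof2 fprob, as written
# by fit.xcm) and computes the Monte Carlo p-value for an observed F-value.

set F0 6.71        ;# F-value measured from the REAL data (example value)

set fileid [open fit_result.dat r]
set N 0            ;# number of simulations read
set nexceed 0      ;# number with F >= F0

while {[gets $fileid line] >= 0} {
    if {[llength $line] < 6} continue    ;# skip incomplete lines
    set chi1 [lindex $line 1]
    set dof1 [lindex $line 2]
    set chi2 [lindex $line 3]
    set dof2 [lindex $line 4]

    # Standard F-value for the extra model component (assumes dof1 > dof2):
    # F = [(chi1 - chi2) / (dof1 - dof2)] / (chi2 / dof2)
    set F [expr {(($chi1 - $chi2) / double($dof1 - $dof2)) / ($chi2 / double($dof2))}]

    incr N
    if {$F >= $F0} { incr nexceed }
}
close $fileid

# Monte Carlo p-value and its binomial error, p +/- sqrt(p(1-p)/N)
set p [expr {double($nexceed) / double($N)}]
set perr [expr {sqrt($p * (1.0 - $p) / double($N))}]
puts "N = $N simulations, n(F >= F0) = $nexceed"
puts [format "p = %.4f +/- %.4f" $p $perr]
```

Run it with `tclsh mc-pvalue.tcl` once `fit.xcm` has finished. Note that with only N = 10 simulations (the default `nn` in the scripts) the binomial error on p is large, so increase `nn` before drawing any conclusions.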
204 | 205 | 206 | 207 | [1]: There are two conditions that must be satisfied for the F-test to 208 | follow its expected theoretical reference distribution. These are that 209 | the two models being compared are *nested*, and that the null 210 | values of the additional parameters are not on the boundary of 211 | possible parameter space. This second condition is violated when 212 | testing for a line (or any other additive component) because the null 213 | value of one of the new parameters (normalisation) is zero, which is 214 | the boundary of the parameter space. 215 | You should also have enough counts per bin to be able to use chi-square 216 | properly as well. (Or use direct maximum likelihood fitting.) 217 | See [Protassov et al. (2002)](http://adsabs.harvard.edu/abs/2002ApJ...571..545P). 218 | --------------------------------------------------------------------------------