├── .Rbuildignore ├── .gitattribrutes ├── .gitignore ├── .travis.yml ├── 01-introduction.Rmd ├── 02-SimpleWorld.Rmd ├── 03-what-is-smsim.Rmd ├── 04-data-prep.Rmd ├── 05-smsim-in-R.R ├── 05-smsim-in-R.Rmd ├── 06-alternative-approaches.Rmd ├── 07-CakeMap.Rmd ├── 08-validation.Rmd ├── 09-no-microdata.Rmd ├── 10-household-allocation.Rmd ├── 11-Tresis_chapter.Rmd ├── 12-smsim-for-abm.Rmd ├── 13-appendix.Rmd ├── 14-glossary.Rmd ├── 15-references.Rmd ├── DESCRIPTION ├── LICENSE ├── Makefile ├── NAMESPACE ├── NetLogo ├── NLv01.R ├── SimpleWorldVersion1.nlogo ├── SimpleWorldVersion2.nlogo ├── SimpleWorldVersion3.nlogo ├── SimpleWorldVersion4.nlogo ├── agents.csv ├── ints_df.RData └── multiSimRun.R ├── README.Rmd ├── README.md ├── additional-material ├── 13-additional.Rmd ├── input-data-mipfp.csv ├── input-data-mipfp.csv.ods └── reformatting-mipfp-example.Rmd ├── backup-code ├── .Rapp.history ├── CakeMap-dataknut.R └── tests.R ├── bibliography.bib ├── build.R ├── cache-data-prep.RData ├── cache-smsim-in-R.RData ├── code ├── CakeMap.R ├── CakeMapInts.R ├── CakeMapMipfpCon1Convert.R ├── CakeMapMipfpData.R ├── CakeMapPlot.R ├── CakeMapTimeAnalysis.R ├── CakeMapWithMipfp.R ├── ConvertIpfpWeights.R ├── SimpleWorld.R ├── bbuild.R ├── book-functions.R ├── build-CRC-version.R ├── functions.R ├── gregwt.R ├── ipfpMultiDim.R ├── optim-cakeMap.R ├── optim-tests-SimpleWorld.R └── parallel-ipfp.R ├── courses └── course-info-3day.Rmd ├── data ├── Belgium │ ├── BelgiqueConting.txt │ ├── ContrainteAge.txt │ ├── ContrainteDipl.txt │ ├── ContrainteGenre.txt │ ├── ContrainteStatut.txt │ ├── HH_cons_INS92094 │ └── HH_sample ├── CakeMap │ ├── age-sex-raw.csv │ ├── area-cat.R │ ├── cars-raw.csv │ ├── categorise.R │ ├── cons.csv │ ├── inc-est-2001.csv │ ├── ind.csv │ ├── load-all.R │ ├── nssec-raw.csv │ ├── process-age.R │ ├── process-car.R │ ├── process-nssec.R │ └── wards.RData └── SimpleWorld │ ├── age.csv │ ├── ind-full.csv │ ├── ind.csv │ └── sex.csv ├── deploy.sh ├── elsevier-harvard.csl ├── figures ├── Belgium │ ├── BadSize.png │ ├── CM_ENF.png │ ├── Couples.png │ ├── NonAssigne.png │ ├── diplome.png │ ├── diplome_statut.png │ └── statut.png ├── CakeMap-lores.png ├── Couple_SE.png ├── HH-CO.png ├── HHCouplesBelgium.png ├── HHCouplesNamur.jpg ├── IllustrationCouples.png ├── Jojo.png ├── Jojo_JASS.png ├── Jojo_JASS2.png ├── RandomUnif100000.png ├── TAEOptim_GenSA_Mo.pdf ├── TAEOptim_GenSA_Mo.png ├── TRESISModels.png ├── TimeCakeMap.png ├── TimeOptim_GenSA_Mo.pdf ├── TimeOptim_GenSA_Mo.png ├── Trafic_Jojo2.png ├── Trafic_jojo.png ├── agri-example-hynes-2008.png ├── austerity.png ├── co-vs-ipf-schema.png ├── cover-image.jpg ├── fit-obs-sim-simple-5.png ├── fsimple1.png ├── history01.png ├── incomeCake.png ├── integerisation-algorithms.png ├── jtg.png ├── msim-flow.png ├── msim-schema.png ├── nl-chooser.png ├── nl-graphics-window.png ├── nl-income-boxplots.png ├── nl-plots.png ├── nl-simpleworld-negotiating.png ├── nl-simpleworld-populated.png ├── nl-simpleworld.png ├── nl-sliders.png ├── nl-ticks.png ├── nl-zones.png ├── optim-its.png ├── optim-time.png ├── raw-data-screenshot.jpeg ├── rstudio-autocomplete.png ├── rstudio-environment.png ├── simPop-results-eg.png ├── simpleworld-1.png ├── studio-basic.png ├── vingtile.png └── why-msim-maup.png ├── fractional_weights ├── BA-MakeCakeSimFractional.R ├── BA-process-final_micro_fractional_cakes_geo.do ├── README.md └── cakes_geo.csv ├── frontmatter ├── pream.tex └── preface.tex ├── index.Rmd ├── krantz.cls ├── notes ├── BA-notes.md ├── L1.Rmd ├── mipfp-notes.R ├── 
seville-notes.R └── simPop-notes.R ├── output ├── .gitignore └── ints_df.csv ├── slides ├── Applying-IPF-and-CO.Rmd ├── SM-for-ABM.Rmd ├── SM-without-microdata.Rmd ├── introduction.Rmd ├── r-rstudio-practical.Rmd ├── simpop-intro.Rmd └── spatial-microdata-in-r.Rmd ├── sms-book-citation.bib ├── spatial-microsim-book.Rproj └── www ├── .gitignore ├── bootstrap-theme.min.css ├── bootstrap.min.css ├── bootstrap.min.js ├── glyphicons-halflings-white.png ├── glyphicons-halflings.png ├── highlight.css └── toc.js /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | -------------------------------------------------------------------------------- /.gitattribrutes: -------------------------------------------------------------------------------- 1 | README.md merge=ours 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | spatial-microsim-book* 2 | backup-code/ 3 | figures/CakeMap.png 4 | *.tex 5 | *.aux 6 | *.orig 7 | *.docx 8 | .Rproj.user 9 | *.pdf 10 | .history 11 | *.backup 12 | *.md~ 13 | *.toc 14 | *.kilepr 15 | .Rhistory 16 | .RData 17 | *.gz 18 | ggmap* 19 | .Rproj.user 20 | *.cls 21 | book/ 22 | _site/ 23 | temp.* 24 | book.Rmd 25 | *.html 26 | drafts/ 27 | .dropbox 28 | *.out 29 | comments.ods 30 | cache-CakeMap.RData 31 | .~lock.book.docx# 32 | desktop.ini 33 | *.log 34 | book.odt 35 | frontmatter/rough-drafts/stackXquestion.R 36 | *.bak 37 | master.zip 38 | BEL_adm4.rds 39 | libs 40 | _book 41 | _bookdown_files 42 | _main* 43 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: r 2 | cache: packages 3 | dist: trusty 4 | sudo: required 5 | warnings_are_errors: false 6 | before_install: 7 | - sudo add-apt-repository ppa:ubuntugis/ubuntugis-unstable --yes 8 | - sudo add-apt-repository ppa:opencpu/jq --yes 9 | - sudo apt-get --yes --force-yes update -qq 10 | - sudo apt-get install --yes libudunits2-dev libproj-dev libgeos-dev libgdal-dev libv8-dev 11 | - sudo apt-get install --yes libjq-dev libprotobuf-dev libprotoc-dev protobuf-compiler 12 | 13 | r_packages: 14 | - rgdal 15 | - sf 16 | - devtools 17 | 18 | script: 19 | - R CMD INSTALL ../spatial-microsim-book/ 20 | - Rscript -e 'bookdown::render_book("index.Rmd", output_format = "bookdown::gitbook", clean = FALSE)' 21 | 22 | after_success: 23 | - test $TRAVIS_PULL_REQUEST == "false" && test $TRAVIS_BRANCH == "master" && bash deploy.sh 24 | 25 | notifications: 26 | email: 27 | on_success: change 28 | on_failure: change 29 | -------------------------------------------------------------------------------- /13-appendix.Rmd: -------------------------------------------------------------------------------- 1 | # Appendix: Getting up-to-speed with R {#apR} 2 | 3 | As mentioned in Chapter 1, R is a general purpose programming 4 | language focussed on data analysis and modelling. This small tutorial aims to 5 | teach the basics of R, from the perspective of spatial microsimulation research. 6 | It should also be useful to people with existing R skills, to re-affirm their 7 | knowledge base and see how it is applicable to spatial microsimulation. 8 | 9 | R's design is built on the idea that everything that exists is an object and everything 10 | that happens is a function. 
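Even arithmetic is a function call under the hood; as a quick aside (a two-line illustration, not one of the book's worked examples), the `+` operator can be called by name like any other function:

```{r}
1 + 2      # the familiar infix form
`+`(1, 2)  # exactly the same operation, written explicitly as a function call
```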
It is a *vectorised*, *object orientated* and 11 | *functional* programming language (Wickham 2014). This means that R 12 | understands vector algebra, that all data accessible to R resides in 13 | named objects, and that functions are used to modify those objects. We will 14 | look at each of these in some code below. 15 | 16 | ## R understands vector algebra {#vector-alg} 17 | 18 | A vector is simply an ordered list of numbers (Beezer 2008). 19 | Imagine two vectors, each consisting of 3 elements: 20 | 21 | $$a = (1,2,3); b = (9,8,6) $$ 22 | 23 | To say that R understands vector algebra is to say that it knows how to 24 | handle vectors in the same way a mathematician does: 25 | 26 | $$a + b = (a_1 + b_1, a_2 + b_2, a_3 + b_3) = (10,10,9) $$ 27 | 28 | This may not seem remarkable, but it is. Most programming 29 | languages are not vectorised, so they would see $a + b$ differently. 30 | In Python, for example, this is the answer we get:^[We can 31 | get the right answer in Python, by typing the following: 32 | `import numpy; a=numpy.array([1,2,3]); b=numpy.array([9,8,6]); a+b`.] 33 | 34 | ```{r, engine='python', eval=FALSE} 35 | a = [1,2,3] 36 | b = [9,8,6] 37 | print(a + b) 38 | ``` 39 | 40 | `## [1, 2, 3, 9, 8, 6]` 41 | 42 | In R, the operation *just works*, intuitively: 43 | 44 | ```{r} 45 | a <- c(1, 2, 3) 46 | b <- c(9, 8, 6) 47 | a + b 48 | ``` 49 | 50 | This conciseness is clearly very useful in spatial microsimulation, as numeric 51 | variables of the same length are common (e.g. the attributes of individuals in a 52 | zone) and can be acted on with a minimum of effort. 53 | 54 | ## R is object orientated {#R-object} 55 | 56 | In R, everything that exists is an object with a name and a class. This is 57 | useful, because R's functions know automatically how to behave differently on 58 | different objects depending on their class. 59 | 60 | To illustrate the point, let's create two objects, each with a different class 61 | and see how the function `summary` behaves differently, depending on the type. 62 | This behaviour is *polymorphism* [@Matloff2011]: 63 | 64 | ```{r} 65 | # Create a character and a numeric vector object 66 | char_obj <- c("red", "blue", "red", "green") 67 | num_obj <- c(1, 4, 2, 532.1) 68 | 69 | # Summary of each object 70 | summary(char_obj) 71 | summary(num_obj) 72 | 73 | # Summary of a factor object 74 | fac_obj <- factor(char_obj) 75 | summary(fac_obj) 76 | ``` 77 | 78 | In the example above, the output from `summary` for the numeric object `num_obj` 79 | was very different from that of the character vector `char_obj`. Note that 80 | although the same information was contained in `fac_obj` (a factor), the output 81 | from `summary` changes again. 82 | 83 | Note that objects can be called almost anything in R with the exceptions of 84 | names beginning with a number or containing operator symbols such as `-`, `^` 85 | and brackets. It is good practice to think about what the purpose of an object 86 | is before naming it: using clear and concise names can save you a huge amount of 87 | time in the long run. 88 | 89 | 90 | ## Subsetting in R {#subsetting} 91 | 92 | R has powerful, concise and (over time) intuitive methods for taking subsets of 93 | data. Using the SimpleWorld example we loaded in *Data preparation*, 94 | let's explore the `ind` object in more detail, to see 95 | how we can select the parts of an object we are most interested in.
As before, 96 | we need to load the data: 97 | 98 | ```{r} 99 | ind <- read.csv("data/SimpleWorld/ind.csv") 100 | ``` 101 | 102 | Now, it is easy from within R to call a single individual (e.g. individual 3) 103 | using the square bracket notation: 104 | 105 | ```{r} 106 | ind[3,] 107 | ``` 108 | 109 | The above example takes a subset of `ind` all elements present on the 3rd row: 110 | for a 2 dimensional table, anything to the left of the comma refers to rows and 111 | anything to the right refers to columns. Note that `ind[2:3,]` and 112 | `ind[c(3,5),]` also take subsets of the `ind` object: the square brackets can 113 | take *vector* inputs as well as single numbers. 114 | 115 | We can also subset by columns: the second dimension. Confusingly, this can be 116 | done in four ways, because `ind` is an R `data.frame`^[This can be ascertained 117 | by typing `class(ind)`. It is useful to know the class of different R objects, 118 | so make good use of the `class()` function.] and a data frame can behave 119 | simultaneously as a list, a matrix and a data frame (only the results of the 120 | first are shown): 121 | 122 | ```{r} 123 | ind$age # data.frame column name notation I 124 | # ind[, 2] # matrix notation 125 | # ind["age"] # column name notation II 126 | # ind[[2]] # list notation 127 | # ind[2] # numeric data frame notation 128 | ``` 129 | 130 | It is also possible to subset cells by both rows and columns simultaneously. 131 | Let us select query the gender of the 4th individual, as an example 132 | (pay attention to the relative location of the comma inside the square brackets): 133 | 134 | ```{r} 135 | ind[4, 3] # The attribute of the 4th individual in column 3 136 | ``` 137 | 138 | A commonly used trick in R that helps with the analysis of individual level data 139 | is to subset a data frame based on one or more of its variables. Let's subset 140 | first all females in our dataset and then all females over 50: 141 | 142 | ```{r} 143 | ind[ind$sex == "f", ] 144 | ind[ind$sex == "f" & ind$age > 50, ] 145 | ``` 146 | 147 | In the above code, R uses relational operators of equality (`==`) and inequality 148 | (`>`) which can be used in combination using the `&` symbol. This works because, 149 | as well as integer numbers, one can also place *boolean* variables into square 150 | brackets: `ind$sex == "f"` returns a binary vector consisting solely of `TRUE` 151 | and `FALSE` values.^[Thus, yet another way to invoke the 2nd column of `ind` is 152 | the following: `ind[c(F, T, F)]`! Here, `T` and `F` are shorthand for "TRUE" and 153 | "FALSE" respectively.] 154 | 155 | ## Further R resources {#further} 156 | 157 | The above tutorial should provide a sufficient grounding in R for beginners to 158 | understand the practical examples in the book. However, R is a deep language 159 | and there is much else to learn that will be of benefit to your modelling 160 | skills. There are many excellent books and tutorials that teach the fundamentals 161 | of R for a variety of applications. 162 | The following resources, in ascending order of difficulty, 163 | are highly recommended: 164 | 165 | - *Introduction to visualising spatial data in R* (Lovelace and Cheshire 2014) 166 | provides an introductory tutorial on handling spatial data in R, including the 167 | administrative zone data which often form the building blocks of spatial microsimulation 168 | models in R. 169 | - *Introduction to scientific programming and simulation using R* 170 | (Jones et al. 
2014) is an 171 | accessible and highly practical course that will form a solid foundation 172 | for a range of modelling applications, including spatial microsimulation. 173 | - *An Introduction to R* (Venables et al. 2014) 174 | is the foundational introductory R manual, written by the 175 | software's core developers and is available on-line for free. 176 | It is terse and covers some advanced topics, but 177 | provides a useful reference on the fundamentals of R as a language. 178 | - *Advanced R* 179 | (Wickham 2014) (http://www.crcpress.com/product/isbn/9781466586963) 180 | delves into the heart 181 | of the R language. It contains many advanced topics, but the introductory 182 | chapters are straightforward. Browsing some of the pages on 183 | Advanced R's website (http://adv-r.had.co.nz/) and 184 | trying to answer the questions that open each chapter 185 | provides a taste of the book and an excellent 186 | way of testing and improving one's understanding of the R language. 187 | 188 | ```{r, echo=F} 189 | # There are alternatives to R and in the next section we will consider a few of these. 190 | ``` 191 | -------------------------------------------------------------------------------- /14-glossary.Rmd: -------------------------------------------------------------------------------- 1 | # Glossary 2 | 3 | - **Algorithm**: a series of computer commands executed in a 4 | specific order for a pre-defined purpose. 5 | Algorithms process input data and produce outputs. 6 | 7 | - **Constraints** are variables used to estimate the number (or weight) 8 | of individuals in each zone. Also referred to by the longer name of 9 | **constraint variable**. We tend to use the term **linking variable** 10 | in this book because they *link* aggregate and individual level datasets. 11 | 12 | - **Combinatorial optimisation** is an approach to spatial 13 | microsimulation that generates spatial microdata by randomly 14 | selecting individuals from a survey dataset and measuring the fit 15 | between the simulated output and the constraint variables. If the 16 | fit improves after any particular change, the change is kept. 17 | Williamson (2007) provides a practical user manual. @Harland2013 18 | provides a practical demonstration of the method implemented in 19 | the Java-based Flexible Modelling Framework (FMF). 20 | 21 | - **Data frame**: a type of object (formally referred to as a class) 22 | in R, data frames are square tables composed of rows and columns of 23 | information. As with many things in R, the best way to understand 24 | data frames is to create them and experiment. The following creates 25 | a data frame with two variables: name and height: 26 | 27 | Note that each new variable is entered using the command `c()` this is 28 | how R creates objects with the *vector* data class, a one 29 | dimensional matrix — and that text data must be entered in quote 30 | marks. 31 | 32 | - **Deterministic reweighting** is an approach to generating spatial 33 | microdata that allocates fractional weights to individuals based on 34 | how representative they are of the target area. It differs from 35 | combinatorial optimisation approaches in that it requires no random 36 | numbers. The most frequently used method of deterministic 37 | reweighting is IPF. 38 | 39 | - **For loops** are instructions that tell the computer to run a 40 | certain set of command repeatedly. `for(i in 1:9) print(i)`, for 41 | example will print the value of i 9 times. 
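As a slightly fuller sketch (the numbers below are purely illustrative, loosely echoing the three zones of SimpleWorld), a loop over zones might look like this:

```{r, eval=FALSE}
# Illustrative sketch: loop over three zones and print a message for each
populations <- c(12, 10, 11) # made-up zone populations
for (zone in 1:3) {
  print(paste("Zone", zone, "has", populations[zone], "residents"))
}
```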
The best way to further 42 | understand for loops is to try them out. 43 | 44 | - **Iteration**: one instance of a process that is repeated many times 45 | until a predefined end point, often within an *algorithm*. 46 | 47 | - **Iterative proportional fitting** (IPF): an iterative process 48 | implemented in mathematics and algorithms to find the maximum 49 | likelihood of cells that are constrained by multiple sets of 50 | marginal totals. To make this abstract definition even more 51 | confusing, there are multiple terms which refer to the process, 52 | including ‘biproportional fitting’ and ‘matrix raking’. In plain 53 | English, IPF in the context of spatial microsimulation can be 54 | defined as *a statistical technique for allocating weights to 55 | individuals depending on how representative they are of different 56 | zones*. IPF is a type of deterministic reweighting, meaning that 57 | random numbers are not needed to generate the result and that the 58 | output weights are real (not integer) numbers. 59 | 60 | - A **linking variable** is a variable that is shared between individual and 61 | aggregate level data. Common examples include age and sex (the linking variables 62 | used in the SimpleWorld example): questions that are commonly asked in all 63 | kinds of survey. Linking variables are also referred to as 64 | **constraint variables** because they *constrain* the weights for individuals 65 | in each zone. 66 | 67 | - **Microdata** is the non-geographical individual level dataset from which 68 | synthetic **spatial microdata** are usually derived. This sample of the 69 | target population has also been labelled as the 'seed' 70 | (e.g. Barthelemy and Toint, 2012) and simply the 'survey data' in the academic 71 | literature. The term microdata is used in this book for its brevity and 72 | semantic link to spatial microdata. 73 | 74 | - The **population base** is roughly equivalent to the 'target population', 75 | used by statisticians to describe the population about whom they wish to 76 | draw conclusions based on a 'sample population'. 77 | The sample population is the group of individuals for whom 78 | we have individual level data. 79 | In aggregate level data, the **population base** is the 80 | complete set of individuals represented by the counts. 81 | A common example is the variable "Hours worked": 82 | only people aged 16 to 74 are generally thought of as working, so, if there is 83 | no `NA` (no answer) category, the population base is not the same as the total 84 | population of an area. A common problem faced by people using spatial microsimulation 85 | methods is incompatibility between aggregate constraints that use different 86 | population bases. 87 | 88 | - **Population synthesis** is the process of converting input data (generally 89 | non-geographical **microdata** and geographically aggregated 90 | **constraint variables**) into **spatial microdata**. 91 | 92 | - **Spatial microdata** is the name given to individual level data allocated 93 | to mutually exclusive geographical zones (see Figure 5.1 above). Spatial 94 | microdata is useful because it provides multi level information about the 95 | relationships between individuals and where they live. However, due to the 96 | high costs of large surveys and restrictions on the release of geocoded 97 | individual level data, spatial microdata is rarely available to researchers.
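A toy sketch of what such a table looks like (the attribute values echo the SimpleWorld individuals used throughout the book, but the zone assignments here are purely illustrative):

```{r, eval=FALSE}
# Toy spatial microdata: individual level attributes plus a zone identifier
data.frame(id = c(1, 2, 3), age = c(59, 54, 35), sex = c("m", "m", "m"),
           zone = c(1, 1, 2))
```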
98 | To overcome this issue, most spatial microsimulation research employs methods 99 | of **population synthesis** to generate representative spatial microdata. 100 | 101 | - **Spatial microsimulation** is the name given to an approach to modelling that 102 | comprises a series of techniques that 103 | generate, analyse and model individual level data allocated to small 104 | administrative zones. Spatial microsimulation is an approach for 105 | understanding processes that operate on individual and geographical levels. 106 | 107 | - A **weight matrix** is a 2 dimensional array that links non-spatial 108 | *microdata* to geographical zones. Each row in the weight matrix represents 109 | an individual and each column represents a zone. Thus, in R notation, 110 | the weight matrix `w` has dimensions of `nrow(ind)` rows by `nrow(cons)` 111 | where `ind` and `cons` are the microdata and constraints respectively. 112 | The value of `w[i,j]` represents the extent to which individual `i` is 113 | representative of zone `j`. `sum(w)` is the total population of the study area. 114 | The weight matrix is an efficient way of storing spatial microdata because 115 | it does not require a new row for every additional individual in the study 116 | area. For a weight matrix to be converted into spatial microdata, all the 117 | values of the weights must be integers. The conversion of a non-integer weight 118 | matrix into an integer weight matrix is known as *integerisation*. 119 | 120 | ```{r, echo=FALSE} 121 | # Any words that are highlighted in the main text can go in here 122 | ``` 123 | -------------------------------------------------------------------------------- /15-references.Rmd: -------------------------------------------------------------------------------- 1 | # Bibliography {#bibliography} 2 | 3 | ```{r, echo=FALSE} 4 | # How to create: %s/\n/\r\r/gc in vim 5 | ``` 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: smsbook 2 | Title: Spatial Microsimulation with R: a book 3 | Version: 0.0.1 4 | Authors@R: c(person("Robin", "Lovelace", role = c("aut", "cre"), email = "rob00x@gmail.com"), 5 | person("Morgane", "Dumont", role = c("aut"))) 6 | Imports: 7 | bookdown, 8 | knitr, 9 | rmarkdown, 10 | png, 11 | ggmap, 12 | GREGWT, 13 | dplyr, 14 | ipfp, 15 | rgeos, 16 | mipfp, 17 | rgdal, 18 | gridExtra, 19 | maptools, 20 | jpeg, 21 | tmap, 22 | tidyr, 23 | mlogit, 24 | simPop, 25 | reticulate 26 | Remotes: 27 | emunozh/GREGWT 28 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 Robin 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software.
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | html: 2 | Rscript -e 'bookdown::render_book("index.Rmd", output_format = "bookdown::gitbook", clean = FALSE)' 3 | cp -fvr _main.utf8.md _book/main.md 4 | # cp -fvr css/style.css _book/ 5 | # cp -fvr images _book/ 6 | 7 | build: 8 | make html 9 | Rscript -e 'browseURL("_book/index.html")' 10 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/NAMESPACE -------------------------------------------------------------------------------- /NetLogo/NLv01.R: -------------------------------------------------------------------------------- 1 | require(RNetLogo) 2 | require(dplyr) 3 | require(ggplot2) 4 | require(extrafont) 5 | loadfonts() 6 | 7 | ############ 8 | ## section 1 9 | ############ 10 | NLStart("C:\\Program Files (x86)\\NetLogo 5.1.0") 11 | NLLoadModel("C:/Users/sfos0247/Copy/Dropbox/XtraWork/spatial-microsim-book/NetLogo/SimpleWorldVersion3.nlogo") 12 | 13 | NLStart("/usr/local/netlogo-5.1.0") 14 | NLLoadModel("/home/mz/Documents/Copy/Dropbox/XtraWork/spatial-microsim-book/NetLogo/SimpleWorldVersion3.nlogo") 15 | 16 | NLCommand("setup") 17 | NLReport("ticks") 18 | NLCommand("go") 19 | NLReport("ticks") 20 | NLDoCommand(50,"go") 21 | NLReport("ticks") 22 | 23 | test <- NLDoReport(10,"go", c(" ticks", 24 | "count inhabitants with [zone = 1]", 25 | "count inhabitants with [zone = 2]", 26 | "count inhabitants with [zone = 3]"), 27 | as.data.frame = TRUE) 28 | head(test) 29 | current.state <- NLGetAgentSet(c("who","income", "zone"), 30 | "inhabitants") 31 | boxplot(current.state$income~current.state$zone, 32 | xlab="Zone", ylab="Income", main=paste("Income distribution after", 33 | NLReport("ticks"), "ticks" )) 34 | 35 | NLDoCommandWhile (" (ticks <= 100) " , "go") 36 | NLReport("ticks") 37 | 38 | NLQuit() 39 | 40 | ############ 41 | ## section 2 42 | ############ 43 | 44 | NLStart("/usr/local/netlogo-5.1.0", gui=FALSE) 45 | NLLoadModel("/home/mz/Documents/Copy/Dropbox/XtraWork/spatial-microsim-book/NetLogo/SimpleWorldVersion3.nlogo") 46 | 47 | SimpleWorld <- function(time.stable = 100) { 48 | NLCommand("setup") 49 | NLDoCommandWhile (paste("(count inhabitants with [zone = 1] < 33) and", 50 | "(count inhabitants with [zone = 2] < 33) and", 51 | "(count inhabitants with [zone = 3] < 33) and", 52 | "(time-stable <= ", time.stable, ") ") , "go") 53 | NLGetAgentSet("history", "inhabitants") 54 | } 55 | 56 | NLCommand("random-seed 42") 57 | inhabitant.histories <- SimpleWorld(50) 58 | NLQuit() 59 | 60 | ## analysis 61 | dim(inhabitant.histories)[1]*dim(inhabitant.histories)[2] 62 | history <- as.data.frame(matrix(unlist(inhabitant.histories), ncol=4, byrow=TRUE)) 63 | colnames(history) <- c("id", 
"tick","income", "zone") 64 | 65 | require(dplyr) 66 | changes <- group_by(history, id) %>% 67 | mutate( change=c(0,diff(zone))) %>% 68 | summarize(start.income = income[1], 69 | end.income = tail(income,1), 70 | income.change = end.income - start.income, 71 | zone.changes = sum(change != 0) 72 | ) 73 | 74 | par(oma=c(0.5,4,0.5,4), mar=c(4,1,1,2), mfrow=c(1,3), xpd=FALSE) 75 | plot(zone.changes ~ start.income , data=changes, axes=FALSE, xlab="") 76 | axis(1) 77 | axis(2) 78 | mtext("Starting income", 1, line=3) 79 | mtext( "Number of zone changes", 2, line=3) 80 | abline(lm(zone.changes ~ start.income , data=changes)) 81 | plot(zone.changes ~ end.income , data=changes, axes=FALSE, xlab="") 82 | axis(1) 83 | mtext( "Final income", 1, line=3) 84 | abline(lm(zone.changes ~ end.income , data=changes)) 85 | plot(zone.changes ~ income.change , data=changes, xlab="", axes=FALSE) 86 | axis(1) 87 | axis(4) 88 | mtext( "Net income gain", 1, line=3) 89 | mtext( "Number of zone changes", 4, line=3) 90 | abline(lm(zone.changes ~ income.change , data=changes)) 91 | 92 | 93 | ## section 3 94 | require(RNetLogo) 95 | require(dplyr) 96 | require(ggplot2) 97 | require(extrafonts) 98 | loadfonts() 99 | 100 | NLStart("C:\\Program Files (x86)\\NetLogo 5.1.0", gui=FALSE) 101 | NLLoadModel("C:/Users/sfos0247/Copy/Dropbox/XtraWork/spatial-microsim-book/NetLogo/SimpleWorldVersion3.nlogo") 102 | setwd("C:/Users/sfos0247/Copy/Dropbox/XtraWork/spatial-microsim-book/NetLogo") 103 | 104 | 105 | SimpleWorld <- function(angle.of.vision=360, distance.of.vision=10, time.stable = 200) { 106 | NLCommand (paste("set average-bribeability", 100)) 107 | NLCommand (paste("set stdev-bribeability", 0)) 108 | NLCommand (paste("set angle-of-vision", angle.of.vision)) 109 | NLCommand (paste("set distance-of-vision", distance.of.vision)) 110 | NLCommand("setup") 111 | NLDoCommandWhile (paste("(time-stable <= ", time.stable, ") ") , "go") 112 | c(NLReport(c("ticks - time-stable", nrow(unique(NLGetAgentSet( "zone", "inhabitants")))))) 113 | } 114 | 115 | MultipleSimulations <- function (reps=1, a.o.v = 360, d.o.v = c(5,10)){ 116 | p.s <- expand.grid(rep = seq(1, reps), a.o.v = a.o.v, d.o.v = d.o.v) 117 | reslut.list <- lapply(as.list(1:nrow(p.s)), function(i) 118 | setNames(cbind(p.s[i,], SimpleWorld(p.s[i,2], p.s[i,3])), c("rep", "a.o.v", "d.o.v", "ticks", "zones"))) 119 | do.call(rbind, reslut.list) 120 | } 121 | 122 | MultipleSimulations(2,360,c(5,10)) 123 | 124 | # results.df <- MultipleSimulations2(20,seq(60,360,30),seq(1,10)) 125 | #save(results.df, file="multiSimRun.R") 126 | load("multiSimRun.R") 127 | head(results.df) 128 | 129 | 130 | 131 | # summaries for plots 132 | av.ticks2 <- results.df %>% 133 | group_by(a.o.v, d.o.v) %>% 134 | # filter(zones == 1) %>% 135 | summarize(mean.ticks = mean(ticks, na.rm=TRUE)) 136 | 137 | zones <- results.df %>% 138 | group_by(a.o.v, d.o.v, zones) %>% 139 | summarize(height = n()/20) %>% 140 | group_by(a.o.v, d.o.v) %>% 141 | arrange(desc(zones)) %>% 142 | mutate(shift=-0.5 + cumsum(height)-height + height/2) 143 | 144 | 145 | ## fig 10 146 | png(file="zones.png", height=450, width=750, family="Garamond") 147 | 148 | ggplot(zones, aes(a.o.v,y=d.o.v + shift, fill=as.factor(zones), height=height)) + 149 | geom_tile(col="white") + xlab('aov') + ylab('dov') + 150 | scale_fill_manual(values=c("gray30", "gray50", 151 | "gray80"), name="No of zones") + 152 | theme_bw() + theme(panel.border = element_blank(), panel.grid.major = element_blank(), 153 | panel.grid.minor = 
element_blank(),axis.text=element_text(size=16), 154 | title=element_text(size=16), 155 | legend.text=element_text(size=12), 156 | legend.key = element_rect(colour = "white")) + 157 | scale_x_continuous(breaks=seq(60,360,30) ) + 158 | scale_y_continuous(breaks=seq(1,10,1) ) + 159 | xlab("Angle of vision") + 160 | ylab("Distance of vision") + 161 | guides(fill = guide_legend(override.aes = list(colour = NULL))) 162 | dev.off() 163 | 164 | ##fig 11 165 | png(file="ticks.png", height=450, width=750, family="Garamond") 166 | ggplot(av.ticks, aes(a.o.v,y=d.o.v , fill=mean.ticks)) + 167 | geom_tile(col="white")+ 168 | theme_bw() + theme(panel.border = element_blank(), panel.grid.major = element_blank(), 169 | panel.grid.minor = element_blank(), axis.text=element_text(size=16), 170 | title=element_text(size=16), 171 | legend.text=element_text(size=12)) + 172 | scale_x_continuous(breaks=seq(60,360,30) ) + 173 | scale_y_continuous(breaks=seq(1,10) ) + 174 | scale_fill_gradient(name = "Tick count", trans = "log",low="gray80",high="gray20", 175 | breaks=c(50,100, 500, 1000, 5000)) + 176 | xlab("Angle of vision") + 177 | ylab("Distance of vision") 178 | dev.off() 179 | 180 | -------------------------------------------------------------------------------- /NetLogo/agents.csv: -------------------------------------------------------------------------------- 1 | 1,1,59,"m",2868 2 | 2,1,54,"m",2474 3 | 3,1,35,"m",2231 4 | 3,1,35,"m",2231 5 | 3,1,35,"m",2231 6 | 4,1,73,"f",3152 7 | 5,1,49,"f",2473 8 | 5,1,49,"f",2473 9 | 5,1,49,"f",2473 10 | 5,1,49,"f",2473 11 | 4,1,73,"f",3152 12 | 3,1,35,"m",2231 13 | 1,2,59,"m",2868 14 | 2,2,54,"m",2474 15 | 4,2,73,"f",3152 16 | 4,2,73,"f",3152 17 | 4,2,73,"f",3152 18 | 4,2,73,"f",3152 19 | 5,2,49,"f",2473 20 | 1,2,59,"m",2868 21 | 3,2,35,"m",2231 22 | 2,2,54,"m",2474 23 | 3,3,35,"m",2231 24 | 4,3,73,"f",3152 25 | 4,3,73,"f",3152 26 | 5,3,49,"f",2473 27 | 5,3,49,"f",2473 28 | 5,3,49,"f",2473 29 | 5,3,49,"f",2473 30 | 5,3,49,"f",2473 31 | 4,3,73,"f",3152 32 | 1,3,59,"m",2868 33 | 3,3,35,"m",2231 34 | -------------------------------------------------------------------------------- /NetLogo/ints_df.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/NetLogo/ints_df.RData -------------------------------------------------------------------------------- /NetLogo/multiSimRun.R: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/NetLogo/multiSimRun.R -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: 3 | md_document: 4 | variant: markdown_github 5 | --- 6 | 7 | 8 | 9 | ```{r, echo = FALSE} 10 | knitr::opts_chunk$set( 11 | collapse = TRUE, 12 | comment = "#>", 13 | fig.path = "figures/" 14 | ) 15 | is_online = curl::has_internet() 16 | ``` 17 | 18 | # Spatial Microsimulation with R 19 | 20 | [![DOI](https://zenodo.org/badge/20914/Robinlovelace/spatial-microsim-book.svg)](https://zenodo.org/badge/latestdoi/20914/Robinlovelace/spatial-microsim-book) [![Build Status](https://travis-ci.org/Robinlovelace/spatial-microsim-book.svg?branch=master)](https://travis-ci.org/Robinlovelace/spatial-microsim-book) 21 | 22 | This repository hosts the code and data used in 
*Spatial Microsimulation with R*, a book by [Robin Lovelace](http://robinlovelace.net/) and [Morgane Dumont](https://directory.unamur.be/staff/modumont), (with chapters [11](http://spatial-microsim-book.robinlovelace.net/tresis.html) and [12](http://spatial-microsim-book.robinlovelace.net/abm) contributed by [Richard Ellison](http://sydney.edu.au/business/staff/richard.ellison) and [Maja Založnik](https://www.oxfordmartin.ox.ac.uk/people/565) respectively). 23 | 24 | The book is now published and is available from [CRC Press](https://www.crcpress.com/Spatial-Microsimulation-with-R/Lovelace-Dumont/p/book/9781498711548). 25 | 26 | We hope to continue to update the book as methods evolve and we encourage contributions on any part of the book, including: 27 | 28 | - Improvements to the text, e.g. clarifying unclear sentences, fixing typos (see guidance from [Yihui Xie](https://yihui.name/en/2013/06/fix-typo-in-documentation/)). 29 | - Changes to the code, e.g. to do things in a more efficient way. 30 | - Suggestions on content (see the project's [issue tracker](https://github.com/Robinlovelace/spatial-microsim-book/issues)). 31 | 32 | The latest version of the book can be viewed at 33 | [the book's homepage at spatial-microsim-book.robinlovelace.net](http://spatial-microsim-book.robinlovelace.net/). 34 | Anyone can contribute to this book [here](https://github.com/Robinlovelace/spatial-microsim-book). 35 | 36 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | Spatial Microsimulation with R 3 | ============================== 4 | 5 | [![DOI](https://zenodo.org/badge/20914/Robinlovelace/spatial-microsim-book.svg)](https://zenodo.org/badge/latestdoi/20914/Robinlovelace/spatial-microsim-book) [![Build Status](https://travis-ci.org/Robinlovelace/spatial-microsim-book.svg?branch=master)](https://travis-ci.org/Robinlovelace/spatial-microsim-book) 6 | 7 | This repository hosts the code and data used in *Spatial Microsimulation with R*, a book by [Robin Lovelace](http://robinlovelace.net/) and [Morgane Dumont](https://directory.unamur.be/staff/modumont), (with chapter [10](http://spatial-microsim-book.robinlovelace.net/ha.html) contributed by [Johan Barthélemy](https://smart.uow.edu.au/people/UOW192467.html), chapter [11](http://spatial-microsim-book.robinlovelace.net/tresis.html) contributed by [Richard Ellison](http://sydney.edu.au/business/staff/richard.ellison) and [David Hensher](http://sydney.edu.au/business/staff/david.hensher) and chapter [12](http://spatial-microsim-book.robinlovelace.net/abm) contributed by [Maja Založnik](https://www.oxfordmartin.ox.ac.uk/people/565)). 8 | 9 | The book is now published and is available from [CRC Press](https://www.crcpress.com/Spatial-Microsimulation-with-R/Lovelace-Dumont/p/book/9781498711548). 10 | 11 | We hope to continue to update the book as methods evolve and we encourage contributions on any part of the book, including: 12 | 13 | - Improvements to the text, e.g. clarifying unclear sentences, fixing typos (see guidance from [Yihui Xie](https://yihui.name/en/2013/06/fix-typo-in-documentation/)). 14 | - Changes to the code, e.g. to do things in a more efficient way. 15 | - Suggestions on content (see the project's [issue tracker](https://github.com/Robinlovelace/spatial-microsim-book/issues)). 
16 | 17 | The latest version of the book can be viewed at [the book's homepage at spatial-microsim-book.robinlovelace.net](http://spatial-microsim-book.robinlovelace.net/). Anyone can contribute to this book [here](https://github.com/Robinlovelace/spatial-microsim-book). 18 | -------------------------------------------------------------------------------- /additional-material/13-additional.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Additional tools and techniques" 3 | layout: default 4 | bibliography: bibliography.bib 5 | --- 6 | 7 | # Additional tools and techniques {#additional} 8 | 9 | ```{r, echo=FALSE} 10 | 11 | ``` 12 | 13 | ## R packages for spatial microsimulation 14 | 15 | Most of the code presented so far --- 16 | with the exception of plotting commands based on **ggplot2**, 17 | data manipulation tools from **dplyr** and some packages offering 18 | niche functionality such as **ipfp** and **RNetLogo** --- has been implemented in R's base functions. 19 | This is deliberate: base R offers robustness of 20 | code and minimisation of installation dependencies. 21 | There are often dozens of ways of doing one thing 22 | in R, and a bewildering number of packages offering to help. 23 | The way that will be understood by the greatest number 24 | of people and work on the maximum number of computers 25 | is often preferable, however, unless there is a 26 | clear advantage to using additional packages. 27 | The behaviour of contributed packages 28 | (excepting the `r-recommended` [packages](see http://cran.r-project.org/bin/linux/debian/README)) 29 | may change unexpectedly, 30 | whereas core R functions are likely to remain stable over many decades to come. 31 | 32 | With this caveat in mind, let's begin our tour of 33 | some contributed R that are of use for 34 | spatial microsimulation. 35 | 36 | ### **GREGWT** 37 | 38 | ### **sms** 39 | 40 | ### **multilevel** 41 | 42 | ## The Flexible Modelling Framework (FMF) 43 | 44 | ## Allocation of home-work locations 45 | 46 | A question that arises after spatial microdata have been allocated to 47 | geographical zones is: where exactly in the zone do the people inhabit? 48 | For some applications this may not matter but for others, such as disaster 49 | management and 50 | or transport planning, the precise location of an individual 51 | is important [@Smith2014; @Hanaoka2014]. 52 | 53 | ```{r, echo=FALSE} 54 | # TODO: add reference for above 55 | ``` 56 | 57 | In this section we will see how spatial microdata can be allocated first 58 | to 'urban areas' (to prevent people being placed in the sea, for example) 59 | and then to individual buildings based on freely available Open Street Map 60 | data. This process also makes sense from the perspective of visualisation: 61 | the typical choropleth map outputs of spatial microsimulation models 62 | over-represent low density rural areas and under-represent dense 63 | urban areas in terms of visual 'real-estate'. Plotting attributes only 64 | in the buildings where they occur can help overcome this issue. 65 | 66 | ```{r, echo=FALSE} 67 | # TODO: add figure from O'Brien's online map 68 | ``` 69 | 70 | ## Spatial interaction modelling 71 | 72 | In this example we will demonstrate a method for 73 | evaluating the distributional impact of a new pathway 74 | in an urban setting. 
75 | 76 | ```{r} 77 | ## Advanced applications in agent-based modelling 78 | ``` 79 | 80 | -------------------------------------------------------------------------------- /additional-material/input-data-mipfp.csv: -------------------------------------------------------------------------------- 1 | Health,Age Group, White, Mixed, Asian and Asian British, Black or Black British, Chinese or Other ethnic group,Total 2 | Very good health, 0 to 15,67315,4151,7283,3844,987,83580 3 | , 16 to 24,35665,1403,4387,1706,540,43701 4 | , 25 to 34,34106,948,4919,1865,753,42591 5 | , 35 to 44,32047,516,2759,1650,445,37417 6 | , 45 to 54,26311,317,1184,980,171,28963 7 | , 55 to 64,16263,88,525,293,92,17261 8 | , 65 to 74,7897,38,143,89,18,8185 9 | , 75 and over,3170,12,55,31,5,3273 10 | ,Sum,222774,7473,21255,10458,3011,264971 11 | Good health, 0 to 15,14044,1047,2709,1046,321,19167 12 | , 16 to 24,16018,687,2146,710,260,19821 13 | , 25 to 34,21479,748,3444,1065,457,27193 14 | , 35 to 44,25922,433,2950,1190,407,30902 15 | , 45 to 54,28949,345,1982,1044,253,32573 16 | , 55 to 64,26483,130,1178,355,117,28263 17 | , 65 to 74,19747,76,474,221,67,20585 18 | , 75 and over,12868,42,201,110,19,13240 19 | ,Sum,165510,3508,15084,5741,1901,191744 20 | Fair health, 0 to 15,1699,153,323,144,41,2360 21 | , 16 to 24,2438,115,280,120,33,2986 22 | , 25 to 34,3602,140,563,171,78,4554 23 | , 35 to 44,6017,161,760,323,112,7373 24 | , 45 to 54,9288,137,897,384,112,10818 25 | , 55 to 64,12566,80,787,191,92,13716 26 | , 65 to 74,13323,53,540,252,57,14225 27 | , 75 and over,17776,55,359,223,35,18448 28 | ,Sum,66709,894,4509,1808,560,74480 29 | Bad health, 0 to 15,366,36,72,30,9,513 30 | , 16 to 24,443,15,55,18,4,535 31 | , 25 to 34,917,25,105,54,36,1137 32 | , 35 to 44,1951,45,191,90,43,2320 33 | , 45 to 54,3408,66,249,124,53,3900 34 | , 55 to 64,4554,27,355,76,23,5035 35 | , 65 to 74,4123,17,235,92,18,4485 36 | , 75 and over,6307,25,178,99,24,6633 37 | ,Sum,22069,256,1440,583,210,24558 38 | Very bad health, 0 to 15,148,5,24,13,4,194 39 | , 16 to 24,132,5,19,10,4,170 40 | , 25 to 34,223,12,34,11,8,288 41 | , 35 to 44,520,7,50,27,11,615 42 | , 45 to 54,970,20,81,44,11,1126 43 | , 55 to 64,1250,18,82,23,14,1387 44 | , 65 to 74,1201,4,57,24,11,1297 45 | , 75 and over,1971,7,77,44,8,2107 46 | ,Sum,6415,78,424,196,71,7184 47 | Total, 0 to 15,83572,5392,10411,5077,1362,105814 48 | , 16 to 24,54696,2225,6887,2564,841,67213 49 | , 25 to 34,60327,1873,9065,3166,1332,75763 50 | , 35 to 44,66457,1162,6710,3280,1018,78627 51 | , 45 to 54,68926,885,4393,2576,600,77380 52 | , 55 to 64,61116,343,2927,938,338,65662 53 | , 65 to 74,46291,188,1449,678,171,48777 54 | , 75 and over,42092,141,870,507,91,43701 55 | ,Sum,483477,12209,42712,18786,5753,562937 56 | -------------------------------------------------------------------------------- /additional-material/input-data-mipfp.csv.ods: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/additional-material/input-data-mipfp.csv.ods -------------------------------------------------------------------------------- /additional-material/reformatting-mipfp-example.Rmd: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Load data 4 | 5 | ```{r} 6 | ctabs <- read.csv("additional-material/input-data-mipfp.csv") 7 | ctabs 8 | ``` 9 | 10 | 11 | 12 | ```{r, echo=FALSE} 13 | pkgs <- c("readxl") 14 | lapply(pkgs, library, 
character.only = T) 15 | 16 | df <- read_excel("/tmp/Case Study 3 Data.xlsx", sheet = 2) 17 | df <- df[1:48,] 18 | head(df) 19 | 20 | n1 <- as.character(df[2,3:7]) # first set of names 21 | 22 | 23 | # remove sums 24 | d <- df 25 | sel <- df[,2] == "Sum" 26 | d <- d[!sel,] 27 | d <- d[-c(1:2)] 28 | d <- d[1:5] 29 | d <- d[-c(1, nrow(d)),] 30 | d <- d[-1,] 31 | 32 | # convert to multi-dimensional array 33 | dnames <- c("age", "eth", "health") 34 | names 35 | dims <- c(8, 5, 5) 36 | ``` 37 | 38 | ```{r, eval=FALSE} 39 | # dvec <- as.numeric(d) 40 | # apply(dvec, MARGIN = 2, FUN = class) 41 | dvec <- apply(X = d, MARGIN = 2, FUN = as.numeric) 42 | 43 | seed <- array(data = dvec, dim = dims, dimnames = dnames) 44 | 45 | # marginal 46 | 47 | # read-in the data 48 | 49 | # for zone 1 50 | target.data <- list(c1, c2, c3) # each c is marginal for 1 constraint and 1 zone 51 | Ipfp() # for zone 1 52 | 53 | result <- as.list(1:348) 54 | result[[1]] # 55 | for(i in 1:nrow(constraints)){ 56 | target.data <- list(c1 = ..., 57 | ) 58 | result[[i]] <- Ipfp() 59 | } 60 | 61 | ``` 62 | 63 | 64 | 65 | 66 | -------------------------------------------------------------------------------- /backup-code/.Rapp.history: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/backup-code/.Rapp.history -------------------------------------------------------------------------------- /backup-code/CakeMap-dataknut.R: -------------------------------------------------------------------------------- 1 | ############################################ 2 | #### From the spatial-microsim-book project 3 | #### https://github.com/Robinlovelace/spatial-microsim-book 4 | ############################################ 5 | 6 | # Additions from Ben Anderson (@dataknut) 7 | 8 | # Loading the data: Ensure R is in the right working directory 9 | ind <- read.csv("data/CakeMap/ind.csv") 10 | cons <- read.csv("data/CakeMap/cons.csv") 11 | 12 | # Take a quick look at the data 13 | head(ind) 14 | head(cons) 15 | 16 | # load constraints separately - normally this would be first stage 17 | con1 <- cons[1:12] # load the age/sex constraint 18 | con2 <- cons[13:14] # load the car/no car constraint 19 | con3 <- cons[15:24] # socio-economic class 20 | 21 | cat_labs <- names(cons) # category names, from correct from cons.R 22 | 23 | # set-up aggregate values - column for each category 24 | source("data/CakeMap/categorise.R") # this script must be customised to input data 25 | 26 | # check constraint totals - should be true 27 | sum(ind_cat[,1:ncol(con1)]) == nrow(ind) # is the number in each category correct? 28 | sum(ind_cat[,ncol(con1)+1:ncol(con2)]) == nrow(ind) 29 | 30 | # create 2D weight matrix (individuals, areas) 31 | weights <- array(NA, dim=c(nrow(ind),nrow(cons))) 32 | 33 | # convert survey data into aggregates to compare with census (3D matix) 34 | ind_agg <- matrix(colSums(ind_cat), nrow(cons), ncol(cons), byrow = T) 35 | ind_agg[1:5,1:10] # look at what we've created - n. 
individuals replicated throughout 36 | 37 | ############## The IPF part ############# 38 | 39 | library(ipfp) 40 | cons <- apply(cons, 2, as.numeric) 41 | ind_catt <- t(ind_cat) 42 | # set up initial vector as a load of 1s 43 | x0 <- rep(1, nrow(ind)) 44 | # you can use x0 as a way to start from the original survey weights 45 | # as it just has to be a numeric initial vector (length ncol) 46 | # this might be useful if you have a small number of constraints but 47 | # if you have many the effect of the IPF will tend to drown them out 48 | 49 | weights <- apply(cons, 1, function(x) ipfp(x, ind_catt, x0, 20)) 50 | 51 | ### Convert back to aggregates for testing 52 | for (i in 1:nrow(cons)){ # convert con1 weights back into aggregates 53 | ind_agg[i,] <- colSums(ind_cat * weights[,i])} 54 | 55 | # test results for first row (not necessary for model) 56 | # you could iterate over this to test each zone 57 | ind_agg[1,1:15] - cons[1,1:15] # should be zero for final column - last constraint 58 | # which should remind us that IPF works to an order - so the last constraint is 59 | # fitted perfectly. This might matter if you think other constraints should be fitted perfectly... 60 | cor(as.numeric(ind_agg), as.numeric(cons)) # fit between contraints and estimate 61 | 62 | # at this point RL wants to integrise to create a spatial microdataset of whole 'units' 63 | # But we don't have to - for many applications we may want to keep all the survey units (people or households) 64 | # with their fractional weights to avoid losing information. It also helps if we're interested in distributional 65 | # statistics for each area. 66 | 67 | # to do this simply join the weights matrix back on to the original individual data 68 | # we have to assume R has kept them in the correct order! 69 | 70 | # just do a column bind 71 | ind_final <- cbind(ind,weights) 72 | View(ind_final) 73 | # so now we have a weight for each individual for each zone and from here on we can do 74 | # a range of weighted statistics or collapse to tables by zone etc etc 75 | # Would be a good idea at this point to rename the zone columns to their actual geography. 
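# Added sketch (not part of the original script): one example of such a weighted
# statistic is the weighted count of each NS-SEC category per zone. Each column
# of 'weights' corresponds to one zone, so grouped weighted sums give a zone-level
# class profile. NSSEC8 is assumed to be a column of ind here (check names(ind));
# swap in any other categorical variable of interest.
nssec_by_zone <- apply(weights, 2, function(w) tapply(w, ind$NSSEC8, sum))
round(nssec_by_zone[, 1:3], 1) # weighted NS-SEC profile of the first three zones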
-------------------------------------------------------------------------------- /backup-code/tests.R: -------------------------------------------------------------------------------- 1 | con_age <- read.csv("data/SimpleWorld/age.csv") 2 | con_sex <- read.csv("data/SimpleWorld/sex.csv") 3 | ind <- read.csv("data/SimpleWorld/ind.csv") 4 | 5 | (ind$age <- cut(ind$age, breaks = c(0, 49, 120), labels = c("a0_49", "a50+"))) 6 | 7 | names(con_age) <- levels(ind$age) # rename aggregate variables 8 | h 9 | # make the number of constraints larger - to see benefit of parallel processing 10 | cons <- cbind(con_age, con_sex) 11 | 12 | cat_age <- model.matrix(~ ind$age - 1) 13 | cat_sex <- model.matrix(~ ind$sex - 1)[, c(2, 1)] 14 | (ind_cat <- cbind(cat_age, cat_sex)) # combine flat representations of the data 15 | 16 | cons <- apply(cons, 2, as.numeric) # convert matrix to numeric data type 17 | # cons <- cons[sample(3, size = 500, replace = T),] 18 | 19 | weights <- matrix(data = NA, nrow = nrow(ind), ncol = nrow(cons)) 20 | 21 | ind_catt <- t(ind_cat) 22 | x0 <- rep(1, nrow(ind)) 23 | 24 | weights <- apply(cons, MARGIN = 1, FUN = function(x) ipfp(as.numeric(x), ind_catt, x0, tol = 1, maxit = 20)) 25 | 26 | # Tests of parallel implementation of ipf 27 | library(parallel) 28 | 29 | detectCores() 30 | cl <- makeCluster(getOption("cl.cores", 4)) 31 | cl <- clusterExport(cl, varlist = c("ipfp", "ind_cat", "ind")) 32 | 33 | parApply(cons, MARGIN = 1, FUN = function(x) ipfp(as.numeric(x), t(ind_cat), x0 = rep(1,nrow(ind)))) 34 | 35 | 36 | xArray <- array(NA, dim = c(3,3)) 37 | xMatrix <- matrix(NA, nrow = 3, ncol = 3) 38 | identical(xArray, xMatrix) 39 | 40 | for(i in 1:ncol(weights)){ 41 | weights[,i] <- ipfp(cons[i,], ind_catt, x0, maxit = 20) 42 | } 43 | 44 | # Demonstration of incorrect ipfp 45 | weights1 <- apply(cons, 1, function(x) ipfp(x, ind_catt, x0, tol = 1, maxit = 20)) 46 | weights2 <- apply(cons, 1, function(x) ipfp(x, ind_catt, x0, 20)) 47 | 48 | -------------------------------------------------------------------------------- /build.R: -------------------------------------------------------------------------------- 1 | # TODO for the book project overall 2 | # Individual chapters on eprints 3 | # Implement regex to make bibliography happen in CRC press style 4 | # Add urls to all the references and packages 5 | # Propensity to cycle 6 | # IPF in R/loglin/mipfp/GREGWT 7 | # Reference UrbanSim 8 | # Chapter summaries at outset? 
9 | # Mention of collaborative project early on 10 | 11 | pkgs <- c("knitr", "rmarkdown", "png", "ggmap", "dplyr", "ipfp", "rgeos", "mipfp", "rgdal", "gridExtra", "maptools", "jpeg", "tmap", "tidyr", "mlogit", "simPop") 12 | wpacks <- pkgs %in% installed.packages() 13 | install.packages(pkgs[!wpacks]) 14 | 15 | # file.copy(from = "~/Documents/smr.bib", to = "bibliography.bib", overwrite = T) 16 | 17 | # # View the order chapters will be knitted (see code/book-functions.R) 18 | # # chap_ord <- c(7,16,10,5,12,2,8,4,13,14,15,11,1,3,6,9) 19 | # cfiles <- list.files(pattern = "*.Rmd$") 20 | # # cfiles <- cfiles[chap_ord] # chapter order 21 | # cfiles 22 | # 23 | # # Add book header 24 | # book_header = readLines(textConnection('--- 25 | # title: "Spatial microsimulation with R" 26 | # output: 27 | # \ \ pdf_document: 28 | # \ \ \ \ fig_caption: yes 29 | # \ \ \ \ highlight: monochrome 30 | # \ \ \ \ includes: null 31 | # \ \ \ \ keep_tex: yes 32 | # \ \ \ \ number_sections: yes 33 | # \ \ \ \ toc: yes 34 | # bibliography: bibliography.bib 35 | # csl: elsevier-harvard.csl 36 | # layout: default 37 | # ---')) 38 | # 39 | # source("code/book-functions.R") 40 | # # file.remove("book.Rmd") 41 | # # Rmd_bind(book_header = book_header) 42 | # Rmd_bind_mod(book_header = book_header) 43 | # 44 | # # Packages needed to build the book 45 | # # install.packages("knitr", "rmarkdown", "png", "ggmap", "dplyr", "ipfp") 46 | # library(knitr) 47 | # library(rmarkdown) 48 | # 49 | # # Build the book: 50 | # render("book.Rmd", output_format = "pdf_document") 51 | 52 | 53 | # Build the CRC-formated version - requires local files 54 | # need to build the .tex manually for references to compile 55 | # source("code/build-CRC-version.R") 56 | # # Make latex-specific changes automated 57 | # booktex <- readLines("spatial-microsim-book.tex") 58 | # booktex[grep("\\{Glossary\\}", booktex)] 59 | # booktex <- gsub(pattern = "chapter\\{Glossary\\}", "chapter*\\{Glossary\\}\n\\\\addcontentsline{toc}{chapter}{Glossary} 60 | # ", booktex) 61 | # writeLines(booktex, "spatial-microsim-book.tex") 62 | # in case index does not build - run again! 
63 | # system("pdflatex --interaction=nonstopmode spatial-microsim-book.tex") 64 | 65 | # For website build see gh-pages version 66 | 67 | # Files to move to gh-pages branch 68 | # file.remove("book.Rmd") 69 | 70 | # Remove latex-specific document links for website 71 | # cfiles <- list.files("/tmp", pattern = "*.Rmd", full.names = T) 72 | # for(i in cfiles){ 73 | # text <- readLines(i) 74 | # sel <- grepl("\\(\\#", text) 75 | # text <- text[!sel] 76 | # writeLines(text, con = i) 77 | # } 78 | 79 | # # # regex with R - convert book ready for regexxing 80 | # d <- readLines("introduction.Rmd") 81 | # sel <- grep("@", d) 82 | # s <- d[sel] 83 | # gsub(".+?(?=a)", replacement = "", s, perl = T) # test of greedy matching 84 | # 85 | # # select quotes 86 | # 87 | # s <- grep(" @", d) 88 | # s <- grep("\\ @|\\[@", d) 89 | # d[s] 90 | 91 | # backup 92 | # system("cp -rv ~/Dropbox/spatial-microsim-book /media/robin/data/backups/") 93 | 94 | # command-line tools for dif tracking 95 | # latexdiff book-b4-comments.tex book.tex > dif.tex 96 | # pdflatex dif.tex 97 | 98 | -------------------------------------------------------------------------------- /cache-data-prep.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/cache-data-prep.RData -------------------------------------------------------------------------------- /cache-smsim-in-R.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/cache-smsim-in-R.RData -------------------------------------------------------------------------------- /code/CakeMap.R: -------------------------------------------------------------------------------- 1 | ############################################ 2 | #### From the spatial-microsim-book project 3 | #### https://github.com/Robinlovelace/spatial-microsim-book 4 | ############################################ 5 | 6 | library(dplyr) # load dplyr package for joining datasets 7 | 8 | # Loading the data: Ensure R is in the right working directory 9 | ind <- read.csv("data/CakeMap/ind.csv") 10 | cons <- read.csv("data/CakeMap/cons.csv") 11 | 12 | # Take a quick look at the data 13 | head(ind) 14 | head(cons) 15 | 16 | # Load constraints separately - normally this would be first stage 17 | con1 <- cons[1:12] # load the age/sex constraint 18 | con2 <- cons[13:14] # load the car/no car constraint 19 | con3 <- cons[15:24] # socio-economic class 20 | 21 | cat_labs <- names(cons) # category names, from correct from cons.R 22 | 23 | # Set-up aggregate values - column for each category 24 | source("data/CakeMap/categorise.R") # script to create binary dummy variables 25 | # Check constraint totals - should be true 26 | sum(ind_cat[,1:ncol(con1)]) == nrow(ind) # is the number in each category correct? 
27 | sum(ind_cat[,ncol(con1)+1:ncol(con2)]) == nrow(ind) 28 | 29 | # Create 2D weight matrix (individuals, areas) 30 | weights <- array(NA, dim=c(nrow(ind),nrow(cons))) 31 | 32 | # Convert survey data into aggregates to compare with census 33 | ind_agg <- matrix(colSums(ind_cat), nrow(cons), ncol(cons), byrow = T) 34 | 35 | # Iterative proportional fitting (IPF) stage 36 | library(ipfp) # load the ipfp package - may need install.packages("ipfp") 37 | cons <- apply(cons, 2, as.numeric) # convert the constraints to 'numeric' 38 | ind_catt <- t(ind_cat) # transpose the dummy variables for ipfp 39 | x0 <- rep(1, nrow(ind)) # set the initial weights 40 | weights <- apply(cons, 1, function(x) ipfp(x, ind_catt, x0, maxit = 20)) 41 | 42 | ### Convert back to aggregates 43 | ind_agg <- t(apply(weights, 2, function(x) colSums(x * ind_cat))) 44 | 45 | # test results for first row (not necessary for model) 46 | ind_agg[1,1:15] - cons[1,1:15] # should be zero or close to zero 47 | cor(as.numeric(ind_agg), as.numeric(cons)) # fit between constraints and estimate 48 | 49 | # Integerise if integer results are required - open code/CakeMapInts.R to see how 50 | source("code/CakeMapInts.R") 51 | 52 | # Benchmarking 53 | # library(microbenchmark) 54 | # microbenchmark(source("CakeMap.R"), times = 1) # 2.05 s 55 | # # How long does this operation take in pure R? 56 | # old <- setwd("~/repos/smsim-course/") 57 | # microbenchmark(source("cMap.R"), times = 1) # 76.72 s 58 | # setwd(old) -------------------------------------------------------------------------------- /code/CakeMapInts.R: -------------------------------------------------------------------------------- 1 | # Script 'integerising' CakeMap weights, then generating and exploring spatial microdata 2 | 3 | source("code/functions.R") # functions for spatial microsimulation, inc. int_trs 4 | 5 | ints <- unlist(apply(weights, 2, function(x) int_expand_vector(int_trs(x)))) # generate integerised result 6 | ints_df <- data.frame(id = ints, zone = rep(1:nrow(cons), round(colSums(weights)))) 7 | ind$id <- 1:nrow(ind) # assign each individual an id 8 | 9 | # Create spatial microdata, by joining the ids with associated attributes 10 | ints_df <- inner_join(ints_df, ind) # join the individual-level attributes 11 | 12 | # Exploration of individual-level variability in class by zone 13 | class(ints_df$NSSEC8) # what class is the class variable?
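# NB: if NSSEC8 is stored as a factor, as.numeric() below returns the underlying
# integer level codes rather than the labels themselves - hence the class() check above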
14 | ints_df$NSSEC <- as.numeric(ints_df$NSSEC8) # convert to numeric class 15 | ints_df$NSSEC[ ints_df$NSSEC > 10] <- NA # dealing with NA data 16 | sd_nssec <- aggregate(ints_df$NSSEC, by = list(ints_df$zone), FUN = sd, na.rm = TRUE) 17 | which.max(sd_nssec$x) # which zone has the greatest variability in NS-SEC groups? 18 | -------------------------------------------------------------------------------- /code/CakeMapMipfpCon1Convert.R: -------------------------------------------------------------------------------- 1 | # Transform con1 into a 3D array: con1_convert 2 | 3 | names <- c(list(rownames(cons)),dimnames(weight_init)[c(4,6)]) 4 | con1_convert <- array(NA, dim=c(nrow(cons),2,6), dimnames = names) 5 | 6 | for(zone in rownames(cons)){ 7 | for (sex in dimnames(con1_convert)$Sex){ 8 | for (age in dimnames(con1_convert)$ageband4){ 9 | con1_convert[zone,sex,age] <- con1[zone,paste(sex,age,sep="")] 10 | } 11 | } 12 | } 13 | 14 | # check margins per zone: 15 | table(rowSums(con1)==apply(con1_convert, 1, sum)) -------------------------------------------------------------------------------- /code/CakeMapMipfpData.R: -------------------------------------------------------------------------------- 1 | # Loading the CakeMap data 2 | # Changing the category names so they match between ind and cons 3 | 4 | 5 | # Loading the data: Ensure R is in the right working directory 6 | ind <- read.csv("data/CakeMap/ind.csv") 7 | cons <- read.csv("data/CakeMap/cons.csv") 8 | 9 | # Load constraints separately - normally this would be the first stage 10 | con1 <- cons[1:12] # load the age/sex constraint 11 | con2 <- cons[13:14] # load the car/no car constraint 12 | con3 <- cons[15:24] # socio-economic class 13 | 14 | # Rename the categories in "ind" to correspond to those of cons 15 | ind$Car <- sapply(ind$Car, FUN = switch, "Car", "NoCar") 16 | ind$Sex <- sapply(ind$Sex, FUN = switch, "m", "f") 17 | ind$NSSEC8 <- as.factor(ind$NSSEC8) 18 | levels(ind$NSSEC8) <- colnames(con3) 19 | ind$ageband4 <- gsub(pattern = "-", replacement = "_", x = ind$ageband4) 20 | 21 | -------------------------------------------------------------------------------- /code/CakeMapPlot.R: -------------------------------------------------------------------------------- 1 | # Script for plotting the output of cMap 2 | # Must be run after cMap.R and TRS-integerisation.R 3 | 4 | # Load the prerequisite packages - you may need to install these 5 | # E.g. install.packages("ggplot2") 6 | x <- c("dplyr", "rgeos", "rgdal", "ggmap") 7 | lapply(x, library, character.only = T) 8 | 9 | # save geographic names to the cakes output 10 | geonames <- read.csv("data/CakeMap/cars-raw.csv") 11 | head(geonames) 12 | geonames <- as.character(geonames[3:126,2]) 13 | # NB: CakeMap.R must be run first 14 | source("code/CakeMap.R") 15 | cakes = as_data_frame(ind_agg) 16 | geocakes <- bind_cols(id = geonames, cakes) 17 | head(geocakes) 18 | 19 | # load the geographic data 20 | load("data/CakeMap/wards.RData") 21 | wards <- spTransform(wards, CRSobj=CRS("+init=epsg:4326")) # transform CRS 22 | 23 | # prepare to join the geographic data with cake data 24 | names(wards) 25 | names(geocakes)[1] <- names(wards)[1] <- "id" # rename geocakes' geonames for join 26 | head(geocakes) 27 | head(wards@data) 28 | head(left_join(wards@data, geocakes)) 29 | wards@data <- left_join(wards@data, geocakes) 30 | 31 | # fortify the data for ggplot2 32 | # you'll need to install.packages("rgeos") if not already installed 33 | wardsF <- fortify(wards, region="id") 34 | head(wardsF)
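# join the ward attributes (including the simulated cake-consumption estimates)
# onto the fortified polygons via the shared 'id' column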
35 | names(wards) 36 | wardsF <- inner_join(wardsF, wards@data, by = "id") 37 | head(wardsF) # see average cake consumption added 38 | 39 | # set up bounding box 40 | bb <- b <- bbox(wards) 41 | bb[1, ] <- (b[1, ] - mean(b[1, ])) * 1.05 + mean(b[1, ]) 42 | bb[2, ] <- (b[2, ] - mean(b[2, ])) * 1.05 + mean(b[2, ]) 43 | b[1, ] <- (b[1, ] - mean(b[1, ])) * 1.7 + mean(b[1, ]) 44 | b[2, ] <- (b[2, ] - mean(b[2, ])) * 1.7 + mean(b[2, ]) 45 | 46 | # map the result! 47 | ggplot() + 48 | geom_polygon(data=wardsF, aes(long, lat, group=group, fill=avCake), color = "black", alpha=0.2) 49 | baseMap <- get_map(location=bb, maptype="terrain") 50 | # baseMap <- get_map(location=b, zoom=10, source='osm') 51 | # baseMap <- get_map(location=b, source='stamen') 52 | 53 | # ggmap(baseMap) + 54 | ggplot() + 55 | geom_polygon(data=wardsF, aes(long, lat, group=group, fill=avCake), alpha=0.5) + 56 | geom_path(data=wardsF, aes(long, lat, group=group), color="black", alpha=0.3) + 57 | scale_fill_continuous(low = "green", high = "red", name = "Simulated\naverage\nfrequency\nof cake\nconsumption\n(times/wk)") + xlim(bb[1,]) + ylim(bb[2,]) + 58 | theme_minimal() 59 | ## ggsave("figures/CakeMap.png") 60 | 61 | # analysis 62 | imd <- read.csv("data/CakeMap/inc-est-2001.csv") 63 | summary(imd$NAME %in% wards$NAME) 64 | summary(pmatch(wards$NAME, imd$NAME)) 65 | which(imd$NAME %in% wards$NAME) %in% pmatch(wards$NAME, imd$NAME) 66 | head(inner_join(wards@data, imd)) 67 | wards@data <- inner_join(wards@data, imd) 68 | plot(wards$Avinc, wards$avCake) 69 | cor(wards$Avinc, wards$avCake, use='complete.obs') 70 | 71 | # individual level analysis 72 | levels(ind$NCakes) 73 | ind$NCakes <- factor(ind$NCakes, c("<0.5", levels(ind$NCakes)[c(1,2,3,4)])) 74 | levels(ind$NCakes) 75 | ind$avnumcakes <- 1 76 | ind$avnumcakes[ind$NCakes == levels(ind$NCakes)[1]] <- 0.1 77 | ind$avnumcakes[ind$NCakes == levels(ind$NCakes)[2]] <- 0.5 78 | ind$avnumcakes[ind$NCakes == levels(ind$NCakes)[3]] <- 1.5 79 | ind$avnumcakes[ind$NCakes == levels(ind$NCakes)[4]] <- 4 80 | ind$avnumcakes[ind$NCakes == levels(ind$NCakes)[5]] <- 8 81 | summary(ind$avnumcakes) 82 | ind$NSSEC8 <- as.character(ind$NSSEC8) 83 | aggregate(ind$avnumcakes ~ ind$NSSEC8, FUN=mean) 84 | summary(ind$avnumcakes) 85 | mean(ind$avnumcakes[ ind$NSSEC8 == "1.1" | ind$NSSEC8 == "1.2" | ind$NSSEC8 == "2" ]) 86 | mean(ind$avnumcakes[ ind$NSSEC8 == "8" | ind$NSSEC8 == "7" | ind$NSSEC8 == "6" ]) 87 | 88 | (hm <- table(ind$NCakes, ind$NSSEC8)) 89 | heatmap(hm) 90 | heatmap(hm, Rowv=NA, Colv=NA) 91 | 92 | library(gplots) # for another kind of heat map 93 | 94 | heatmap.2(hm, Rowv=NA, Colv=NA, xlab = "Socio-economic class", ylab = "Frequency of cake consumption") 95 | hmm <- reshape2::melt(hm) # melt() comes from the reshape2 package 96 | ggplot(hmm) + geom_tile(aes(Var1, as.character(Var2), fill = value)) + 97 | scale_fill_continuous(low="green", high="red") -------------------------------------------------------------------------------- /code/CakeMapTimeAnalysis.R: -------------------------------------------------------------------------------- 1 | # comparison of times to perform ipfp and mipfp 2 | 3 | Neval = 1 4 | 5 | times <- array(0,dim=c(2,Neval)) 6 | 7 | for (i in 1:Neval){ 8 | print(i) 9 | times[1,i] <- system.time(apply(cons_prop, 1, function(x) ipfp(x, ind_catt, x0, tol = 1e-12)))[1] 10 | 11 | times[2,i] <- system.time(Ipfp( weight_init, descript, target, print = FALSE, tol=1e-12))[1] 12 | } 13 | 14 | # with a problem of this size, ipfp seems to be better 15 | # we want to see how the run time evolves as the number of individuals
available increases 16 | 17 | ind_catt2 <- cbind(ind_catt, ind_catt, ind_catt) 18 | x02 <- cbind(x0,x0,x0) 19 | weight_init2 <- weight_init * 3 20 | 21 | 22 | minInd <- 200 23 | maxInd <- 2000 24 | pas <- 100 25 | 26 | times2 <- array(0,dim=c(3,ceiling((maxInd-minInd)/pas)+1)) 27 | i=1 28 | 29 | for (indiv in seq(minInd,maxInd,pas)){ 30 | print(indiv) 31 | times2[1,i] <- indiv 32 | times2[2,i] <- system.time(apply(cons_prop, 1, function(x) ipfp(x, ind_catt2[,1:indiv], x02[1:indiv], tol = 1e-12)))[1] 33 | 34 | times2[3,i] <- system.time(Ipfp( weight_init*indiv/916, descript, target, print = FALSE, tol=1e-12))[1] 35 | i=i+1 36 | } 37 | 38 | plot(times2[1,],times2[2,],pch=c(1),ylim=c(min(times2[2,])-1,max(times2[2,])+1), main= "Time to generate a weight matrix \n with tol=1e-12 ",xlab="Number of individuals in the microdata",ylab="Time") 39 | par(new=TRUE) 40 | plot(times2[1,],times2[3,],pch=c(3),ylim=c(min(times2[2,])-1,max(times2[2,])+1),axes=F,ann=F) 41 | legend("topleft",c("ipfp","mipfp"),pch = c(1,3)) 42 | -------------------------------------------------------------------------------- /code/CakeMapWithMipfp.R: -------------------------------------------------------------------------------- 1 | library(mipfp) 2 | 3 | 4 | source("code/CakeMapMipfpData.R") 5 | 6 | 7 | # Initial weight matrix 8 | weight_init_onezone <- table(ind) 9 | # Check order of the variables 10 | dimnames(weight_init_onezone) 11 | 12 | ######################################## 13 | # All zones together 14 | ######################################## 15 | # Repeat the initial matrix n_zone times 16 | init_cells <- rep(weight_init_onezone, each = nrow(cons)) 17 | 18 | # Define the names 19 | names <- c(list(rownames(cons)),as.list(dimnames(weight_init_onezone))) 20 | 21 | # Structure the data 22 | weight_init <- array(init_cells, dim = 23 | c(nrow(cons),dim(weight_init_onezone)), 24 | dimnames = names) 25 | 26 | ######################################## 27 | # Check constraint totals 28 | ######################################## 29 | 30 | # check the totals per zone 31 | table(rowSums(con2)==rowSums(con1)) 32 | table(rowSums(con3)==rowSums(con1)) 33 | table(rowSums(con2)==rowSums(con3)) 34 | 35 | # 1 and 2 are the same, 3 is different 36 | 37 | # Observe the global total 38 | sum(con1) 39 | sum(con2) 40 | sum(con3) 41 | # 1 and 2 are the same, 3 is different 42 | 43 | ######################################## 44 | # convert constraint 1 into a form readable by mipfp 45 | ######################################## 46 | 47 | source("code/CakeMapMipfpCon1Convert.R") 48 | 49 | ######################################## 50 | # To perform the Ipfp process correctly, 51 | # we have to use coherent constraints, 52 | # with the same marginals per zone. 53 | # Since NSSEC contains fewer individuals, possibly because of 54 | # "NA" values, we take con1 and con2 as the reference.
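# Illustrative aside (toy numbers, not from the CakeMap data): rescaling one
# constraint so its zone totals match another's, as done for con3 just below
# toy_a <- matrix(c(10, 20, 30, 40), nrow = 2) # zone totals: 40 and 60
# toy_b <- matrix(c(4, 10, 8, 10), nrow = 2)   # zone totals: 12 and 20
# toy_b * rowSums(toy_a) / rowSums(toy_b)      # rescaled; zone totals become 40 and 60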
55 | con3_prop <- con3*rowSums(con2)/rowSums(con3) 56 | 57 | # Check the new marginals per zone 58 | table(rowSums(con2)==rowSums(con3_prop)) 59 | # This is now ok 60 | 61 | # Perform the Ipfp function 62 | target <- list(con1_convert,as.matrix(con2),as.matrix(con3_prop)) 63 | descript <- list(c(1,4,6), c(1,3),c(1,5)) 64 | 65 | 66 | 67 | weight_mipfp <- Ipfp( weight_init, descript, target, 68 | print = TRUE,tol=1e-12) 69 | ######################################## 70 | # Quality of this IPF 71 | ######################################## 72 | # con1 73 | max(abs(con1_convert-apply(weight_mipfp$x.hat,c(1,4,6),sum))) 74 | 75 | # con2 76 | max(abs(con2-apply(weight_mipfp$x.hat,c(1,3),sum))) 77 | 78 | # con3 79 | max(abs(con3_prop-apply(weight_mipfp$x.hat,c(1,5),sum))) 80 | 81 | # con3 is well fitted for all zones, but con1 and 82 | # con2 have some municipalities with large errors 83 | 84 | 85 | ######################################## 86 | # Convert ipfp result for comparison 87 | ######################################## 88 | # first execute CakeMap.R 89 | weight_ipfp <- array(0, dim=dim(weight_init), dimnames = dimnames(weight_init)) 90 | 91 | for (indiv in 1:nrow(ind)){ 92 | temp <- weight_ipfp[,c(as.character(ind[indiv,1])),c(as.character(ind[indiv,2])),c(as.character(ind[indiv,3])),c(as.character(ind[indiv,4])),c(as.character(ind[indiv,5]))] 93 | weight_ipfp[,c(as.character(ind[indiv,1])),c(as.character(ind[indiv,2])),c(as.character(ind[indiv,3])),c(as.character(ind[indiv,4])),c(as.character(ind[indiv,5]))] <- temp +weights[indiv,] 94 | } 95 | 96 | # compare results 97 | 98 | which.max(abs(weight_ipfp-weight_mipfp$x.hat)) 99 | sum(weight_ipfp) 100 | sum(weight_mipfp$x.hat) 101 | 102 | plot(weight_ipfp,weight_mipfp$x.hat) 103 | max(apply(weight_mipfp$x.hat,1,sum)-apply(weight_ipfp,1,sum)) 104 | # The total of the ipfp weights matches that of the third constraint...
105 | # This is due to the order of the constraints 106 | # Indeed, if we change the order of the constraints and re-calculate the ipfp: 107 | 108 | cons <- cons[,c(15:24,1:14)] 109 | ind_cat <- ind_cat[,c(15:24,1:14)] 110 | 111 | 112 | # Iterative proportional fitting (IPF) stage 113 | library(ipfp) # load the ipfp package -may need install.packages("ipfp") 114 | cons <- apply(cons, 2, as.numeric) # convert the constraints to 'numeric' 115 | ind_catt <- t(ind_cat) # transpose the dummy variables for ipfp 116 | x0 <- rep(1, nrow(ind)) # set the initial weights 117 | weights <- apply(cons, 1, function(x) ipfp(x, ind_catt, x0, maxit = 20)) 118 | 119 | # And the sum is now the same and the results also 120 | sum(weights) 121 | 122 | 123 | ################################################ 124 | # Convert ipfp result for comparison 125 | ################################################ 126 | 127 | weight_ipfp <- array(0, dim=dim(weight_init), dimnames = dimnames(weight_init)) 128 | 129 | for (indiv in 1:nrow(ind)){ 130 | temp <- weight_ipfp[,c(as.character(ind[indiv,1])),c(as.character(ind[indiv,2])),c(as.character(ind[indiv,3])),c(as.character(ind[indiv,4])),c(as.character(ind[indiv,5]))] 131 | weight_ipfp[,c(as.character(ind[indiv,1])),c(as.character(ind[indiv,2])),c(as.character(ind[indiv,3])),c(as.character(ind[indiv,4])),c(as.character(ind[indiv,5]))] <- temp +weights[indiv,] 132 | } 133 | 134 | 135 | sum(weight_ipfp-weight_mipfp$x.hat) 136 | -------------------------------------------------------------------------------- /code/ConvertIpfpWeights.R: -------------------------------------------------------------------------------- 1 | # first execute the CakeMap.R 2 | weight_ipfp <- array(0, dim=dim(weight_init), dimnames = dimnames(weight_init)) 3 | 4 | for (indiv in 1:nrow(ind)){ 5 | temp <- weight_ipfp[,c(as.character(ind[indiv,1])),c(as.character(ind[indiv,2])),c(as.character(ind[indiv,3])),c(as.character(ind[indiv,4])),c(as.character(ind[indiv,5]))] 6 | weight_ipfp[,c(as.character(ind[indiv,1])),c(as.character(ind[indiv,2])),c(as.character(ind[indiv,3])),c(as.character(ind[indiv,4])),c(as.character(ind[indiv,5]))] <- temp +weights[indiv,] 7 | } -------------------------------------------------------------------------------- /code/SimpleWorld.R: -------------------------------------------------------------------------------- 1 | # SimpleWorld.R 2 | # Raw code needed to run the SimpleWorld example 3 | ind <- read.csv("data/SimpleWorld/ind.csv") 4 | class(ind) # verify the data type of the object 5 | ind # print the individual-level data 6 | 7 | con_age <- read.csv("data/SimpleWorld/age.csv") 8 | con_sex <- read.csv("data/SimpleWorld/sex.csv") 9 | 10 | # Convert age into a categorical variable with user-chosen labels 11 | (ind$age <- cut(ind$age, breaks = c(0, 49, 120), labels = c("a0_49", "a50+"))) 12 | names(con_age) <- levels(ind$age) # rename aggregate variables 13 | cons <- cbind(con_age, con_sex) 14 | 15 | cat_age <- model.matrix(~ ind$age - 1) 16 | cat_sex <- model.matrix(~ ind$sex - 1)[, c(2, 1)] 17 | (ind_cat <- cbind(cat_age, cat_sex)) # combine flat representations of the data 18 | 19 | colSums(ind_cat) # view the aggregated version of ind 20 | ind_agg <- colSums(ind_cat) # save the result 21 | 22 | rbind(cons[1,], ind_agg) # test compatibility between ind_agg and cons objects 23 | 24 | weights <- matrix(data = NA, nrow = nrow(ind), ncol = nrow(cons)) 25 | dim(weights) # the dimension of the weight matrix: 5 rows by 3 columns 26 | 27 | library(ipfp) # load the ipfp library after: 
install.packages("ipfp") 28 | cons <- apply(cons, 2, as.numeric) # convert matrix to numeric data type 29 | ind_catt <- t(ind_cat) # save transposed version of ind_cat 30 | x0 <- rep(1, nrow(ind)) # save the initial vector 31 | weights <- apply(cons, 1, function(x) ipfp(x, ind_catt, x0, maxit = 20)) 32 | 33 | source("code/functions.R") 34 | 35 | set.seed(0) 36 | int_pp(x = c(0.333, 0.667, 3)) 37 | int_pp(x = c(1.333, 1.333, 1.333)) 38 | 39 | 40 | # Method 2: using apply 41 | ints <- apply(weights, 2, int_trs) # generate integerised result 42 | indices <- NULL 43 | ints <- for(i in 1:ncol(ints)){ 44 | indices <- c(indices, int_expand_vector(ints[,i])) 45 | } 46 | 47 | ints_df <- data.frame(id = indices, zone = rep(1:nrow(cons), colSums(weights))) 48 | 49 | ind_full <- read.csv("data/SimpleWorld/ind-full.csv") 50 | library(dplyr) # install.packages(dplyr) if not installed 51 | ints_df <- inner_join(ints_df, ind_full) 52 | 53 | 54 | ## ------------------------------------------------------------------------ 55 | ints_df[ints_df$zone == 2, ] 56 | -------------------------------------------------------------------------------- /code/bbuild.R: -------------------------------------------------------------------------------- 1 | # code to build the book 2 | b <- readLines("book.tex") # read in tex file 3 | 4 | bgn <- grep("Introduction", b)[1] 5 | b <- b[bgn:(length(b) - 2)] 6 | b <- gsub("\\\\section\\{", "\\\\chapter\\{", x = b) 7 | b <- gsub("subsection\\{", "section\\{", x = b) 8 | 9 | # Take a subset of b (to test where fails occur) 10 | # b <- b[1:400] 11 | 12 | # Additional material to include 13 | # a <- "\\usepackage{hyperref}" 14 | 15 | # kp <- 9 # where do package descriptions end? 16 | # kf <- grep("mainmatter", k) # frontmatter up to and including here 17 | kf <- readLines("frontmatter/pream.tex") 18 | 19 | # kp <- k[1:kp] 20 | # kf <- k[(length(kp) + 1):kf] # frontmatter 21 | 22 | # kp <- c(kp, a) 23 | 24 | k <- c(kf, b, c("\\printindex", "\\end{document}")) 25 | 26 | 27 | biblilines <- grep("section\\*\\{Bibliography\\}|\\{section\\}\\{Bibliography\\}", x = k) 28 | kbl <- k[biblilines] 29 | kbl <- gsub(pattern = "section", replacement = "chapter", x = kbl) 30 | 31 | # glosline <- grep("chapter\\{Glossary\\}", x = k) 32 | # k[(glosline - 1):(glosline + 2)] 33 | # k[glosline + 1] <- "\\addcontentsline{toc}{chapter}{Glossary}" 34 | 35 | k[biblilines] <- kbl 36 | 37 | # add part 1 38 | p1 <- "\\part{Introducing spatial microsimulation with R}" 39 | p2 <- "\\part{Generating spatial microdata}" 40 | p3 <- "\\part{Modelling spatial microdata}" 41 | 42 | ps <- grep(pattern = "\\chapter\\{Intro|\\chapter\\{Data|\\chapter\\{The T", x = k) 43 | 44 | k <- c( 45 | k[1:(ps[1] -1)], 46 | p1, k[ps[1]:(ps[2] -1)], 47 | p2, k[ps[2]:(ps[3] -1)], 48 | p3, k[ps[3]:length(k)] 49 | ) 50 | 51 | 52 | writeLines(k, con = "spatial-microsim-book.tex") 53 | 54 | # out-takes - code not used 55 | # b[1] <- gsub("documentclass\\[\\]\\{article\\}", "documentclass\\[krantz1,ChapterTOCs\\]\\{krantz\\}", x = b[1]) # change 1st line 56 | # gsub("\\\\section\\{", "\\\\chapter\\{", x = b[108]) # test gsub -------------------------------------------------------------------------------- /code/book-functions.R: -------------------------------------------------------------------------------- 1 | # Book building functions 2 | Rmd_bind <- function(dir = ".", 3 | book_header = readLines(textConnection("---\ntitle: 'Title'\n---")), 4 | chap_ord = NULL){ 5 | old <- setwd(dir) 6 | if(length(grep("book.Rmd$", list.files())) > 0){ 7 
| warning("book.Rmd already exists") 8 | } 9 | cfiles <- list.files(pattern = "*.Rmd", ) 10 | cfiles <- cfiles[-grep("book", cfiles)] 11 | if(!is.null(chap_ord)) cfiles <- cfiles[chap_ord] # chapter order 12 | write(book_header, file = "book.Rmd", ) 13 | ttext <- NULL 14 | for(i in 1:length(cfiles)){ 15 | text <- readLines(cfiles[i]) 16 | hspan <- grep("---", text) 17 | text <- text[-c(hspan[1]:hspan[2])] 18 | write(text, sep = "\n", file = "book.Rmd", append = T) 19 | } 20 | # render("book.Rmd", output_format = "pdf_document") 21 | setwd(old) 22 | } 23 | 24 | Rmd_bind_mod <- function(dir = ".", 25 | book_header = readLines(textConnection("---\ntitle: 'Title'\n---")), 26 | chap_ord = NULL){ 27 | old <- setwd(dir) 28 | if(length(grep("book.Rmd", list.files())) > 0){ 29 | warning("book.Rmd already exists") 30 | } 31 | cfiles <- list.files(pattern = "*.Rmd$", ) 32 | cfiles <- cfiles[-grep("book", cfiles)] 33 | if(!is.null(chap_ord)) cfiles <- cfiles[chap_ord] # chapter order 34 | write(book_header, file = "book.Rmd", ) 35 | ttext <- NULL 36 | for(i in 1:length(cfiles)){ 37 | text <- readLines(cfiles[i]) 38 | hspan <- grep("---", text) 39 | text <- text[-c(hspan[1]:hspan[2])] 40 | refs <- grepl("# References", text) # Remove references section from each chapter 41 | text <- text[!refs] 42 | write(text, sep = "\n", file = "book.Rmd", append = T) 43 | } 44 | # render("book.Rmd", output_format = "pdf_document") 45 | setwd(old) 46 | } 47 | -------------------------------------------------------------------------------- /code/build-CRC-version.R: -------------------------------------------------------------------------------- 1 | # Build the CRC-formatted version of the book 2 | source("code/bbuild.R") 3 | system("pdflatex --interaction=nonstopmode spatial-microsim-book.tex") 4 | system("pdflatex --interaction=nonstopmode spatial-microsim-book.tex") 5 | 6 | # tidy up the mess 7 | tt <- list.files(pattern = "*.aux|*.toc|*.log|*.lot|*.gz|*.idx|*.ilg|*.ind|*.ggmap", ) 8 | 9 | for(i in tt){ 10 | system(paste('rm', i)) 11 | } 12 | -------------------------------------------------------------------------------- /code/functions.R: -------------------------------------------------------------------------------- 1 | # Functions useful for spatial microsimulation 2 | # What others would be useful? 3 | # Could any of these be improved? 
4 | # Let me know if so - rob00x@gmail.com 5 | 6 | # 'Proportional probabilities' (PP) method of integerisation 7 | # (see http://www.sciencedirect.com/science/article/pii/S0198971513000240): 8 | int_pp <- function(x){ 9 | xv <- as.vector(x) 10 | xint <- rep(0, length(x)) 11 | xs <- sample(length(x), size = round(sum(x)), prob = x, replace = T) 12 | xsumm <- summary(as.factor(xs)) 13 | topup <- as.numeric(names(xsumm)) 14 | xint[topup] <- xsumm 15 | dim(xint) <- dim(x) 16 | xint 17 | } 18 | 19 | # 'Truncate, replicate, sample' (TRS) method of integerisation 20 | # (see http://www.sciencedirect.com/science/article/pii/S0198971513000240): 21 | int_trs <- function(x){ 22 | xv <- as.vector(x) 23 | xint <- floor(xv) 24 | r <- xv - xint 25 | def <- round(sum(r)) # the deficit population 26 | # the weights to be 'topped up' (+ 1 applied) 27 | topup <- sample(length(x), size = def, prob = r) 28 | xint[topup] <- xint[topup] + 1 29 | dim(xint) <- dim(x) 30 | dimnames(xint) <- dimnames(x) 31 | xint 32 | } 33 | 34 | int_expand_vector <- function(x){ 35 | index <- 1:length(x) 36 | rep(index, round(x)) 37 | } 38 | 39 | int_expand_array <- function(x){ 40 | # Transform the array into a dataframe 41 | count_data <- as.data.frame.table(x) 42 | # Store the indices of categories for the final population 43 | indices <- rep(1:nrow(count_data), count_data$Freq) 44 | # Create the final individuals 45 | ind_data <- count_data[indices,] 46 | ind_data 47 | } 48 | 49 | 50 | 51 | # Total absolute error 52 | tae <- function(observed, simulated){ 53 | obs_vec <- as.numeric(observed) 54 | sim_vec <- as.numeric(simulated) 55 | sum(abs(obs_vec - sim_vec)) 56 | } 57 | 58 | # Number of times each unique matrix row appears 59 | umat_count <- function(x) { 60 | xp <- apply(x, 1, paste0, collapse = "") # "pasted" version of constraints 61 | freq <- table(xp) # frequency of occurrence of each individual 62 | xu <- unique(x) # save only unique individuals 63 | rns <- as.integer(row.names(xu)) # save the row names of unique values of ind 64 | xpu <- xp[rns] 65 | o <- order(xpu, decreasing = TRUE) # the order of the output (to rectify table) 66 | cbind(xu, data.frame(ind_num = freq[o], rns = rns)) # output 67 | } 68 | 69 | # Generates list of outputs - requires dplyr (and plyr, for join) 70 | umat_count_dplyr <- function(x){ 71 | x$p <- apply(x, 1, paste0, collapse = "") 72 | up <- data.frame(p = unique(x$p)) # unique values in order they appeared 73 | y <- dplyr::count(x, p) # fast freq table 74 | umat <- inner_join(up, y) # quite fast 75 | umat <- join(umat, x, match = "first") 76 | list(u = umat, p = x$p) # return unique individuals and attributes 77 | } 78 | -------------------------------------------------------------------------------- /code/gregwt.R: -------------------------------------------------------------------------------- 1 | #library(devtools) 2 | devtools::install_github("emunozh/GREGWT") 3 | library('GREGWT') 4 | 5 | # Load the data from csv files stored under ../data 6 | age = read.csv("../data/SimpleWorld/age.csv") 7 | sex = read.csv("../data/SimpleWorld/sex.csv") 8 | ind = read.csv("../data/SimpleWorld/ind-full.csv") 9 | # Make categories for age 10 | ind$age <- cut(ind$age, breaks=c(0,49,Inf), labels = c("a0.49", "a.50.")) 11 | # Add initial weights to survey 12 | ind$w <- vector(mode = "numeric", length=dim(ind)[1]) + 1 13 | 14 | # prepare simulation data using GREGWT::prepareData 15 | data_in <- prepareData(cbind(age, sex), ind, census_area_id = F, breaks = c(2)) 16 | 17 | # prepare a data.frame to store the result 18 | fweights
<- NULL 19 | Result <- as.data.frame(matrix(NA, ncol=3, nrow=dim(age)[1])) 20 | names(Result) <- c("area", "income", "cap.income") 21 | 22 | # now we loop through all areas 23 | for(area in seq(dim(age)[1])){ 24 | gregwt = GREGWT(data_in, area_code = area) 25 | fw <- gregwt$final_weights 26 | fweights <- c(fweights, fw) 27 | ## Estimate income 28 | sum.income <- sum(fw * ind$income) 29 | cap.income <- sum(fw * ind$income / sum(fw)) 30 | Result[area,] <- c(area, sum.income, cap.income) 31 | } 32 | 33 | fweights <- matrix(fweights, nrow = nrow(ind)) 34 | fweights 35 | 36 | #ind_agg <- t(apply(fweights, 2, function(x) colSums(x * ind_cat))) 37 | #ind_agg 38 | -------------------------------------------------------------------------------- /code/ipfpMultiDim.R: -------------------------------------------------------------------------------- 1 | require(cmm) 2 | 3 | Ipfp <- function(seed, target.list, target.data, print = FALSE, iter = 1000, 4 | tol = 1e-10, na.target = FALSE) { 5 | # Update an array using the iterative proportional fitting procedure. 6 | # 7 | # Args: 8 | # seed: The initial multi-dimensional array to be updated. Each cell must 9 | # be greater than 0. 10 | # target.list: A list of the target margins provided in target.data. Each 11 | # component of the list is an array whose cells indicate 12 | # which dimension the corresponding margin relates to. 13 | # target.data: A list containing the data of the target margins. Each 14 | # component of the list is an array storing a margin. 15 | # The list order must follow the one defined in target.list. 16 | # Note that the cells of the arrays must be greater than 0. 17 | # print: Verbose parameter: if TRUE prints the current iteration number 18 | # and the value of the stopping criterion. 19 | # iter: The maximum number of iterations allowed; must be greater than 0. 20 | # tol: If the maximum absolute difference between two iterations is lower 21 | # than the value specified by tol, then ipfp has reached convergence 22 | # (stopping criterion); must be greater than 0. 23 | # na.target: if set to TRUE, allows the targets to have NA cells. In that 24 | # case the margins consistency is not checked. 25 | # 26 | # Returns: An array whose margins fit the target margins and of the same 27 | # dimension as seed. 28 | 29 | # checking if NA in target cells if na.target is set to FALSE 30 | if (is.na(min(sapply(target.data, min))) & !na.target) { 31 | stop('Error: NA values present in the margins - use na.target = TRUE!') 32 | } 33 | 34 | # checking non negativity condition for the seed and the target 35 | if (min(sapply(target.data, min), na.rm = na.target) < 0 | min(seed) < 0) { 36 | stop('Error: Target and Seed cells must be non-negative!') 37 | } 38 | 39 | # checking the strict positivity of tol and iter 40 | if (iter < 1 | tol <= 0) { 41 | stop('Error: tol and iter must be strictly positive!') 42 | } 43 | 44 | # checking the margins consistency if no missing values in the targets 45 | check.margins <- TRUE 46 | 47 | if (na.target == FALSE) { 48 | if (length(target.data) > 1) { 49 | for (m in 2:length(target.data)) { 50 | if (abs(sum(target.data[[m-1]]) - sum(target.data[[m]])) > 1e-10) { 51 | check.margins <- FALSE 52 | warning('Targets not consistent - shifting to probabilities! 53 | Check input data!\n') 54 | break 55 | } 56 | } 57 | } 58 | } else { 59 | if (print) { 60 | cat('NOTE: Missing values present in target cells.
') 61 | cat('Margins consistency not checked!\n') 62 | } 63 | } 64 | 65 | # if margins are not consistent, shifting from frequencies to probabilities 66 | if (!check.margins) { 67 | seed <- seed / sum(seed) 68 | for (m in 1:length(target.data)) { 69 | target.data[[m]] <- target.data[[m]] / sum(target.data[[m]]) 70 | } 71 | } 72 | 73 | if (print & check.margins & !na.target) { 74 | cat('Margins consistency checked!\n') 75 | } 76 | 77 | # initial value is the seed 78 | result <- seed 79 | converged <- FALSE 80 | tmp.evol.stp.crit <- vector(mode="numeric", length = iter) 81 | 82 | # ipfp iterations 83 | for (i in 1:iter) { 84 | 85 | if (print) { 86 | cat('... ITER', i, '\n') 87 | } 88 | 89 | # saving previous iteration result (for testing convergence) 90 | result.temp <- result 91 | 92 | # loop over the constraints 93 | for (j in 1:length(target.list)) { 94 | # ... extracting current margins 95 | temp.sum <- apply(result, target.list[[j]], sum) 96 | # ... computation of the update factor, taking care of 0 and NA cells 97 | update.factor <- ifelse(target.data[[j]] == 0 | temp.sum == 0, 0, 98 | target.data[[j]] / temp.sum) 99 | if (na.target == TRUE) { 100 | update.factor[is.na(update.factor)] <- 1; 101 | } 102 | # ... apply the update factor 103 | result <- sweep(result,target.list[[j]], update.factor, FUN = "*") 104 | } 105 | 106 | # stopping criterion 107 | stp.crit <- max(abs(result - result.temp)) 108 | tmp.evol.stp.crit[i] <- stp.crit 109 | if (stp.crit < tol) { 110 | converged <- TRUE 111 | if (print) { 112 | cat('Convergence reached after', i, 'iterations!\n') 113 | } 114 | break 115 | } 116 | 117 | if (print) { 118 | cat (' stopping criterion:', stp.crit, '\n') 119 | } 120 | 121 | } 122 | 123 | # checking the convergence 124 | if (converged == FALSE) { 125 | warning('IPFP did not converge after ', iter, ' iteration(s)!
126 | This might be due to 0 cells in the seed, the maximum number 127 | of iterations being too low or the tolerance too small\n') 128 | } 129 | 130 | # computing final max difference between generated and target margins 131 | diff.margins <- vector(mode = "numeric", length = length(target.list)) 132 | if (na.target == FALSE) { 133 | for (j in 1:length(target.list)) { 134 | diff.margins[j] = max(abs(target.data[[j]] 135 | - apply(result, target.list[[j]], sum))) 136 | } 137 | } 138 | 139 | # storing the evolution of the stopping criterion 140 | evol.stp.crit <- tmp.evol.stp.crit[1:i] 141 | 142 | # gathering the results in a list 143 | result.list <- list("estimates" = result, "stp.crit" = stp.crit, 144 | "conv" = converged, "dif.margins" = diff.margins, 145 | "evol.stp.crit" = evol.stp.crit); 146 | 147 | # returning the result 148 | return(result.list) 149 | 150 | } 151 | 152 | # code from Thomas 153 | array2vector<-function(a) { 154 | #transform array a to vector, where last index moves fastest 155 | 156 | dim.array <- dim(a) 157 | a <- aperm(a, seq(length(dim.array), 1, by = -1)) 158 | return(c(a)) 159 | 160 | } 161 | 162 | vector2array<-function(vector, dim) { 163 | #transform vector to array, where last index moves fastest 164 | 165 | a <- array(vector, dim) 166 | a <- aperm(a, seq(length(dim), 1, by = -1)) 167 | return(a) 168 | 169 | } 170 | 171 | covar <- function(estimate, sample, target.list) { 172 | # Compute variance-covariance matrix of the estimators 173 | # using the formula from Little and Wu (1991) 174 | 175 | n <- sum(sample) 176 | sample.prob <- array2vector(sample / sum(sample)) 177 | estimate.prob <- array2vector(estimate / sum(estimate)) 178 | 179 | D.sample <- diag(1 / sample.prob) 180 | D.estimate <- diag(1 / estimate.prob) 181 | 182 | # computation of A such that A * vector(estimate) = vector(target.data) 183 | 184 | # ... one line filled with ones 185 | A.transp <- matrix(1, nrow = 1, ncol = length(estimate.prob)) 186 | 187 | # ...
constrainst (removing the first one since it is redundant information) 188 | for (j in 1:length(target.list)) { 189 | marg.mat <- MarginalMatrix(var = 1:length(dim(sample)), marg = target.list[[j]], dim = dim(sample))[-1,] 190 | A.transp <- rbind(marg.mat, A.transp, deparse.level = FALSE) 191 | } 192 | 193 | A <- t(A.transp) 194 | 195 | # computation of the orthogonal complement of A (using QR decomposition) 196 | K <- qr.Q(qr(A), complete = TRUE)[,(dim(A)[2]+1):dim(A)[1]] 197 | 198 | # computation of the variance 199 | estimate.var <- (1 / n) * K %*% solve((t(K) %*% D.estimate %*% K)) %*% t(K) %*% D.sample %*% K %*% solve(t(K) %*% D.estimate %*% K) %*% t(K) 200 | 201 | # returning the result 202 | return(estimate.var) 203 | 204 | } 205 | -------------------------------------------------------------------------------- /code/optim-cakeMap.R: -------------------------------------------------------------------------------- 1 | # optim test CakeMap 2 | 3 | source("code/CakeMap.R") # load cakemap data 4 | indu <- unique(ind_cat) 5 | rns <- as.integer(row.names(indu)) 6 | 7 | library(dplyr) 8 | ind_cat$p <- apply(ind_cat, 1, paste0, collapse = "") 9 | umat <- count(ind_cat, p, sort = TRUE)$n 10 | 11 | ind_num <- apply(indu, 2, function(x) x * umat) # ind_num: unique row numbers to optimise 12 | -------------------------------------------------------------------------------- /code/optim-tests-SimpleWorld.R: -------------------------------------------------------------------------------- 1 | # Optimisation experiments 2 | source("code/SimpleWorld.R") 3 | library(microbenchmark) 4 | library(ggplot2) 5 | 6 | # Look at the set-up data 7 | x0 8 | cons 9 | ind 10 | ind_cat 11 | 12 | # Creating the function to optimise 13 | # Setting up the input data 14 | # ind_cat <- rbind(ind_cat, ind_cat[1,]) # add extra rows - just for testing 15 | # indu <- unique(ind_cat) # save only unique individuals - dplyr::distinct forgets row.names 16 | 17 | umat_count <- function(x) { 18 | xp <- apply(x, 1, paste0, collapse = "") # "pasted" version of constraints 19 | freq <- table(xp) # frequency of occurence of each individual 20 | xu <- unique(x) # save only unique individuals 21 | rns <- as.integer(row.names(xu)) # save the row names of unique values of ind 22 | xpu <- xp[rns] 23 | o <- order(xpu, decreasing = TRUE) # the order of the output (to rectify table) 24 | cbind(xu, data.frame(ind_n = freq[o], rns = rns)) # outputs 25 | } 26 | 27 | umat <- umat_count(ind_cat) 28 | indu <- apply(umat[1:ncol(ind_cat)], 2, 29 | function(x) x * umat$ind_n.Freq) 30 | 31 | sim <- colSums(indu * c(1.2,3.5,1.5,4.5)) # test it on approximate dataset 32 | sim - cons[1,] # test the function works 33 | 34 | fun <- function(par, ind_n.Freq, con){ 35 | sim <- colSums(par * ind_n.Freq) 36 | ae <- abs(sim - con) # Absolute error per category 37 | sum(ae) # the Total Absolute Error (TAE) 38 | } 39 | par <- c(1.2,3.5,1.5,4.5) 40 | fun(par, indu, cons[1,]) # Shows the function in action 41 | 42 | # Test the function on the weights obtained by IPF 43 | # fun(weights[rns,1], indu, cons[1,]) # the weights generated by ipfp result in a tae of 0, better than optim 44 | 45 | ores <- optim(par = rep(1, nrow(indu)), fn = fun, gr = "CG", ind_n.Freq = indu, con = cons[1,]) 46 | ores$par 47 | fun(ores$par, indu, cons[1,]) # check TAE is low 48 | fw <- ores$par[rep(1:nrow(umat), times = umat$ind_n.Freq)] # final weights 49 | 50 | umat[1:ncol(ind_cat)][rep(1:nrow(umat), umat$ind_n.Freq),] # we've returned full circle to the correct population 51 | 52 | # 
Next stage: try optimising the fit using diferent algorithms and do tests! 53 | 54 | optim_optim_CG <- function(){ 55 | optim(par = rep(1, nrow(indu)), fn = fun, gr = "CG", ind_n.Freq = indu, con = cons[1,]) 56 | } 57 | 58 | # GenSA test 59 | library(GenSA) # the library to test 60 | out <- GenSA(par = rep(1, nrow(indu)), fn = fun, lower = rep(0, nrow(indu)), upper = rep(10^5, nrow(indu)), control = list(maxit = 10), ind_n.Freq = indu, con = cons[1,]) 61 | out$value 62 | 63 | # rgenoud 64 | # install.packages("rgenoud") 65 | library(rgenoud) 66 | set.seed(2014) 67 | out <- genoud(nvars = nrow(indu), fn = fun, ind_n.Freq = indu, con = cons[1,], control = list(maxit = 1000), data.type.int = TRUE, Domains = matrix(c(rep(0, nrow(indu)),rep(100000, nrow(indu))), ncol = 2)) 68 | out$par 69 | fun(par = out$par, ind_n.Freq = indu, con = cons) 70 | fun(par = c(2,2,1,6), ind_n.Freq = indu, con = cons) 71 | 72 | opt_res <- data.frame(algorithm = NA, 73 | maxit = NA, 74 | fit = NA, 75 | time = NA) 76 | init<-fun(par = rep(1,nrow(indu)), ind_n.Freq = indu, con = cons) 77 | opt_res <- rbind(opt_res, c("optim_Nelder", 0,init, NA)) 78 | opt_res <- rbind(opt_res, c("optim_SANN", 0,init, NA)) 79 | opt_res <- rbind(opt_res, c("optim_BFGS", 0,init, NA)) 80 | opt_res <- rbind(opt_res, c("optim_CG", 0,init, NA)) 81 | opt_res <- rbind(opt_res, c("ipf", 0,init, NA)) 82 | opt_res <- rbind(opt_res, c("GenSA", 0,init, NA)) 83 | 84 | Nb = 11 # default iteration number 85 | set.seed(2014) 86 | for(i in 1:Nb){ 87 | tmp_res <- optim(par = rep(1, nrow(indu)), fn = fun, method = "Nelder-Mead", ind_n.Freq = indu, con = cons[1,], control = list(maxit = i)) 88 | opt_res <- rbind(opt_res, c("optim_Nelder", i, tmp_res$value, NA)) 89 | tmp_res <- optim(par = rep(1, nrow(indu)), fn = fun, method = "SANN", ind_n.Freq = indu, con = cons[1,], control = list(maxit = i )) 90 | opt_res <- rbind(opt_res, c("optim_SANN", i, tmp_res$value, NA)) 91 | tmp_res <- optim(par = rep(1, nrow(indu)), fn = fun, method = "BFGS", ind_n.Freq = indu, con = cons[1,], control = list(maxit = i)) 92 | opt_res <- rbind(opt_res, c("optim_BFGS", i, tmp_res$value, NA)) 93 | tmp_res <- optim(par = rep(1, nrow(indu)), fn = fun, method = "CG", ind_n.Freq = indu, con = cons[1,], control = list(maxit = i)) 94 | opt_res <- rbind(opt_res, c("optim_CG", i, tmp_res$value, NA)) 95 | weights <- apply(cons, 1, function(x) ipfp(x, ind_catt, x0, maxit = i)) 96 | tae <- sum(abs(colSums(weights[,1] * ind_cat) - cons[1,])) 97 | opt_res <- rbind(opt_res, c("ipf", i, tae, NA)) 98 | tmp_res <- GenSA(par = rep(1, nrow(indu)), fn = fun, lower = rep(0, nrow(indu)), upper = rep(10^2, nrow(indu)), control = list(maxit = i), ind_n.Freq = indu, con = cons[1,]) 99 | opt_res <- rbind(opt_res, c("GenSA", i, tmp_res$value, NA)) 100 | } 101 | 102 | opt_res$fit <- as.numeric(opt_res$fit) 103 | opt_res$maxit <- as.numeric(opt_res$maxit) 104 | 105 | qplot(data = opt_res, maxit, fit, linetype = algorithm, geom="line") + 106 | ylab("Total Absolute Error") + xlab("Iterations") + 107 | scale_linetype(name = "Algorithm") + 108 | #scale_color_brewer(palette = 2, type = "qual") + 109 | theme_classic() 110 | # Save the plots! 
111 | # ggsave("figures/optim-its.png") # (original plot) 112 | # ggsave("figures/TAEOptim_GenSA_Mo.png") 113 | # ggsave("figures/TAEOptim_GenSA_Mo.pdf") 114 | 115 | 116 | 117 | # Regenerate results for timings plot 118 | opt_res <- data.frame(algorithm = NA, 119 | maxit = NA, 120 | fit = NA, 121 | time = NA) 122 | 123 | 124 | Nb = 11 # default iteration number 125 | set.seed(2014) 126 | for(i in 1:Nb){ 127 | tmp_res <- optim(par = rep(1, nrow(indu)), fn = fun, method = "Nelder-Mead", ind_n.Freq = indu, con = cons[1,], control = list(maxit = i)) 128 | opt_res <- rbind(opt_res, c("optim_Nelder", i, tmp_res$value, NA)) 129 | tmp_res <- optim(par = rep(1, nrow(indu)), fn = fun, method = "SANN", ind_n.Freq = indu, con = cons[1,], control = list(maxit = i)) 130 | opt_res <- rbind(opt_res, c("optim_SANN", i , tmp_res$value, NA)) 131 | tmp_res <- optim(par = rep(1, nrow(indu)), fn = fun, method = "BFGS", ind_n.Freq = indu, con = cons[1,], control = list(maxit = i)) 132 | opt_res <- rbind(opt_res, c("optim_BFGS", i, tmp_res$value, NA)) 133 | tmp_res <- optim(par = rep(1, nrow(indu)), fn = fun, method = "CG", ind_n.Freq = indu, con = cons[1,], control = list(maxit = i)) 134 | opt_res <- rbind(opt_res, c("optim_CG", i, tmp_res$value, NA)) 135 | weights <- apply(cons, 1, function(x) ipfp(x, ind_catt, x0, maxit = i)) 136 | tae <- sum(abs(colSums(weights[,1] * ind_cat) - cons[1,])) 137 | opt_res <- rbind(opt_res, c("ipf", i, tae, NA)) 138 | tmp_res <- GenSA(par = rep(1, nrow(indu)), fn = fun, lower = rep(0, nrow(indu)), upper = rep(10^2, nrow(indu)), control = list(maxit = i), ind_n.Freq = indu, con = cons[1,]) 139 | opt_res <- rbind(opt_res, c("GenSA", i, tmp_res$value, NA)) 140 | } 141 | 142 | opt_res$fit <- as.numeric(opt_res$fit) 143 | opt_res$maxit <- as.numeric(opt_res$maxit) 144 | 145 | ### Timings 146 | mb <- NULL 147 | for(i in 1:Nb){ 148 | Nelder <- function(){optim(par = rep(1, nrow(indu)), fn = fun, method = "Nelder-Mead", ind_n.Freq = indu, con = cons[1,], control = list(maxit = i ))} 149 | SANN <- function(){optim(par = rep(1, nrow(indu)), fn = fun, method = "SANN", ind_n.Freq = indu, con = cons[1,], control = list(maxit = i ))} 150 | BFGS <- function(){optim(par = rep(1, nrow(indu)), fn = fun, method = "BFGS", ind_n.Freq = indu, con = cons[1,], control = list(maxit = i))} 151 | CG <- function(){optim(par = rep(1, nrow(indu)), fn = fun, method = "CG", ind_n.Freq = indu, con = cons[1,], control = list(maxit = i))} 152 | IPF <- function(){weights <- apply(cons, 1, function(x) ipfp(x, ind_catt, x0, maxit = i )) 153 | tae <- sum(abs(colSums(weights[,1] * ind_cat) - cons[1,]))} 154 | GENSA <- function(){GenSA(par = rep(1, nrow(indu)), fn = fun, lower = rep(0, nrow(indu)), upper = rep(10^2, nrow(indu)), control = list(maxit = i), ind_n.Freq = indu, con = cons[1,])} 155 | mb <- rbind(mb, print(microbenchmark(Nelder(), SANN(), BFGS(), CG(), IPF(), GENSA(), times = 20))) 156 | } 157 | 158 | opt_res[2:dim(opt_res)[1],]$time <- mb$mean 159 | opt_res$time<-as.numeric(opt_res$time) 160 | 161 | qplot(data = opt_res, maxit, time, linetype = algorithm, geom="line") + 162 | # ylim(NA, 3000) + 163 | coord_cartesian(ylim = c(0, 2000)) + 164 | ylab("Time (microseconds)") + 165 | xlab("Number of iterations") + 166 | scale_colour_brewer(palette = 2, type = "div") + 167 | scale_linetype(name = "Algorithm") + 168 | theme_classic() 169 | # Save the plots! 
170 | # ggsave("figures/optim-time.png") 171 | # ggsave("figures/TimeOptim_GenSA_Mo.png") 172 | # ggsave("figures/TimeOptim_GenSA_Mo.pdf") 173 | ### Background 174 | 175 | # plot of time vs TAE 176 | qplot(data = opt_res, time, fit, linetype = algorithm, geom="line") + 177 | ylab("TAE") + xlab("Time (microseconds)") + scale_color_brewer(palette = 2, type = "qual") + theme_classic() 178 | 179 | 180 | ### Stack overflow - simplest form - representation of the above 181 | # See http://stackoverflow.com/questions/26160079/fast-concise-way-to-generate-ordered-frequency-count-of-unique-matrix-rows 182 | 183 | -------------------------------------------------------------------------------- /code/parallel-ipfp.R: -------------------------------------------------------------------------------- 1 | con_age <- read.csv("data/SimpleWorld/age.csv") 2 | con_sex <- read.csv("data/SimpleWorld/sex.csv") 3 | ind <- read.csv("data/SimpleWorld/ind.csv") 4 | 5 | (ind$age <- cut(ind$age, breaks = c(0, 49, 120), labels = c("a0_49", "a50+"))) 6 | 7 | names(con_age) <- levels(ind$age) # rename aggregate variables 8 | 9 | # make the number of constraints larger - to see benefit of parallel processing 10 | cons <- cbind(con_age, con_sex) 11 | 12 | cat_age <- model.matrix(~ ind$age - 1) 13 | cat_sex <- model.matrix(~ ind$sex - 1)[, c(2, 1)] 14 | (ind_cat <- cbind(cat_age, cat_sex)) # combine flat representations of the data 15 | 16 | library(ipfp) # load the ipfp library after: install.packages("ipfp") 17 | cons <- apply(cons, 2, as.numeric) # convert matrix to numeric data type 18 | cons <- cons[sample(3, size = 500, replace = T),] 19 | 20 | weights <- matrix(data = NA, nrow = nrow(ind), ncol = nrow(cons)) 21 | 22 | ind_catt <- t(ind_cat) 23 | x0 <- rep(1, nrow(ind)) 24 | 25 | # Tests of the speed of the for solution vs the apply solution 26 | ipfp_for <- function(){ 27 | for(i in 1:ncol(weights)){ 28 | weights[,i] <- ipfp(cons[i,], t(ind_cat), x0 = rep(1, nrow(ind))) 29 | } 30 | } 31 | 32 | ipfp_apply <- function(){ 33 | weights <- apply(cons, MARGIN = 1, FUN = function(x) ipfp(as.numeric(x), t(ind_cat), x0 = rep(1,nrow(ind)))) 34 | } 35 | 36 | ipfp_for20 <- function(){ 37 | for(i in 1:ncol(weights)){ 38 | weights[,i] <- ipfp(cons[i,], t(ind_cat), x0 = rep(1, nrow(ind)), maxit = 20) 39 | } 40 | } 41 | 42 | ipfp_apply20 <- function(){ 43 | weights <- apply(cons, MARGIN = 1, FUN = function(x) ipfp(as.numeric(x), t(ind_cat), x0 = rep(1,nrow(ind)), maxit = 20)) 44 | } 45 | 46 | ipfp_foric <- function(){ 47 | for(i in 1:ncol(weights)){ 48 | weights[,i] <- ipfp(cons[i,], ind_catt, x0 = rep(1, nrow(ind))) 49 | } 50 | } 51 | 52 | ipfp_applyic <- function(){ 53 | weights <- apply(cons, MARGIN = 1, FUN = function(x) ipfp(as.numeric(x), ind_catt, x0 = rep(1,nrow(ind)))) 54 | } 55 | 56 | ipfp_for20icx <- function(){ 57 | for(i in 1:ncol(weights)){ 58 | weights[,i] <- ipfp(cons[i,], ind_catt, x0, maxit = 20) 59 | } 60 | } 61 | 62 | ipfp_apply20icx <- function(){ 63 | weights <- apply(cons, MARGIN = 1, FUN = function(x) ipfp(as.numeric(x), ind_catt, x0 , maxit = 20)) 64 | } 65 | library(microbenchmark) 66 | microbenchmark(ipfp_for(), ipfp_apply(), ipfp_for20(), ipfp_apply20(), ipfp_foric(), ipfp_applyic(), ipfp_for20icx(), ipfp_apply20icx(), times = 5) 67 | 68 | library(parallel) 69 | detectCores() # how many cores on the system? 
70 | cl <- makeCluster(getOption("cl.cores", 4)) # make the cluster 71 | clusterExport(cl,c("ipfp","ind_catt", "x0")) # export the objects each node needs 72 | 73 | 74 | 75 | ind_catt <- t(ind_cat) 76 | 77 | f3 <- function(cl){ 78 | weights_apply <- parApply(cl = cl, cons, 1, function(x) ipfp(x, ind_catt, x0)) 79 | } 80 | 81 | library(microbenchmark) 82 | microbenchmark(ipfp_for(), ipfp_apply(), ipfp_for20(), ipfp_apply20(), ipfp_foric(), ipfp_applyic(), ipfp_for20icx(), ipfp_apply20icx(), f3(cl), times = 3 ) 83 | 84 | stopCluster(cl) # stop the cluster -------------------------------------------------------------------------------- /courses/course-info-3day.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Course information" 3 | author: "Robin Lovelace and Morgane Dumont" 4 | output: html_document 5 | --- 6 | 7 | ```{r setup, include=FALSE} 8 | knitr::opts_chunk$set(echo = TRUE) 9 | ``` 10 | 11 | Course overview for Spatial Microsimulation course in Seville, 7^th^ - 9^th^ November. 12 | 13 | ## Day 1 (9:30 to 17:30 with one hour break) 14 | 15 | - [Introduction to spatial microsimulation in R](http://robinlovelace.net/spatial-microsim-book/slides/introduction.pdf) (RL 09:30 - 10:30) 16 | - Course overview, aims and objectives (RL 9:30 - 10:00) 17 | - Go-round of participants: course aspirations (RL 10:00 - 10:30) 18 | 19 | - What is Spatial Microsimulation and its applications (with an emphasis on EU projects) (10:30 - 11:30) 20 | - Agriculture (RL) 21 | - Wealth distribution (RL) 22 | - Transport (MD - 20 min) 23 | 24 | - [Using R and RStudio](http://rpubs.com/RobinLovelace/146447) for spatial microsimulation (RL 11:30 - 13:00 See book appendix) 25 | - Project management 26 | - GitHub 27 | - The RStudio Graphical User Interface (GUI) 28 | - Using R 29 | 30 | - [Creating spatial microdata in R](http://robinlovelace.net/spatial-microsim-book/slides/spatial-microdata-in-r.html) (MD 14:00 - 17:30) 31 | - Loading the data 32 | - Preparing the data 33 | - Reweighting procedures 34 | 35 | ## Day 2 36 | 37 | - [Applying the methods of IPF and Combinatorial Optimisation](https://github.com/Robinlovelace/spatial-microsim-book/blob/master/slides/Applying-IPF-and-CO.Rmd) (9:30 - 12:00) 38 | - Internal and External Validation (MD) 39 | - Population synthesis with integerisation (MD) 40 | - Introduction to Combinatorial Optimisation (MD) 41 | 42 | - [simPop](https://github.com/Robinlovelace/spatial-microsim-book/blob/master/slides/simpop-intro.Rmd) (RL 12:00 - 13:00) 43 | 44 | - Spatial data with R (14:00 - 16:00) 45 | - [Spatial data classes](http://rpubs.com/RobinLovelace/217921) (30 min) 46 | - Practical work on the [Creating-maps-in-R](https://github.com/Robinlovelace/Creating-maps-in-R) repository (45 min Q & A) 47 | - [Visualisation](https://github.com/Robinlovelace/Creating-maps-in-R/blob/master/vignettes/vspd-base-shiny.md) (30 min) 48 | - [Simple features](https://github.com/edzer/sfr) (**sf** package) (15 min if time) 49 | 50 | 51 | 52 | ## Day 3 53 | 54 | 55 | - Related topics (MD 9:30 - 10:30) 56 | - Spatial micro-data for agent-based models 57 | - Spatial microsimulation without micro-data 58 | - Adding the household information 59 | - Choice of data and methods 60 | 61 | - Practical application (MD - 10:45 - 12:30) 62 | 63 | - [Interactive mapping](https://github.com/Robinlovelace/Creating-maps-in-R/blob/master/vignettes/vspd-base-shiny.md) with **tmap**, **leaflet** and **shiny** (if time) (RL 12:30 - 13:00) 64 | 65 | - Apply what you've learned on your own
data (14:00 - 16:45) 66 | 67 | - Conclusion to the course (16:45 - 17:00) 68 | 69 | -------------------------------------------------------------------------------- /data/Belgium/ContrainteDipl.txt: -------------------------------------------------------------------------------- 1 | "com" "dipl" "COUNT" 2 | 91005 "Aucun" 172 3 | 91013 "Aucun" 218 4 | 91015 "Aucun" 123 5 | 91030 "Aucun" 491 6 | 91034 "Aucun" 349 7 | 91054 "Aucun" 182 8 | 91059 "Aucun" 173 9 | 91064 "Aucun" 113 10 | 91072 "Aucun" 140 11 | 91103 "Aucun" 69 12 | 91114 "Aucun" 300 13 | 91120 "Aucun" 127 14 | 91141 "Aucun" 175 15 | 91142 "Aucun" 222 16 | 91143 "Aucun" 109 17 | 92003 "Aucun" 743 18 | 92006 "Aucun" 96 19 | 92035 "Aucun" 381 20 | 92045 "Aucun" 173 21 | 92048 "Aucun" 234 22 | 92054 "Aucun" 124 23 | 92087 "Aucun" 352 24 | 92094 "Aucun" 2519 25 | 92097 "Aucun" 71 26 | 92101 "Aucun" 202 27 | 92114 "Aucun" 206 28 | 92137 "Aucun" 1064 29 | 92138 "Aucun" 133 30 | 92140 "Aucun" 545 31 | 92141 "Aucun" 155 32 | 92142 "Aucun" 414 33 | 93010 "Aucun" 115 34 | 93014 "Aucun" 584 35 | 93018 "Aucun" 129 36 | 93022 "Aucun" 324 37 | 93056 "Aucun" 263 38 | 93088 "Aucun" 468 39 | 93090 "Aucun" 229 40 | 91005 "CITE1" 678 41 | 91013 "CITE1" 917 42 | 91015 "CITE1" 431 43 | 91030 "CITE1" 1441 44 | 91034 "CITE1" 1432 45 | 91054 "CITE1" 555 46 | 91059 "CITE1" 545 47 | 91064 "CITE1" 434 48 | 91072 "CITE1" 435 49 | 91103 "CITE1" 320 50 | 91114 "CITE1" 1192 51 | 91120 "CITE1" 478 52 | 91141 "CITE1" 723 53 | 91142 "CITE1" 858 54 | 91143 "CITE1" 453 55 | 92003 "CITE1" 2593 56 | 92006 "CITE1" 421 57 | 92035 "CITE1" 1240 58 | 92045 "CITE1" 644 59 | 92048 "CITE1" 956 60 | 92054 "CITE1" 415 61 | 92087 "CITE1" 1208 62 | 92094 "CITE1" 10056 63 | 92097 "CITE1" 304 64 | 92101 "CITE1" 776 65 | 92114 "CITE1" 794 66 | 92137 "CITE1" 3347 67 | 92138 "CITE1" 532 68 | 92140 "CITE1" 2029 69 | 92141 "CITE1" 587 70 | 92142 "CITE1" 1668 71 | 93010 "CITE1" 460 72 | 93014 "CITE1" 1657 73 | 93018 "CITE1" 338 74 | 93022 "CITE1" 1111 75 | 93056 "CITE1" 956 76 | 93088 "CITE1" 1546 77 | 93090 "CITE1" 777 78 | 91005 "CITE2" 1636 79 | 91013 "CITE2" 2142 80 | 91015 "CITE2" 758 81 | 91030 "CITE2" 3719 82 | 91034 "CITE2" 3290 83 | 91054 "CITE2" 1118 84 | 91059 "CITE2" 1463 85 | 91064 "CITE2" 1083 86 | 91072 "CITE2" 1072 87 | 91103 "CITE2" 732 88 | 91114 "CITE2" 2998 89 | 91120 "CITE2" 1199 90 | 91141 "CITE2" 1840 91 | 91142 "CITE2" 1577 92 | 91143 "CITE2" 716 93 | 92003 "CITE2" 6305 94 | 92006 "CITE2" 1311 95 | 92035 "CITE2" 3260 96 | 92045 "CITE2" 1670 97 | 92048 "CITE2" 2355 98 | 92054 "CITE2" 1324 99 | 92087 "CITE2" 2873 100 | 92094 "CITE2" 23911 101 | 92097 "CITE2" 1073 102 | 92101 "CITE2" 2305 103 | 92114 "CITE2" 1671 104 | 92137 "CITE2" 7019 105 | 92138 "CITE2" 1437 106 | 92140 "CITE2" 4319 107 | 92141 "CITE2" 1685 108 | 92142 "CITE2" 4689 109 | 93010 "CITE2" 1182 110 | 93014 "CITE2" 3427 111 | 93018 "CITE2" 680 112 | 93022 "CITE2" 2737 113 | 93056 "CITE2" 2283 114 | 93088 "CITE2" 4234 115 | 93090 "CITE2" 1454 116 | 91005 "CITE3" 1966 117 | 91013 "CITE3" 2223 118 | 91015 "CITE3" 817 119 | 91030 "CITE3" 4092 120 | 91034 "CITE3" 3590 121 | 91054 "CITE3" 1052 122 | 91059 "CITE3" 1818 123 | 91064 "CITE3" 1257 124 | 91072 "CITE3" 1242 125 | 91103 "CITE3" 842 126 | 91114 "CITE3" 3405 127 | 91120 "CITE3" 1273 128 | 91141 "CITE3" 2114 129 | 91142 "CITE3" 1435 130 | 91143 "CITE3" 652 131 | 92003 "CITE3" 6590 132 | 92006 "CITE3" 1702 133 | 92035 "CITE3" 3746 134 | 92045 "CITE3" 1893 135 | 92048 "CITE3" 2499 136 | 92054 "CITE3" 1643 137 | 92087 "CITE3" 3261 138 | 92094 "CITE3" 
27136 139 | 92097 "CITE3" 1251 140 | 92101 "CITE3" 2907 141 | 92114 "CITE3" 1841 142 | 92137 "CITE3" 6885 143 | 92138 "CITE3" 1742 144 | 92140 "CITE3" 4632 145 | 92141 "CITE3" 2063 146 | 92142 "CITE3" 5568 147 | 93010 "CITE3" 1264 148 | 93014 "CITE3" 3440 149 | 93018 "CITE3" 729 150 | 93022 "CITE3" 3062 151 | 93056 "CITE3" 2373 152 | 93088 "CITE3" 4713 153 | 93090 "CITE3" 1550 154 | 91005 "CITE4" 112 155 | 91013 "CITE4" 183 156 | 91015 "CITE4" 48 157 | 91030 "CITE4" 231 158 | 91034 "CITE4" 193 159 | 91054 "CITE4" 69 160 | 91059 "CITE4" 93 161 | 91064 "CITE4" 82 162 | 91072 "CITE4" 93 163 | 91103 "CITE4" 55 164 | 91114 "CITE4" 144 165 | 91120 "CITE4" 82 166 | 91141 "CITE4" 133 167 | 91142 "CITE4" 87 168 | 91143 "CITE4" 42 169 | 92003 "CITE4" 344 170 | 92006 "CITE4" 91 171 | 92035 "CITE4" 202 172 | 92045 "CITE4" 112 173 | 92048 "CITE4" 162 174 | 92054 "CITE4" 113 175 | 92087 "CITE4" 196 176 | 92094 "CITE4" 1568 177 | 92097 "CITE4" 68 178 | 92101 "CITE4" 168 179 | 92114 "CITE4" 127 180 | 92137 "CITE4" 408 181 | 92138 "CITE4" 95 182 | 92140 "CITE4" 248 183 | 92141 "CITE4" 135 184 | 92142 "CITE4" 318 185 | 93010 "CITE4" 71 186 | 93014 "CITE4" 232 187 | 93018 "CITE4" 51 188 | 93022 "CITE4" 167 189 | 93056 "CITE4" 135 190 | 93088 "CITE4" 264 191 | 93090 "CITE4" 90 192 | 91005 "CITE5" 1208 193 | 91013 "CITE5" 1467 194 | 91015 "CITE5" 460 195 | 91030 "CITE5" 3026 196 | 91034 "CITE5" 2277 197 | 91054 "CITE5" 720 198 | 91059 "CITE5" 1462 199 | 91064 "CITE5" 1029 200 | 91072 "CITE5" 782 201 | 91103 "CITE5" 529 202 | 91114 "CITE5" 2016 203 | 91120 "CITE5" 856 204 | 91141 "CITE5" 2148 205 | 91142 "CITE5" 620 206 | 91143 "CITE5" 381 207 | 92003 "CITE5" 4062 208 | 92006 "CITE5" 1609 209 | 92035 "CITE5" 3375 210 | 92045 "CITE5" 1764 211 | 92048 "CITE5" 1796 212 | 92054 "CITE5" 1675 213 | 92087 "CITE5" 2419 214 | 92094 "CITE5" 26403 215 | 92097 "CITE5" 949 216 | 92101 "CITE5" 2980 217 | 92114 "CITE5" 1798 218 | 92137 "CITE5" 3966 219 | 92138 "CITE5" 1670 220 | 92140 "CITE5" 3059 221 | 92141 "CITE5" 2322 222 | 92142 "CITE5" 6509 223 | 93010 "CITE5" 766 224 | 93014 "CITE5" 2063 225 | 93018 "CITE5" 443 226 | 93022 "CITE5" 1821 227 | 93056 "CITE5" 1424 228 | 93088 "CITE5" 3678 229 | 93090 "CITE5" 727 230 | 91005 "CITE6" 22 231 | 91013 "CITE6" 25 232 | 91015 "CITE6" 10 233 | 91030 "CITE6" 51 234 | 91034 "CITE6" 62 235 | 91054 "CITE6" 5 236 | 91059 "CITE6" 24 237 | 91064 "CITE6" 23 238 | 91072 "CITE6" 19 239 | 91103 "CITE6" 15 240 | 91114 "CITE6" 33 241 | 91120 "CITE6" 14 242 | 91141 "CITE6" 47 243 | 91142 "CITE6" 9 244 | 91143 "CITE6" 11 245 | 92003 "CITE6" 76 246 | 92006 "CITE6" 52 247 | 92035 "CITE6" 101 248 | 92045 "CITE6" 39 249 | 92048 "CITE6" 24 250 | 92054 "CITE6" 58 251 | 92087 "CITE6" 24 252 | 92094 "CITE6" 741 253 | 92097 "CITE6" 21 254 | 92101 "CITE6" 75 255 | 92114 "CITE6" 49 256 | 92137 "CITE6" 46 257 | 92138 "CITE6" 69 258 | 92140 "CITE6" 55 259 | 92141 "CITE6" 81 260 | 92142 "CITE6" 248 261 | 93010 "CITE6" 10 262 | 93014 "CITE6" 30 263 | 93018 "CITE6" 6 264 | 93022 "CITE6" 25 265 | 93056 "CITE6" 21 266 | 93088 "CITE6" 37 267 | 93090 "CITE6" 22 268 | 91005 "NonConcerne" 1238 269 | 91013 "NonConcerne" 1653 270 | 91015 "NonConcerne" 524 271 | 91030 "NonConcerne" 2661 272 | 91034 "NonConcerne" 2327 273 | 91054 "NonConcerne" 764 274 | 91059 "NonConcerne" 1404 275 | 91064 "NonConcerne" 1036 276 | 91072 "NonConcerne" 774 277 | 91103 "NonConcerne" 562 278 | 91114 "NonConcerne" 2290 279 | 91120 "NonConcerne" 929 280 | 91141 "NonConcerne" 1701 281 | 91142 "NonConcerne" 850 282 | 91143 "NonConcerne" 412 283 
| 92003 "NonConcerne" 4798 284 | 92006 "NonConcerne" 1328 285 | 92035 "NonConcerne" 2962 286 | 92045 "NonConcerne" 1515 287 | 92048 "NonConcerne" 1878 288 | 92054 "NonConcerne" 1364 289 | 92087 "NonConcerne" 2349 290 | 92094 "NonConcerne" 17431 291 | 92097 "NonConcerne" 960 292 | 92101 "NonConcerne" 2112 293 | 92114 "NonConcerne" 1646 294 | 92137 "NonConcerne" 4743 295 | 92138 "NonConcerne" 1464 296 | 92140 "NonConcerne" 3603 297 | 92141 "NonConcerne" 1825 298 | 92142 "NonConcerne" 4291 299 | 93010 "NonConcerne" 952 300 | 93014 "NonConcerne" 2335 301 | 93018 "NonConcerne" 509 302 | 93022 "NonConcerne" 1901 303 | 93056 "NonConcerne" 1510 304 | 93088 "NonConcerne" 3152 305 | 93090 "NonConcerne" 922 306 | -------------------------------------------------------------------------------- /data/Belgium/ContrainteGenre.txt: -------------------------------------------------------------------------------- 1 | "com" "gender" "COUNT" 2 | 91005 "Femmes" 3600 3 | 91013 "Femmes" 4577 4 | 91015 "Femmes" 1584 5 | 91030 "Femmes" 8169 6 | 91034 "Femmes" 6990 7 | 91054 "Femmes" 2294 8 | 91059 "Femmes" 3485 9 | 91064 "Femmes" 2561 10 | 91072 "Femmes" 2314 11 | 91103 "Femmes" 1575 12 | 91114 "Femmes" 6301 13 | 91120 "Femmes" 2486 14 | 91141 "Femmes" 4492 15 | 91142 "Femmes" 2793 16 | 91143 "Femmes" 1409 17 | 92003 "Femmes" 13057 18 | 92006 "Femmes" 3312 19 | 92035 "Femmes" 7777 20 | 92045 "Femmes" 3994 21 | 92048 "Femmes" 4899 22 | 92054 "Femmes" 3372 23 | 92087 "Femmes" 6457 24 | 92094 "Femmes" 57004 25 | 92097 "Femmes" 2375 26 | 92101 "Femmes" 5878 27 | 92114 "Femmes" 4127 28 | 92137 "Femmes" 14325 29 | 92138 "Femmes" 3606 30 | 92140 "Femmes" 9450 31 | 92141 "Femmes" 4523 32 | 92142 "Femmes" 11951 33 | 93010 "Femmes" 2413 34 | 93014 "Femmes" 7103 35 | 93018 "Femmes" 1472 36 | 93022 "Femmes" 5668 37 | 93056 "Femmes" 4520 38 | 93088 "Femmes" 9290 39 | 93090 "Femmes" 3000 40 | 91005 "Hommes" 3432 41 | 91013 "Hommes" 4251 42 | 91015 "Hommes" 1587 43 | 91030 "Hommes" 7543 44 | 91034 "Hommes" 6530 45 | 91054 "Hommes" 2171 46 | 91059 "Hommes" 3497 47 | 91064 "Hommes" 2496 48 | 91072 "Hommes" 2243 49 | 91103 "Hommes" 1549 50 | 91114 "Hommes" 6077 51 | 91120 "Hommes" 2472 52 | 91141 "Hommes" 4389 53 | 91142 "Hommes" 2865 54 | 91143 "Hommes" 1367 55 | 92003 "Hommes" 12454 56 | 92006 "Hommes" 3298 57 | 92035 "Hommes" 7490 58 | 92045 "Hommes" 3816 59 | 92048 "Hommes" 5005 60 | 92054 "Hommes" 3344 61 | 92087 "Hommes" 6225 62 | 92094 "Hommes" 52761 63 | 92097 "Hommes" 2322 64 | 92101 "Hommes" 5647 65 | 92114 "Hommes" 4005 66 | 92137 "Hommes" 13153 67 | 92138 "Hommes" 3536 68 | 92140 "Hommes" 9040 69 | 92141 "Hommes" 4330 70 | 92142 "Hommes" 11754 71 | 93010 "Hommes" 2407 72 | 93014 "Hommes" 6665 73 | 93018 "Hommes" 1413 74 | 93022 "Hommes" 5480 75 | 93056 "Hommes" 4445 76 | 93088 "Hommes" 8802 77 | 93090 "Hommes" 2771 78 | -------------------------------------------------------------------------------- /data/Belgium/ContrainteStatut.txt: -------------------------------------------------------------------------------- 1 | "com" "statut" "COUNT" 2 | 91005 "Chômeurs" 298 3 | 91013 "Chômeurs" 493 4 | 91015 "Chômeurs" 157 5 | 91030 "Chômeurs" 699 6 | 91034 "Chômeurs" 798 7 | 91054 "Chômeurs" 224 8 | 91059 "Chômeurs" 250 9 | 91064 "Chômeurs" 192 10 | 91072 "Chômeurs" 196 11 | 91103 "Chômeurs" 183 12 | 91114 "Chômeurs" 599 13 | 91120 "Chômeurs" 218 14 | 91141 "Chômeurs" 314 15 | 91142 "Chômeurs" 474 16 | 91143 "Chômeurs" 146 17 | 92003 "Chômeurs" 1377 18 | 92006 "Chômeurs" 200 19 | 92035 "Chômeurs" 501 20 | 92045 "Chômeurs" 270 
21 | 92048 "Chômeurs" 510 22 | 92054 "Chômeurs" 232 23 | 92087 "Chômeurs" 559 24 | 92094 "Chômeurs" 5638 25 | 92097 "Chômeurs" 200 26 | 92101 "Chômeurs" 428 27 | 92114 "Chômeurs" 332 28 | 92137 "Chômeurs" 1806 29 | 92138 "Chômeurs" 209 30 | 92140 "Chômeurs" 1007 31 | 92141 "Chômeurs" 250 32 | 92142 "Chômeurs" 921 33 | 93010 "Chômeurs" 301 34 | 93014 "Chômeurs" 976 35 | 93018 "Chômeurs" 160 36 | 93022 "Chômeurs" 579 37 | 93056 "Chômeurs" 519 38 | 93088 "Chômeurs" 825 39 | 93090 "Chômeurs" 427 40 | 91005 "Inactifs" 3842 41 | 91013 "Inactifs" 5074 42 | 91015 "Inactifs" 1734 43 | 91030 "Inactifs" 8535 44 | 91034 "Inactifs" 7834 45 | 91054 "Inactifs" 2566 46 | 91059 "Inactifs" 3694 47 | 91064 "Inactifs" 2786 48 | 91072 "Inactifs" 2521 49 | 91103 "Inactifs" 1708 50 | 91114 "Inactifs" 7008 51 | 91120 "Inactifs" 2698 52 | 91141 "Inactifs" 4951 53 | 91142 "Inactifs" 3405 54 | 91143 "Inactifs" 1748 55 | 92003 "Inactifs" 14338 56 | 92006 "Inactifs" 3405 57 | 92035 "Inactifs" 8052 58 | 92045 "Inactifs" 4148 59 | 92048 "Inactifs" 5429 60 | 92054 "Inactifs" 3490 61 | 92087 "Inactifs" 6818 62 | 92094 "Inactifs" 62471 63 | 92097 "Inactifs" 2396 64 | 92101 "Inactifs" 6132 65 | 92114 "Inactifs" 4358 66 | 92137 "Inactifs" 15883 67 | 92138 "Inactifs" 3733 68 | 92140 "Inactifs" 10288 69 | 92141 "Inactifs" 4607 70 | 92142 "Inactifs" 12608 71 | 93010 "Inactifs" 2654 72 | 93014 "Inactifs" 8119 73 | 93018 "Inactifs" 1700 74 | 93022 "Inactifs" 6355 75 | 93056 "Inactifs" 5045 76 | 93088 "Inactifs" 9755 77 | 93090 "Inactifs" 3503 78 | 91005 "Travailleurs" 2892 79 | 91013 "Travailleurs" 3261 80 | 91015 "Travailleurs" 1280 81 | 91030 "Travailleurs" 6478 82 | 91034 "Travailleurs" 4888 83 | 91054 "Travailleurs" 1675 84 | 91059 "Travailleurs" 3038 85 | 91064 "Travailleurs" 2079 86 | 91072 "Travailleurs" 1840 87 | 91103 "Travailleurs" 1233 88 | 91114 "Travailleurs" 4771 89 | 91120 "Travailleurs" 2042 90 | 91141 "Travailleurs" 3616 91 | 91142 "Travailleurs" 1779 92 | 91143 "Travailleurs" 882 93 | 92003 "Travailleurs" 9796 94 | 92006 "Travailleurs" 3005 95 | 92035 "Travailleurs" 6714 96 | 92045 "Travailleurs" 3392 97 | 92048 "Travailleurs" 3965 98 | 92054 "Travailleurs" 2994 99 | 92087 "Travailleurs" 5305 100 | 92094 "Travailleurs" 41656 101 | 92097 "Travailleurs" 2101 102 | 92101 "Travailleurs" 4965 103 | 92114 "Travailleurs" 3442 104 | 92137 "Travailleurs" 9789 105 | 92138 "Travailleurs" 3200 106 | 92140 "Travailleurs" 7195 107 | 92141 "Travailleurs" 3996 108 | 92142 "Travailleurs" 10176 109 | 93010 "Travailleurs" 1865 110 | 93014 "Travailleurs" 4673 111 | 93018 "Travailleurs" 1025 112 | 93022 "Travailleurs" 4214 113 | 93056 "Travailleurs" 3401 114 | 93088 "Travailleurs" 7512 115 | 93090 "Travailleurs" 1841 116 | -------------------------------------------------------------------------------- /data/Belgium/HH_cons_INS92094: -------------------------------------------------------------------------------- 1 | HHsize count 2 | 2 27871 3 | 3 11257 4 | 4 5063 5 | -------------------------------------------------------------------------------- /data/Belgium/HH_sample: -------------------------------------------------------------------------------- 1 | HHID HHsize HHtype 2 | 1 2 Couple 3 | 2 3 NoCouple 4 | 3 3 Couple 5 | 4 2 NoCouple 6 | 5 4 NoCouple 7 | 6 2 Couple 8 | 7 3 Couple 9 | 8 4 Couple 10 | 9 2 NoCouple 11 | 10 2 NoCouple 12 | 11 3 Couple 13 | 12 1 NoCouple 14 | 13 2 Couple 15 | 14 1 NoCouple 16 | 15 2 Couple 17 | -------------------------------------------------------------------------------- /data/CakeMap/area-cat.R: 
-------------------------------------------------------------------------------- 1 | ## runs with integerisation code - produces categorised output with per area loop 2 | 3 | # create new age/sex variable 4 | AS <- paste0(intall[[i]]$Sex, intall[[i]]$ageband4) 5 | unique(AS) 6 | 7 | # matrix for constraint 1 - age/sex 8 | m1 <- model.matrix(~AS-1) 9 | 10 | # matrix for con2 (car ownership) 11 | intall[[i]]$Car <- as.character(intall[[i]]$Car) 12 | m2 <- model.matrix(~intall[[i]]$Car-1) 13 | 14 | # matrix for con3 (nssec) 15 | intall[[i]]$NSSEC8 <- as.character(intall[[i]]$NSSEC8) 16 | m3 <- model.matrix(~intall[[i]]$NSSEC8-1) 17 | 18 | summary(intall[[i]]$NCakes) 19 | levels(ind$NCakes) 20 | intall[[i]]$avnumcakes <- 1 21 | intall[[i]]$avnumcakes[intall[[i]]$NCakes == levels(ind$NCakes)[1]] <- 0.5 22 | intall[[i]]$avnumcakes[intall[[i]]$NCakes == levels(ind$NCakes)[2]] <- 1.5 23 | intall[[i]]$avnumcakes[intall[[i]]$NCakes == levels(ind$NCakes)[3]] <- 4 24 | intall[[i]]$avnumcakes[intall[[i]]$NCakes == levels(ind$NCakes)[4]] <- 8 25 | intall[[i]]$avnumcakes[intall[[i]]$NCakes == levels(ind$NCakes)[5]] <- 0.1 26 | summary(intall[[i]]$avnumcakes[]) 27 | 28 | # Polishing up 29 | area.cat <- data.frame(cbind(m1, m2, m3)) 30 | names(ind_cat) <- cat_labs -------------------------------------------------------------------------------- /data/CakeMap/categorise.R: -------------------------------------------------------------------------------- 1 | # converts numeric variables into categorical variables 2 | # Create 0/1 counts from survey data 3 | 4 | # create new age/sex variable 5 | AS <- paste0(ind$Sex, ind$ageband4) 6 | unique(AS) 7 | 8 | # matrix for constraint 1 - age/sex 9 | m1 <- model.matrix(~AS-1) 10 | head(cons) 11 | head(m1) 12 | colnames(m1) <- names(cons)[1:12] 13 | head(m1) 14 | summary(rowSums(m1)) 15 | 16 | # matrix for con2 (car ownership) 17 | ind$Car <- as.character(ind$Car) 18 | m2 <- model.matrix(~ind$Car-1) 19 | head(m2) 20 | summary(m2) 21 | 22 | # matrix for con3 (nssec) 23 | ind$NSSEC8 <- as.character(ind$NSSEC8) 24 | m3 <- model.matrix(~ind$NSSEC8-1) 25 | head(m3) 26 | names(cons) 27 | 28 | # Polishing up 29 | ind_cat <- data.frame(cbind(m1, m2, m3)) 30 | rm(m1, m2, m3) 31 | names(ind_cat) <- cat_labs 32 | head(ind_cat) 33 | -------------------------------------------------------------------------------- /data/CakeMap/inc-est-2001.csv: -------------------------------------------------------------------------------- 1 | ,,OCODE,NAME,Avinc 2 | 00CX,Bradford,00CXFA,Baildon,620 3 | 00CX,Bradford,00CXFB,Bingley,600 4 | 00CX,Bradford,00CXFC,Bingley Rural,620 5 | 00CX,Bradford,00CXFD,Bolton,550 6 | 00CX,Bradford,00CXFE,Bowling,360 7 | 00CX,Bradford,00CXFF,Bradford Moor,340 8 | 00CX,Bradford,00CXFG,Clayton,500 9 | 00CX,Bradford,00CXFH,Craven,620 10 | 00CX,Bradford,00CXFJ,Eccleshill,460 11 | 00CX,Bradford,00CXFK,Great Horton,450 12 | 00CX,Bradford,00CXFL,Heaton,480 13 | 00CX,Bradford,00CXFM,Idle,580 14 | 00CX,Bradford,00CXFN,Ilkley,720 15 | 00CX,Bradford,00CXFP,Keighley North,520 16 | 00CX,Bradford,00CXFQ,Keighley South,400 17 | 00CX,Bradford,00CXFR,Keighley West,480 18 | 00CX,Bradford,00CXFS,Little Horton,320 19 | 00CX,Bradford,00CXFT,Odsal,460 20 | 00CX,Bradford,00CXFU,Queensbury,580 21 | 00CX,Bradford,00CXFW,Rombalds,720 22 | 00CX,Bradford,00CXFX,Shipley East,450 23 | 00CX,Bradford,00CXFY,Shipley West,580 24 | 00CX,Bradford,00CXFZ,Thornton,510 25 | 00CX,Bradford,00CXGA,Toller,390 26 | 00CX,Bradford,00CXGB,Tong,420 27 | 00CX,Bradford,00CXGC,Undercliffe,400 28 | 
00CX,Bradford,00CXGD,University,320 29 | 00CX,Bradford,00CXGE,Wibsey,500 30 | 00CX,Bradford,00CXGF,Worth Valley,630 31 | 00CX,Bradford,00CXGG,Wyke,510 32 | 00CY,Calderdale,00CYFA,Brighouse,540 33 | 00CY,Calderdale,00CYFB,Calder Valley,590 34 | 00CY,Calderdale,00CYFC,Elland,480 35 | 00CY,Calderdale,00CYFD,Greetland and Stainland,630 36 | 00CY,Calderdale,00CYFE,Hipperholme and Lightcliffe,610 37 | 00CY,Calderdale,00CYFF,Illingworth,530 38 | 00CY,Calderdale,00CYFG,Luddendenfoot,580 39 | 00CY,Calderdale,00CYFH,Mixenden,420 40 | 00CY,Calderdale,00CYFJ,Northowram and Shelf,610 41 | 00CY,Calderdale,00CYFK,Ovenden,430 42 | 00CY,Calderdale,00CYFL,Rastrick,600 43 | 00CY,Calderdale,00CYFM,Ryburn,630 44 | 00CY,Calderdale,00CYFN,St. John's,350 45 | 00CY,Calderdale,00CYFP,Skircoat,620 46 | 00CY,Calderdale,00CYFQ,Sowerby Bridge,510 47 | 00CY,Calderdale,00CYFR,Todmorden,480 48 | 00CY,Calderdale,00CYFS,Town,460 49 | 00CY,Calderdale,00CYFT,Warley,500 50 | 00CZ,Kirklees,00CZFA,Almondbury,490 51 | 00CZ,Kirklees,00CZFB,Batley East,480 52 | 00CZ,Kirklees,00CZFC,Batley West,470 53 | 00CZ,Kirklees,00CZFD,Birkby,470 54 | 00CZ,Kirklees,00CZFE,Birstall and Birkenshaw,560 55 | 00CZ,Kirklees,00CZFF,Cleckheaton,560 56 | 00CZ,Kirklees,00CZFG,Colne Valley West,540 57 | 00CZ,Kirklees,00CZFH,Crosland Moor,450 58 | 00CZ,Kirklees,00CZFJ,Dalton,480 59 | 00CZ,Kirklees,00CZFK,Deighton,410 60 | 00CZ,Kirklees,00CZFL,Denby Dale,670 61 | 00CZ,Kirklees,00CZFM,Dewsbury East,450 62 | 00CZ,Kirklees,00CZFN,Dewsbury West,450 63 | 00CZ,Kirklees,00CZFP,Golcar,530 64 | 00CZ,Kirklees,00CZFQ,Heckmondwike,560 65 | 00CZ,Kirklees,00CZFR,Holme Valley North,610 66 | 00CZ,Kirklees,00CZFS,Holme Valley South,670 67 | 00CZ,Kirklees,00CZFT,Kirkburton,620 68 | 00CZ,Kirklees,00CZFU,Lindley,580 69 | 00CZ,Kirklees,00CZFW,Mirfield,600 70 | 00CZ,Kirklees,00CZFX,Newsome,410 71 | 00CZ,Kirklees,00CZFY,Paddock,440 72 | 00CZ,Kirklees,00CZFZ,Spen,530 73 | 00CZ,Kirklees,00CZGA,Thornhill,460 74 | 00DA,Leeds,00DAFA,Aireborough,630 75 | 00DA,Leeds,00DAFB,Armley,450 76 | 00DA,Leeds,00DAFC,Barwick and Kippax,620 77 | 00DA,Leeds,00DAFD,Beeston,440 78 | 00DA,Leeds,00DAFE,Bramley,470 79 | 00DA,Leeds,00DAFF,Burmantofts,390 80 | 00DA,Leeds,00DAFG,Chapel Allerton,480 81 | 00DA,Leeds,00DAFH,City and Holbeck,370 82 | 00DA,Leeds,00DAFJ,Cookridge,620 83 | 00DA,Leeds,00DAFK,Garforth and Swillington,610 84 | 00DA,Leeds,00DAFL,Halton,620 85 | 00DA,Leeds,00DAFM,Harehills,380 86 | 00DA,Leeds,00DAFN,Headingley,390 87 | 00DA,Leeds,00DAFP,Horsforth,680 88 | 00DA,Leeds,00DAFQ,Hunslet,400 89 | 00DA,Leeds,00DAFR,Kirkstall,490 90 | 00DA,Leeds,00DAFS,Middleton,500 91 | 00DA,Leeds,00DAFT,Moortown,610 92 | 00DA,Leeds,00DAFU,Morley North,610 93 | 00DA,Leeds,00DAFW,Morley South,580 94 | 00DA,Leeds,00DAFX,North,650 95 | 00DA,Leeds,00DAFY,Otley and Wharfedale,650 96 | 00DA,Leeds,00DAFZ,Pudsey North,620 97 | 00DA,Leeds,00DAGA,Pudsey South,550 98 | 00DA,Leeds,00DAGB,Richmond Hill,400 99 | 00DA,Leeds,00DAGC,Rothwell,590 100 | 00DA,Leeds,00DAGD,Roundhay,710 101 | 00DA,Leeds,00DAGE,Seacroft,390 102 | 00DA,Leeds,00DAGF,University,350 103 | 00DA,Leeds,00DAGG,Weetwood,540 104 | 00DA,Leeds,00DAGH,Wetherby,680 105 | 00DA,Leeds,00DAGJ,Whinmoor,510 106 | 00DA,Leeds,00DAGK,Wortley,490 107 | 00DB,Wakefield,00DBFA,Castleford Ferry Fryston,430 108 | 00DB,Wakefield,00DBFB,Castleford Glasshoughton,470 109 | 00DB,Wakefield,00DBFC,Castleford Whitwood,440 110 | 00DB,Wakefield,00DBFD,Crofton and Ackworth,570 111 | 00DB,Wakefield,00DBFE,Featherstone,450 112 | 00DB,Wakefield,00DBFF,Hemsworth,430 113 | 
00DB,Wakefield,00DBFG,Horbury,550 114 | 00DB,Wakefield,00DBFH,Knottingley,440 115 | 00DB,Wakefield,00DBFJ,Normanton and Sharlston,480 116 | 00DB,Wakefield,00DBFK,Ossett,560 117 | 00DB,Wakefield,00DBFL,Pontefract North,490 118 | 00DB,Wakefield,00DBFM,Pontefract South,520 119 | 00DB,Wakefield,00DBFN,South Elmsall,460 120 | 00DB,Wakefield,00DBFP,South Kirkby,430 121 | 00DB,Wakefield,00DBFQ,Stanley and Altofts,580 122 | 00DB,Wakefield,00DBFR,Stanley and Wrenthorpe,610 123 | 00DB,Wakefield,00DBFS,Wakefield Central,430 124 | 00DB,Wakefield,00DBFT,Wakefield East,390 125 | 00DB,Wakefield,00DBFU,Wakefield North,450 126 | 00DB,Wakefield,00DBFW,Wakefield Rural,620 127 | 00DB,Wakefield,00DBFX,Wakefield South,610 128 | -------------------------------------------------------------------------------- /data/CakeMap/load-all.R: -------------------------------------------------------------------------------- 1 | # Loading the aggregate dataset, saving as all.msim 2 | getwd() # should be in the smsim-course folder 3 | con1 <- read.csv("data/cakeMap/con1.csv") # age/sex variable 4 | con2 <- read.csv("data/cakeMap/con2.csv") # no car / car 5 | con3 <- read.csv("data/cakeMap/con3.csv") # ns-sec 6 | names(con1) 7 | names(con2) 8 | names(con3) 9 | 10 | con2 <- data.frame(cbind(con2[,1] - con2[,2], con2[,2])) 11 | names(con2) <- c("Car", "NoCar") 12 | head(con2) 13 | 14 | sum(con1); sum(con2); sum(con3) 15 | c(sum(con1), sum(con2), sum(con3)) / sum(con1) # how much the values deviate from expected 16 | 17 | con.pop <- rowSums(con1) 18 | con1 <- round(con1 * con.pop / rowSums(con1)) 19 | con2 <- round(con2 * con.pop / rowSums(con2)) 20 | con3 <- round(con3 * con.pop / rowSums(con3)) 21 | 22 | sum(con1); sum(con2); sum(con3); # all the numbers should be equal - this is close enough! 23 | 24 | # bind all the data frames together 25 | all.msim <- cbind(con1 26 | ,con2 27 | ,con3 28 | ) 29 | 30 | which(all.msim == 0) 31 | range(all.msim) # range of values - there are no zeros 32 | mean(con.pop) # average number of individuals in each zone 33 | 34 | # in case there are zeros, set just above 1 to avoid subsequent problems 35 | con1[con1 == 0] <- con2[con2 == 0] <- con3[con3 == 0] <- 0.0001 36 | # previous step avoids zero values (aren't any in this case...) 
37 | 38 | head(all.msim) 39 | 40 | category.labels <- names(all.msim) # define the category variables we're working with 41 | 42 | write.csv(all.msim, "data/cakeMap/cons.csv", row.names=F) 43 | 44 | -------------------------------------------------------------------------------- /data/CakeMap/process-age.R: -------------------------------------------------------------------------------- 1 | # Converting the data into a suitable form 2 | # We need the age to be classified as follows: 3 | # 16 to 24, 25 to 34, 35 to 44, 45 to 54, 55 to 64, 65 to 74, 75 and over 4 | # We will also categorise by male and female 5 | 6 | # setwd("cakeMap/") # navigate into cakeMap directory 7 | # (try typing 'getwd() or Session > Set Working Directory if this does not work) 8 | 9 | ageNames <- c("m16_24", "m25_34", "m35_44", "m45_54", "m55_64", "m65_74", 10 | "f16_24", "f25_34", "f35_44", "f45_54", "f55_64", "f65_74") # the output we want 11 | 12 | age <- read.csv("age-sex-raw.csv") 13 | names(age) 14 | age[1:3,6] # note that the first 2 rows are not needed 15 | rawNames <- age[1,] 16 | age <- age[-c(1,2),] 17 | class(age[,6]) # due to mix of character and numeric data, it's loaded factors 18 | 19 | age <- read.csv("age-sex-raw.csv", skip=2) # reload data only selecting numbers 20 | head(age[1:7]) 21 | class(age[,6]) # now its integer 22 | head(age) 23 | plot(colSums(age[6:ncol(age)])) 24 | 25 | # first category: males 16 - 24 26 | sel <- seq(6, (24-16) * 2 + 6, by = 2) 27 | rawNames[sel] # double check we have the correct categories 28 | assign(x = ageNames[1], value = rowSums(age[,sel])) 29 | 30 | # second category: males 25 - 34 31 | selt <- seq(max(sel) + 2, (34 - 25) * 2 + max(sel) + 2, by = 2) 32 | rawNames[selt] # double check we have the correct categories 33 | 34 | con1 <- data.frame(matrix(nrow = nrow(age), ncol = length(ageNames))) 35 | names(con1) <- ageNames 36 | con1[1] <- rowSums(age[sel]) 37 | 38 | # automating the process 39 | for(i in 2:6){ 40 | sel <- seq(max(sel) + 2, 9 * 2 + max(sel) + 2, by = 2) 41 | print(rawNames[sel]) # test it works 42 | con1[i] <- rowSums(age[sel], na.rm=T) 43 | } 44 | 45 | # first category: females 16 - 24 46 | sel <- seq(7, (24-16) * 2 + 7, by = 2) 47 | rawNames[sel] # double check we have the correct categories 48 | con1[7] <- rowSums(age[sel]) 49 | names(con1) 50 | for(i in 2:6){ 51 | sel <- seq(max(sel) + 2, 9 * 2 + max(sel) + 2, by = 2) 52 | print(rawNames[sel]) # test it works 53 | con1[i+6] <- rowSums(age[sel], na.rm=T) 54 | } 55 | 56 | plot(colSums(con1)) 57 | write.csv(con1, "con1.csv", row.names = F) 58 | -------------------------------------------------------------------------------- /data/CakeMap/process-car.R: -------------------------------------------------------------------------------- 1 | # Script to process car ownership 2 | 3 | car <- read.csv("cakeMap/cars-raw.csv", skip = 2) 4 | head(car) 5 | 6 | write.csv(car[6:7], file="cakeMap/con2.csv", row.names=F) 7 | -------------------------------------------------------------------------------- /data/CakeMap/process-nssec.R: -------------------------------------------------------------------------------- 1 | nssecNames <- c("1.1", "1.2", 2:8, "NA") 2 | nssec <- read.csv("cakeMap/nssec-raw.csv", skip=1) 3 | head(nssec[1:6]) 4 | names(nssec)[1:10] 5 | names(nssec) <- gsub(pattern="Age...Age.16.to.74...NS.SeC..National.Statistics.Socio.economic.Classification....", replacement="", 6 | names(nssec)) 7 | names(nssec) 8 | Other <- rowSums(nssec[56:60]) 9 | plot(colSums(nssec[7:20])) 10 | (sel <- grep("^[0-9]", 
names(nssec))) 11 | nssec <- nssec[sel] 12 | 13 | # clean up column names 14 | library(stringr) 15 | names(nssec) <- str_split_fixed(names(nssec), "\\.[A-Z]", 2)[,1] 16 | names(nssec) <- gsub("\\.$", "", names(nssec)) 17 | head(nssec) 18 | 19 | # remove "1" category, add Other 20 | nssec[1] <- NULL 21 | nssec <- cbind(nssec, Other) 22 | head(nssec) 23 | write.csv(nssec, "cakeMap/con3.csv", row.names = F) 24 | 25 | -------------------------------------------------------------------------------- /data/CakeMap/wards.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/data/CakeMap/wards.RData -------------------------------------------------------------------------------- /data/SimpleWorld/age.csv: -------------------------------------------------------------------------------- 1 | "a0.49","a.50+" 2 | 8,4 3 | 2,8 4 | 7,4 5 | -------------------------------------------------------------------------------- /data/SimpleWorld/ind-full.csv: -------------------------------------------------------------------------------- 1 | "id","age","sex","income" 2 | 1,59,"m",2868 3 | 2,54,"m",2474 4 | 3,35,"m",2231 5 | 4,73,"f",3152 6 | 5,49,"f",2473 -------------------------------------------------------------------------------- /data/SimpleWorld/ind.csv: -------------------------------------------------------------------------------- 1 | "id","age","sex" 2 | 1,59,"m" 3 | 2,54,"m" 4 | 3,35,"m" 5 | 4,73,"f" 6 | 5,49,"f" 7 | -------------------------------------------------------------------------------- /data/SimpleWorld/sex.csv: -------------------------------------------------------------------------------- 1 | "m","f" 2 | 6,6 3 | 4,6 4 | 3,8 5 | -------------------------------------------------------------------------------- /deploy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -o errexit -o nounset 3 | BASE_REPO=$PWD 4 | 5 | update_website() { 6 | cd ..; mkdir gh-pages; cd gh-pages 7 | git init 8 | git config user.name "Robin Lovelace" 9 | git config user.email "rob00x@gmail.com" 10 | git config --global push.default simple 11 | git remote add upstream "https://$GH_TOKEN@github.com/Robinlovelace/spatial-microsim-book.git" 12 | git fetch upstream 2>err.txt 13 | git checkout gh-pages 14 | 15 | cp -fvr $BASE_REPO/_book/* . 16 | git add *.html; git add libs/; git add figures/ 17 | git add _main_files/*; git add *.json 18 | git commit -a -m "Updating book (${TRAVIS_BUILD_NUMBER})" 19 | git status 20 | git push 2>err.txt 21 | cd .. 
22 | } 23 | 24 | update_website -------------------------------------------------------------------------------- /elsevier-harvard.csl: -------------------------------------------------------------------------------- 1 | 2 | 240 | -------------------------------------------------------------------------------- /figures/Belgium/BadSize.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/Belgium/BadSize.png -------------------------------------------------------------------------------- /figures/Belgium/CM_ENF.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/Belgium/CM_ENF.png -------------------------------------------------------------------------------- /figures/Belgium/Couples.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/Belgium/Couples.png -------------------------------------------------------------------------------- /figures/Belgium/NonAssigne.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/Belgium/NonAssigne.png -------------------------------------------------------------------------------- /figures/Belgium/diplome.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/Belgium/diplome.png -------------------------------------------------------------------------------- /figures/Belgium/diplome_statut.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/Belgium/diplome_statut.png -------------------------------------------------------------------------------- /figures/Belgium/statut.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/Belgium/statut.png -------------------------------------------------------------------------------- /figures/CakeMap-lores.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/CakeMap-lores.png -------------------------------------------------------------------------------- /figures/Couple_SE.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/Couple_SE.png -------------------------------------------------------------------------------- /figures/HH-CO.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/HH-CO.png 
-------------------------------------------------------------------------------- /figures/HHCouplesBelgium.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/HHCouplesBelgium.png -------------------------------------------------------------------------------- /figures/HHCouplesNamur.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/HHCouplesNamur.jpg -------------------------------------------------------------------------------- /figures/IllustrationCouples.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/IllustrationCouples.png -------------------------------------------------------------------------------- /figures/Jojo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/Jojo.png -------------------------------------------------------------------------------- /figures/Jojo_JASS.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/Jojo_JASS.png -------------------------------------------------------------------------------- /figures/Jojo_JASS2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/Jojo_JASS2.png -------------------------------------------------------------------------------- /figures/RandomUnif100000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/RandomUnif100000.png -------------------------------------------------------------------------------- /figures/TAEOptim_GenSA_Mo.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/TAEOptim_GenSA_Mo.pdf -------------------------------------------------------------------------------- /figures/TAEOptim_GenSA_Mo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/TAEOptim_GenSA_Mo.png -------------------------------------------------------------------------------- /figures/TRESISModels.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/TRESISModels.png -------------------------------------------------------------------------------- /figures/TimeCakeMap.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/TimeCakeMap.png -------------------------------------------------------------------------------- /figures/TimeOptim_GenSA_Mo.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/TimeOptim_GenSA_Mo.pdf -------------------------------------------------------------------------------- /figures/TimeOptim_GenSA_Mo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/TimeOptim_GenSA_Mo.png -------------------------------------------------------------------------------- /figures/Trafic_Jojo2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/Trafic_Jojo2.png -------------------------------------------------------------------------------- /figures/Trafic_jojo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/Trafic_jojo.png -------------------------------------------------------------------------------- /figures/agri-example-hynes-2008.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/agri-example-hynes-2008.png -------------------------------------------------------------------------------- /figures/austerity.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/austerity.png -------------------------------------------------------------------------------- /figures/co-vs-ipf-schema.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/co-vs-ipf-schema.png -------------------------------------------------------------------------------- /figures/cover-image.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/cover-image.jpg -------------------------------------------------------------------------------- /figures/fit-obs-sim-simple-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/fit-obs-sim-simple-5.png -------------------------------------------------------------------------------- /figures/fsimple1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/fsimple1.png -------------------------------------------------------------------------------- /figures/history01.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/history01.png -------------------------------------------------------------------------------- /figures/incomeCake.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/incomeCake.png -------------------------------------------------------------------------------- /figures/integerisation-algorithms.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/integerisation-algorithms.png -------------------------------------------------------------------------------- /figures/jtg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/jtg.png -------------------------------------------------------------------------------- /figures/msim-flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/msim-flow.png -------------------------------------------------------------------------------- /figures/msim-schema.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/msim-schema.png -------------------------------------------------------------------------------- /figures/nl-chooser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/nl-chooser.png -------------------------------------------------------------------------------- /figures/nl-graphics-window.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/nl-graphics-window.png -------------------------------------------------------------------------------- /figures/nl-income-boxplots.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/nl-income-boxplots.png -------------------------------------------------------------------------------- /figures/nl-plots.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/nl-plots.png -------------------------------------------------------------------------------- /figures/nl-simpleworld-negotiating.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/nl-simpleworld-negotiating.png 
-------------------------------------------------------------------------------- /figures/nl-simpleworld-populated.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/nl-simpleworld-populated.png -------------------------------------------------------------------------------- /figures/nl-simpleworld.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/nl-simpleworld.png -------------------------------------------------------------------------------- /figures/nl-sliders.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/nl-sliders.png -------------------------------------------------------------------------------- /figures/nl-ticks.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/nl-ticks.png -------------------------------------------------------------------------------- /figures/nl-zones.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/nl-zones.png -------------------------------------------------------------------------------- /figures/optim-its.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/optim-its.png -------------------------------------------------------------------------------- /figures/optim-time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/optim-time.png -------------------------------------------------------------------------------- /figures/raw-data-screenshot.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/raw-data-screenshot.jpeg -------------------------------------------------------------------------------- /figures/rstudio-autocomplete.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/rstudio-autocomplete.png -------------------------------------------------------------------------------- /figures/rstudio-environment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/rstudio-environment.png -------------------------------------------------------------------------------- /figures/simPop-results-eg.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/simPop-results-eg.png -------------------------------------------------------------------------------- /figures/simpleworld-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/simpleworld-1.png -------------------------------------------------------------------------------- /figures/studio-basic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/studio-basic.png -------------------------------------------------------------------------------- /figures/vingtile.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/vingtile.png -------------------------------------------------------------------------------- /figures/why-msim-maup.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/why-msim-maup.png -------------------------------------------------------------------------------- /fractional_weights/BA-MakeCakeSimFractional.R: -------------------------------------------------------------------------------- 1 | ############################################ 2 | #### From the spatial-microsim-book project 3 | #### https://github.com/Robinlovelace/spatial-microsim-book 4 | ############################################ 5 | 6 | # Additions from Ben Anderson (@dataknut) 7 | # clear out all old objects etc to avoid confusion 8 | rm(list = ls()) 9 | 10 | # Loading the data: Ensure R is in the right working directory 11 | ind <- read.csv("../data/CakeMap/ind.csv") 12 | cons <- read.csv("../data/CakeMap/cons.csv") 13 | 14 | # Take a quick look at the data 15 | head(ind) 16 | head(cons) 17 | 18 | # load constraints separately - normally this would be first stage 19 | con1 <- cons[1:12] # load the age/sex constraint 20 | con2 <- cons[13:14] # load the car/no car constraint 21 | con3 <- cons[15:24] # socio-economic class 22 | 23 | cat_labs <- names(cons) # category names, from correct from cons.R 24 | 25 | # set-up aggregate values - column for each category 26 | source("../data/CakeMap/categorise.R") # this script must be customised to input data 27 | 28 | # check constraint totals - should be true 29 | sum(ind_cat[,1:ncol(con1)]) == nrow(ind) # is the number in each category correct? 30 | sum(ind_cat[,ncol(con1)+1:ncol(con2)]) == nrow(ind) 31 | 32 | # create 2D weight matrix (individuals, areas) 33 | weights <- array(NA, dim=c(nrow(ind),nrow(cons))) 34 | 35 | # convert survey data into aggregates to compare with census (3D matix) 36 | ind_agg <- matrix(colSums(ind_cat), nrow(cons), ncol(cons), byrow = T) 37 | ind_agg[1:5,1:10] # look at what we've created - n. 
individuals replicated throughout 38 | 39 | ############## The IPF part ############# 40 | # make sure you have this package 41 | library(ipfp) 42 | cons <- apply(cons, 2, as.numeric) 43 | ind_catt <- t(ind_cat) 44 | # set up initial vector as a load of 1s 45 | x0 <- rep(1, nrow(ind)) 46 | # you can use x0 as a way to start from the original survey weights 47 | # as it just has to be a numeric initial vector (length ncol) 48 | # this might be useful if you have a small number of constraints but 49 | # if you have many the effect of the IPF will tend to drown them out 50 | 51 | # now loop over the zones and save ipfp results to weights 52 | for(i in 1:ncol(weights)){ 53 | weights[,i] <- ipfp(cons[i,], ind_catt, x0, maxit = 20) 54 | } 55 | 56 | ### Convert back to aggregates for testing 57 | for (i in 1:nrow(cons)){ # convert con1 weights back into aggregates 58 | ind_agg[i,] <- colSums(ind_cat * weights[,i]) 59 | } 60 | 61 | # test results for first row (not necessary for model) 62 | # you could iterate over this to test each zone 63 | ind_agg[1,1:15] - cons[1,1:15] # should be zero for final column - last constraint 64 | # which should remind us that IPF works to an order - so the last constraint is 65 | # fitted perfectly. This might matter if you think other constraints should be fitted perfectly... 66 | 67 | # Test correlations between original constraints and new aggregates 68 | cor(as.numeric(ind_agg), as.numeric(cons)) # fit between contraints and estimate 69 | # Might be worth then testing zone by zone 70 | # save the results into corr_by_zone_res 71 | corr_by_zone_res <- NULL 72 | for (i in 1:nrow(cons)){ 73 | corr_by_zone_res[i] <- cor(as.numeric(ind_agg[i,]), as.numeric(cons[i,])) 74 | } 75 | # look at range of zone by zone correlations 76 | range(corr_by_zone_res) 77 | 78 | # at this point RL wants to integerise to create a spatial microdataset of whole 'units' 79 | # But we don't have to - for many applications we may want to keep all the survey units (people or households) 80 | # with their fractional weights to avoid losing information. It also helps if we're interested in distributional 81 | # statistics for each area. 82 | 83 | # to do this simply reshape the weights so that each row is 1 individual per zone with weight 84 | 85 | # make weights a dataframe first 86 | weights_df <- as.data.frame(weights) 87 | # reshape it (needs 'stats' package) 88 | weights_l <- reshape(weights_df, direction = "long", varying = names(weights_df), sep = "") 89 | # fix the variable names after doing this 90 | names(weights_l)[names(weights_l) == "time"] <- "zone" # why can't this work directly on the name?! 91 | names(weights_l)[names(weights_l) == "V"] <- "weight" 92 | names(weights_l) 93 | 94 | # now do the internal join to match the indivudal level data to the long form file 95 | # create id variable in ind (assumes final order = same!) 
96 | ind$id <- 1:nrow(ind) 97 | # make sure you have this package 98 | library(dplyr) 99 | final_micro <- inner_join(weights_l,ind, by = "id") 100 | 101 | # check 102 | names(final_micro) 103 | nrow(final_micro) 104 | # notice how many fewer rows there are than in the original CakeMap.R version 105 | # - we have not needed to duplicate individuals as we are keeping the fractional weights 106 | 107 | # now let's add the geonames 108 | # get geo names 109 | geonames <- read.csv("../data/CakeMap/cars-raw.csv") 110 | geonames_df <- as.data.frame(geonames[3:126,2]) 111 | # create a zoneid 112 | geonames_df$zone <- 1:nrow(geonames_df) 113 | names(geonames_df)[1] <- "zone_name" 114 | 115 | final_micro_geo <- inner_join(geonames_df,final_micro, by = "zone") 116 | 117 | # so now we have our final long form synthetic fractional weights microdata table 118 | # with n * z rows where n = nrow(ind) and z = nrow(cons). 119 | # This is in contrast to the integerised version where we would have the 120 | # sum of npop(zi) where npop(zi) is the population for each zone 121 | # This would be a much larger file... 122 | 123 | # Test the results!! 124 | 125 | # change nssec8 to numeric 126 | final_micro_geo$NSSEC8n <- as.numeric(final_micro_geo$NSSEC8) 127 | summary(final_micro_geo$NSSEC8n) 128 | 129 | # careful, 97 = unset 130 | final_micro_geo$NSSEC8n[final_micro_geo$NSSEC8n > 10] <- NA 131 | summary(final_micro_geo$NSSEC8n) 132 | # use na.rm to ignore them 133 | # overall mean 134 | mean(final_micro_geo$NSSEC8n, na.rm = TRUE) 135 | # weighted mean - to show the difference 136 | weighted.mean(final_micro_geo$NSSEC8n, w = final_micro_geo$weight, na.rm = TRUE) 137 | 138 | # mean by zone - this fails claiming x and w are different lengths, why? 139 | aggregate(final_micro_geo$NSSEC8n, by= list(final_micro_geo$zone), FUN = weighted.mean, w = final_micro_geo$weight, na.rm = TRUE) 140 | 141 | # so for now, let's save the file out and do the stats in STATA!! 142 | write.csv(final_micro_geo, file = "final_micro_fractional_cakes_geo.csv", na = ".") 143 | 144 | # now read the summary of cakes by zone (created in STATA) back in 145 | cakes_by_zone <- read.csv("cakes_geo.csv") 146 | 147 | # and do the R mapping thing... 148 | # to do -------------------------------------------------------------------------------- /fractional_weights/BA-process-final_micro_fractional_cakes_geo.do: -------------------------------------------------------------------------------- 1 | * script to read in results of R spatial microsim using ipf and run weighted stats 2 | * in theory this should also be done in R when I work out how! 3 | 4 | * change this to your path! 5 | local where "/Users/ben/Documents/Work/Papers and Conferences/spatial-microsim-r-course" 6 | local path "`where'/spatial-microsim-book-git/fractional_weights" 7 | 8 | insheet using "`path'/final_micro_fractional_cakes_geo.csv", clear 9 | 10 | * create 'dummy' variables ready to collapse to weighted frequencies 11 | gen NCakes_rare = 0 12 | replace NCakes_rare = 1 if ncakes == "rarely" 13 | 14 | gen NCakes_l1 = 0 15 | replace NCakes_l1 = 1 if ncakes == "<1" 16 | 17 | gen NCakes_1_2 = 0 18 | replace NCakes_1_2 = 1 if ncakes == "1-2" 19 | 20 | gen NCakes_3_5 = 0 21 | replace NCakes_3_5 = 1 if ncakes == "3-5" 22 | 23 | gen NCakes_6m = 0 24 | replace NCakes_6m = 1 if ncakes == "6+" 25 | 26 | * keep the data in memory 27 | * immediately obvious = stata's inability to hold 2 or more datasets at a time! 
28 | preserve 29 | * collapse cakes by zone 30 | * this is what the weighted agregate in R should do 31 | collapse (sum) NC* [iw=weight], by(zone_name) 32 | 33 | * list first 5 lines as a check 34 | li in 1/5 35 | 36 | * save the results so we can add to a map 37 | outsheet using "`path'/cakes_geo.csv", comma replace 38 | 39 | * put the data back so we can do other stuff 40 | restore 41 | -------------------------------------------------------------------------------- /fractional_weights/README.md: -------------------------------------------------------------------------------- 1 | spatial-microsim-book 2 | ===================== 3 | 4 | This is a variant on the CakeMap.R code which does not integerise the weights in order to select whole units but keeps the fractional weights and creates a long form data table with these weights attached to the individual cases. 5 | 6 | As I have not (yet) worked out how to do a weighted 'aggregate' in R the script then outputs the table as a .csv file & I use the included STATA script to calculate the number of people in each cake category in each zone. This in turn outputs this result as a .csv file to be read back into R for mapping etc (to do!) 7 | 8 | Comments welcome: dataknut@icloud.com 9 | 10 | -------------------------------------------------------------------------------- /fractional_weights/cakes_geo.csv: -------------------------------------------------------------------------------- 1 | zone_name,NCakes_rare,NCakes_l1,NCakes_1_2,NCakes_3_5,NCakes_6m 2 | "E05001341",788.7496,1083.087,3243.491,3429.721,2799.951 3 | "E05001342",1027.168,1281.698,3895.244,4122.652,3094.239 4 | "E05001343",926.5318,1259.879,3818.923,3970.354,3155.313 5 | "E05001344",1015.429,1121.801,3473.085,3503.431,2353.254 6 | "E05001345",1507.246,1394.03,4242.255,4123.901,2343.568 7 | "E05001346",1380.517,1343.121,4190.762,4384.934,2214.666 8 | "E05001347",2030.488,1468.18,4808.129,7922.088,1681.115 9 | "E05001348",1040.052,1174.773,3562.537,3456.14,2339.497 10 | "E05001349",816.2757,1131.446,3404.589,3583.401,3008.289 11 | "E05001350",1218.7,1283.018,3732.672,3711.147,2595.464 12 | "E05001351",1149.97,1176.715,3748.55,3618.266,2198.5 13 | "E05001352",934.4416,1072.336,3559.614,3634.233,2053.375 14 | "E05001353",918.9893,1186.807,3493.203,3713.727,2820.274 15 | "E05001354",677.4254,946.1523,2857.008,3029.763,2408.652 16 | "E05001355",1196.71,1153.158,3749.179,3666.576,2222.376 17 | "E05001356",995.5418,1169.456,3581.889,3590.43,2678.683 18 | "E05001357",1153.808,1217.382,3498.346,3426.038,2480.425 19 | "E05001358",1552.208,1416.026,4306.451,4253.147,2297.168 20 | "E05001359",1453.772,1287.276,4104.586,4190.749,2142.617 21 | "E05001360",933.625,1183.684,3518.908,3702.542,2699.241 22 | "E05001361",1152.941,1250.887,3623.927,3576.16,2530.086 23 | "E05001362",1009.628,1127.131,3388.394,3515.785,2425.061 24 | "E05001363",1074.221,1231.575,3692.031,3627.587,2620.585 25 | "E05001364",1154.604,1224.759,4046.874,4454.391,2258.372 26 | "E05001365",1541.816,1471.868,4326.89,4129.095,2733.33 27 | "E05001366",520.4231,795.0496,2368.377,2542.555,2079.596 28 | "E05001367",996.3626,1060.91,3113.027,3039.665,2172.035 29 | "E05001368",1095.407,1205.545,3487.233,3506.009,2526.805 30 | "E05001369",772.9461,1044.935,3116.72,3253.079,2658.319 31 | "E05001370",957.0417,1096.865,3265.335,3303.233,2495.525 32 | "E05001371",727.1841,822.8037,2390.928,2451.593,1899.492 33 | "E05001372",726.9758,899.3874,2644.114,2576.089,2060.434 34 | "E05001373",754.8888,855.8646,2503.203,2535.555,1924.489 35 | 
"E05001374",609.1226,823.1774,2459.433,2564.737,2039.53 36 | "E05001375",596.7382,796.1556,2374.542,2504.5,2023.064 37 | "E05001376",877.9653,949.258,2757.127,2693.096,1988.554 38 | "E05001377",599.7622,766.4562,2313.686,2342.798,1861.298 39 | "E05001378",613.9335,826.2873,2452.079,2595.25,2111.45 40 | "E05001379",935.9994,931.5789,2672.054,2592.041,1793.327 41 | "E05001380",1067.006,1008.174,3221.914,3195.288,1721.617 42 | "E05001381",698.3071,829.3501,2410.238,2472.328,1946.777 43 | "E05001382",614.3989,818.204,2450.78,2530.658,2008.959 44 | "E05001383",697.1795,873.6677,2687.298,2830.359,2036.496 45 | "E05001384",796.624,878.3806,2552.94,2635.454,1963.601 46 | "E05001385",793.0138,904.8457,2651.938,2598.89,1993.312 47 | "E05001386",951.2349,914.8583,2627.314,2648.848,1917.744 48 | "E05001387",773.8481,866.4239,2580.685,2541.224,1875.819 49 | "E05001389",1394.413,1398.818,4296.884,4111.389,2713.496 50 | "E05001390",1247.139,1275.695,4178.287,4087.808,2647.071 51 | "E05001391",1193.686,1314.861,4133.524,4100.956,2884.973 52 | "E05001392",960.0002,1168.988,3475.47,3581.839,2781.703 53 | "E05001393",1002.853,1210.083,3552.11,3700.761,2908.193 54 | "E05001396",1272.262,1291.213,3902.906,3857.708,2608.912 55 | "E05001397",813.4417,1146.423,3412.729,3634.575,3083.831 56 | "E05001398",1295.28,1344.619,4062.289,4091.768,2941.043 57 | "E05001399",1171.867,1242.268,4124.636,4039.748,2584.48 58 | "E05001400",1317.53,1278.765,4118.64,4073.498,2508.567 59 | "E05001401",1149.252,1331.316,3937.425,4073.791,2977.216 60 | "E05001402",1489.53,1350.304,4531.379,4793.342,2444.445 61 | "E05001403",1028.351,1187.217,3686.912,3721.417,2682.103 62 | "E05001405",933.0265,1335.367,4017.342,4198.878,3454.386 63 | "E05001407",1118.539,1365.281,4105.374,4207.638,3137.168 64 | "E05001408",1157.592,1412.459,4103.268,4253.207,3314.474 65 | "E05001409",1096.099,1379.314,4121.874,4285.684,3448.028 66 | "E05001410",1685.685,1369.134,4729.579,5844.794,1991.809 67 | "E05001411",944.7602,1309.646,4059.667,4248.011,3393.915 68 | "E05001412",1182,1535.306,4872.22,5053.163,3637.31 69 | "E05001413",1189.969,1611.405,4798.66,5072.652,3861.313 70 | "E05001414",2081.399,1912.993,5635.011,5782.487,3739.109 71 | "E05001415",1847.153,1664.445,4807.342,4662.09,3016.97 72 | "E05001416",1484.9,1613.027,4606.061,4681.025,3258.986 73 | "E05001417",2361.826,1946.151,5593.217,5256.458,2920.347 74 | "E05001418",1200.609,1592.462,4818.543,5161.541,3903.844 75 | "E05001419",1846.276,1750.126,5303.719,5703.543,3050.336 76 | "E05001420",2877.968,2201.887,7107.304,13815.4,2597.437 77 | "E05001421",1427.939,1629.767,4631.379,4708.701,3530.215 78 | "E05001422",1642.543,1801.259,5171.37,5302.86,3802.968 79 | "E05001423",1076.644,1419.961,4119.665,4357.1,3486.629 80 | "E05001424",2237.867,2005.265,5627.167,5365.581,2919.119 81 | "E05001425",1105.931,1564.565,4645.131,5024.538,3931.835 82 | "E05001426",765.8055,1202.659,3669.349,3952.576,3378.611 83 | "E05001427",1097.612,648.4695,2751.216,14550.66,445.0432 84 | "E05001428",1089.845,1467.317,4583.197,5055.402,3525.24 85 | "E05001429",2558.201,1153.403,4643.45,13661.53,983.421 86 | "E05001430",1848.058,1804.51,5024.786,4767.17,3201.475 87 | "E05001431",1162.698,1501.475,4350.934,4631.97,3712.923 88 | "E05001432",1645.859,1536.871,4944.128,7054.947,2258.196 89 | "E05001433",1999.059,1985.569,5578.113,5434.126,3653.133 90 | "E05001434",1244.736,1578.177,4960.665,5260.906,3586.516 91 | "E05001435",1290.863,1641.593,4818.303,5115.008,4023.233 92 | "E05001436",1407.143,1639.443,4671.05,4937.873,3666.492 93 | 
"E05001437",1250.437,1586.9,4588.601,4764.63,3830.432 94 | "E05001438",1380.138,1643.632,4693.117,4864.599,3696.514 95 | "E05001439",1162.646,1468.913,4239.851,4441.493,3474.097 96 | "E05001440",1194.214,1508.015,4809.567,5029.8,3416.404 97 | "E05001441",1398.875,1552.783,4577.783,4594.58,3381.979 98 | "E05001442",1325.444,1443.412,4939.498,7159.809,2469.836 99 | "E05001443",1013.459,1343.515,4158.765,4491.534,3670.727 100 | "E05001444",926.3325,1122.747,3437.887,3512.31,2894.723 101 | "E05001445",1073.639,1079.739,3128.049,3009.876,2318.697 102 | "E05001446",1061.056,1186.51,3474.851,3606.515,2801.068 103 | "E05001447",1077.493,1174.029,3325.919,3456.62,2698.938 104 | "E05001448",885.837,1089.883,3243.205,3334.574,2701.501 105 | "E05001449",1099.474,1174.684,3417.173,3358.599,2694.071 106 | "E05001450",1090.877,1112.836,3352.628,3250.854,2510.805 107 | "E05001451",880.2195,1086.064,3183.007,3290.655,2662.054 108 | "E05001452",982.6305,1025.696,2903.444,2954.476,2290.754 109 | "E05001453",1119.731,1202.496,3518.786,3523.439,2685.548 110 | "E05001454",958.7019,1173.183,3381.786,3510.419,2840.91 111 | "E05001455",1049.057,1195.917,3364.1,3460.112,2708.814 112 | "E05001456",929.9057,1077.278,3123.703,3133.324,2536.788 113 | "E05001457",1268.862,1241.734,3795.465,3684.272,2858.667 114 | "E05001458",884.1049,1119.493,3333.133,3506.355,2760.913 115 | "E05001459",1227.668,1137.13,3502.168,3360.99,2299.044 116 | "E05001460",1259.151,1160.479,3542.685,3503.871,2548.814 117 | "E05001461",890.6943,1240.08,3709.46,3858.709,3218.056 118 | "E05001462",843.2431,943.4784,2903.69,2935.147,2221.441 119 | "E05001463",1116.767,1131.625,3318.236,3164.9,2293.471 120 | "E05001464",828.2547,1055.243,3101.471,3193.602,2590.429 121 | "E05008558",1114.753,1319.397,3994.055,4080.792,2901.003 122 | "E05008559",1013.427,1240.989,3726.994,3875.042,3076.547 123 | "E05008560",1315.003,1291.036,4159.125,3941.103,2544.733 124 | "E05008561",894.7399,1174.829,3489.584,3636.375,2909.472 125 | "E05008562",745.7589,1148.107,3708.319,4179.33,2579.484 126 | -------------------------------------------------------------------------------- /frontmatter/pream.tex: -------------------------------------------------------------------------------- 1 | \documentclass[krantz1,ChapterTOCs]{krantz} 2 | 3 | % settings from RStudio 4 | \usepackage[T1]{fontenc} 5 | \usepackage{lmodern} 6 | \usepackage{amssymb,amsmath} 7 | \usepackage{ifxetex,ifluatex} 8 | \usepackage{fixltx2e} % provides \textsubscript 9 | % use upquote if available, for straight quotes in verbatim environments 10 | \IfFileExists{upquote.sty}{\usepackage{upquote}}{} 11 | \ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex 12 | \usepackage[utf8]{inputenc} 13 | \else % if luatex or xelatex 14 | \ifxetex 15 | \usepackage{mathspec} 16 | \usepackage{xltxtra,xunicode} 17 | \else 18 | \usepackage{fontspec} 19 | \fi 20 | \defaultfontfeatures{Mapping=tex-text,Scale=MatchLowercase} 21 | \newcommand{\euro}{€} 22 | \fi 23 | % use microtype if available 24 | \IfFileExists{microtype.sty}{\usepackage{microtype}}{} 25 | \usepackage{color} 26 | \usepackage{fancyvrb} 27 | \newcommand{\VerbBar}{|} 28 | \newcommand{\VERB}{\Verb[commandchars=\\\{\}]} 29 | \DefineVerbatimEnvironment{Highlighting}{Verbatim}{commandchars=\\\{\}} 30 | % Add ',fontsize=\small' for more characters per line 31 | \usepackage{framed} 32 | \definecolor{shadecolor}{RGB}{248,248,248} 33 | \newenvironment{Shaded}{\begin{snugshade}}{\end{snugshade}} 34 | \newcommand{\KeywordTok}[1]{\textcolor[rgb]{0.13,0.29,0.53}{\textbf{{#1}}}} 35 
| \newcommand{\DataTypeTok}[1]{\textcolor[rgb]{0.13,0.29,0.53}{{#1}}} 36 | \newcommand{\DecValTok}[1]{\textcolor[rgb]{0.00,0.00,0.81}{{#1}}} 37 | \newcommand{\BaseNTok}[1]{\textcolor[rgb]{0.00,0.00,0.81}{{#1}}} 38 | \newcommand{\FloatTok}[1]{\textcolor[rgb]{0.00,0.00,0.81}{{#1}}} 39 | \newcommand{\CharTok}[1]{\textcolor[rgb]{0.31,0.60,0.02}{{#1}}} 40 | \newcommand{\StringTok}[1]{\textcolor[rgb]{0.31,0.60,0.02}{{#1}}} 41 | \newcommand{\CommentTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textit{{#1}}}} 42 | \newcommand{\OtherTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{{#1}}} 43 | \newcommand{\AlertTok}[1]{\textcolor[rgb]{0.94,0.16,0.16}{{#1}}} 44 | \newcommand{\FunctionTok}[1]{\textcolor[rgb]{0.00,0.00,0.00}{{#1}}} 45 | \newcommand{\RegionMarkerTok}[1]{{#1}} 46 | \newcommand{\ErrorTok}[1]{\textbf{{#1}}} 47 | \newcommand{\NormalTok}[1]{{#1}} 48 | \usepackage{longtable,booktabs} 49 | \usepackage{graphicx} 50 | % Redefine \includegraphics so that, unless explicit options are 51 | % given, the image width will not exceed the width of the page. 52 | % Images get their normal width if they fit onto the page, but 53 | % are scaled down if they would overflow the margins. 54 | \makeatletter 55 | \def\ScaleIfNeeded{% 56 | \ifdim\Gin@nat@width>\linewidth 57 | \linewidth 58 | \else 59 | \Gin@nat@width 60 | \fi 61 | } 62 | \makeatother 63 | \let\Oldincludegraphics\includegraphics 64 | {% 65 | \catcode`\@=11\relax% 66 | \gdef\includegraphics{\@ifnextchar[{\Oldincludegraphics}{\Oldincludegraphics[width=\ScaleIfNeeded]}}% 67 | }% 68 | 69 | \setlength{\parindent}{0pt} 70 | \setlength{\parskip}{6pt plus 2pt minus 1pt} 71 | \setlength{\emergencystretch}{3em} % prevent overfull lines 72 | 73 | % % Hadley's hacks 74 | \usepackage[hyphens]{url} 75 | \usepackage[setpagesize=false, % page size defined by xetex 76 | unicode=false, % unicode breaks when used with xetex 77 | % xetex, 78 | hidelinks]{hyperref} 79 | % Place links as footnotes 80 | \renewcommand{\href}[2]{#2 (\url{#1})} 81 | % Use ref for internal links 82 | \renewcommand{\hyperref}[2][???]{\autoref{#1}} 83 | \def\chapterautorefname{Chapter} 84 | \def\sectionautorefname{Section} 85 | \def\subsectionautorefname{Section} 86 | \def\subsubsectionautorefname{Section} 87 | 88 | % Krantz example 89 | 90 | \usepackage{fixltx2e,fix-cm} 91 | \usepackage{amssymb} 92 | \usepackage{amsmath} 93 | \usepackage{graphicx} 94 | \usepackage{subfigure} 95 | \usepackage{makeidx} 96 | \usepackage{multicol} 97 | \usepackage{cleveref} 98 | 99 | \frenchspacing 100 | \tolerance=5000 101 | 102 | \makeindex 103 | 104 | \include{frontmatter/preamble} %place custom commands and macros here 105 | 106 | \begin{document} 107 | 108 | \frontmatter 109 | 110 | \title{Spatial Microsimulation with R} %This is a placeholder titlepage, 111 | \author{Robin Lovelace and Morgane Dumont} 112 | \maketitle 113 | 114 | % \include{frontmatter/dedication} 115 | \cleardoublepage 116 | \setcounter{page}{7} %previous pages will be reserved for frontmatter to be added in later. 
117 | \tableofcontents 118 | % \include{frontmatter/foreword} 119 | \include{frontmatter/preface} 120 | \listoffigures 121 | \listoftables 122 | % \include{frontmatter/contributor} 123 | % \include{frontmatter/symbollist} 124 | 125 | \mainmatter 126 | -------------------------------------------------------------------------------- /index.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: 'Spatial Microsimulation with R' 3 | author: 'Robin Lovelace and Morgane Dumont' 4 | date: '`r Sys.Date()`' 5 | site: "bookdown::bookdown_site" 6 | # rmd_files: ["index.Rmd", "01-introduction.Rmd"] 7 | output: 8 | bookdown::gitbook: default 9 | documentclass: book 10 | link-citations: yes 11 | biblio-style: apalike 12 | github-repo: Robinlovelace/spatial-microsim-book 13 | url: 'https\://spatial-microsim-book.robinlovelace.net' 14 | twitter-handle: robinlovelace 15 | cover-image: figures/cover-image.jpg 16 | description: "Learn how to model systems from the individual to the areal level and discover how to do spatial microsimulation in a reproducible manner using high-performance, open source software." 17 | bibliography: bibliography.bib 18 | --- 19 | 20 | # Welcome {-} 21 | 22 | Welcome to the online home of *Spatial Microsimulation with R*. 23 | 24 | This is a book by [Robin Lovelace](http://robinlovelace.net/) and [Morgane Dumont](https://directory.unamur.be/staff/modumont) (with chapter [10](http://spatial-microsim-book.robinlovelace.net/ha.html) contributed by [Johan Barthélemy](https://smart.uow.edu.au/people/UOW192467.html), chapter [11](http://spatial-microsim-book.robinlovelace.net/tresis.html) contributed by [Richard Ellison](http://sydney.edu.au/business/staff/richard.ellison) and [David Hensher](http://sydney.edu.au/business/staff/david.hensher), and chapter [12](http://spatial-microsim-book.robinlovelace.net/abm) contributed by [Maja Založnik](https://www.oxfordmartin.ox.ac.uk/people/565)). 25 | 26 | [![](https://images.tandf.co.uk/common/jackets/amazon/978149871/9781498711548.jpg)](https://www.crcpress.com/Spatial-Microsimulation-with-R/Lovelace-Dumont/p/book/9781498711548) 27 | 28 | It is published by CRC Press. See their [online store](https://www.crcpress.com/Spatial-Microsimulation-with-R/Lovelace-Dumont/p/book/9781498711548) if you'd like to buy a copy. 29 | If you'd like to crack on and use the content, feel free to browse the chapters via the drop-down menu on the left. 30 | Depending on your interests and level of experience we particularly recommend: 31 | 32 | - Chapter [1](http://spatial-microsim-book.robinlovelace.net/intro.html): what is spatial microsimulation and what is it good for? 33 | - Chapter [2](http://spatial-microsim-book.robinlovelace.net/simpleworld): if you'd like to see a very simple worked example of the process in action. 34 | - Chapter [11](http://spatial-microsim-book.robinlovelace.net/tresis.html): if you're interested in more advanced applications in transport modelling. 35 | - Chapter [12](http://spatial-microsim-book.robinlovelace.net/abm): for the links between spatial microsimulation and agent-based modelling. 36 | 37 | Chapters [3](http://spatial-microsim-book.robinlovelace.net/what-is.html) to [9](http://spatial-microsim-book.robinlovelace.net/nomicrodata.html) explain, with reference to reproducible code 'chunks' embedded in the text, how to generate spatial microdata, with or without a sample population.
38 | Chapter [10](http://spatial-microsim-book.robinlovelace.net/ha.html) demonstrates how to add household-level variables. 39 | 40 | Furthermore, there are add-on chapters for beginners to R or the discipline. 41 | If you're completely new to R and programming in general, check out the [appendix](http://spatial-microsim-book.robinlovelace.net/apr), which will get you up-to-speed quickly. 42 | There's also a [glossary](http://spatial-microsim-book.robinlovelace.net/glossary.html) that explains some of the jargon used in this field of research. 43 | 44 | We've put *Spatial Microsimulation with R* on-line because we want to reduce barriers to learning. 45 | We've made it open source via a [GitHub repository](https://github.com/Robinlovelace/spatial-microsim-book) because we believe in reproducibility and collaboration. 46 | Comments and suggestions are most welcome [there](https://github.com/Robinlovelace/spatial-microsim-book/issues). 47 | If the content of the book helps your research, please cite it ([Lovelace and Dumont, 2016](https://github.com/Robinlovelace/spatial-microsim-book/blob/master/sms-book-citation.bib)). 48 | 49 | ## Reference {-} 50 | 51 | Lovelace, R., Dumont, M., 2016. Spatial microsimulation with R. CRC Press. 52 | -------------------------------------------------------------------------------- /notes/BA-notes.md: -------------------------------------------------------------------------------- 1 | spatial-microsim-book/course 2 | ===================== 3 | 4 | Notes from: 5 | * An Introduction to Spatial Microsimulation using R 6 | * Dr Robin Lovelace and colleagues 7 | * Date: 18/09/2014 - 19/09/2014 8 | * Venue: Room S1,S044-01-0034, First Floor, Alison Richard Building, Sidgwick Site, University of Cambridge, Cambridge 9 | * http://www.ncrm.ac.uk/training/show.php?article=5088 10 | * Recommended pre-reading: http://eprints.ncrm.ac.uk/3348/ 11 | * See also https://github.com/Robinlovelace/spatial-microsim-book 12 | 13 | General course notes 14 | * Recent experience of speeding up R code a lot using a new package - ipfp - http://cran.r-project.org/web/packages/ipfp/index.html 15 | * this package is very fast; you can specify iterations & starting weights (if you wish) but not, seemingly, a stopping rule based on convergence (although investigate the tol = parameter, as this seems to be the sum of squares of the difference between the original constraint vector and the current fitted constraint vector at iteration i (so essentially TAE), so it could be used to control iterations) 16 | * Suggests looking at the Flexible Modelling Framework (Harland) - alternative methods written in Java 17 | * GitHub vs BitBucket - the latter can have private repositories 18 | * use .gitignore to stop uploading particular files e.g.
data/ or .dta 19 | * Big shout for "Spatial Microsimulation: A Reference Guide for Users" http://www.springer.com/social+sciences/population+studies/book/978-94-007-4622-0 20 | * suggests looking at Applied Spatial Data Analysis with R (Bivand et al.) http://www.springer.com/statistics/life+sciences,+medicine+%26+health/book/978-1-4614-7617-7 21 | * Parallel processing 22 | * not worth using if datasets are very small and you have a small number of cores, as the management overhead is high 23 | * Assumptions 24 | * individual data is representative 25 | * target vars of interest -> some function of constraints (might not be) 26 | * correlation between constraints & target vars is constant over space 27 | * relationship between constraint distributions/tables is the same at local (constraints) & national (individual) levels 28 | 29 | Other notes & conversations 30 | * Spatial MSM is one option for small area estimation, see: "Evaluations and improvements in small area estimation methodologies" http://eprints.ncrm.ac.uk/3210/ 31 | * Belgian models - contact = Gijs Dekkers (https://www.linkedin.com/profile/view?id=164500929), chief editor of the International Journal of Microsimulation (http://www.microsimulation.org/IJM/IJM_editorial_board.htm). 32 | * open-source toolbox LIAM2 (http://liam2.plan.be), designed for the development of dynamic microsimulation models. 33 | * Sweden - whole popn model http://www.researchgate.net/publication/253561368_The_SVERIGE_Spatial_Microsimulation_Model 34 | * Papers on IPF: 35 | * Simpson, L., & Tranmer, M. (2005). Combining sample and census data in small area estimates: Iterative Proportional Fitting with standard software. The Professional Geographer, 57(2), 222–234. 36 | * Wong, D. (1992). The Reliability of Using the Iterative Proportional Fitting Procedure. The Professional Geographer. Retrieved from http://www.tandfonline.com/doi/abs/10.1111/j.0033-0124.1992.00340.x 37 | * Norman, P. (1999). Putting iterative proportional fitting on the researcher’s desk. Retrieved from http://eprints.whiterose.ac.uk/5029 38 | -------------------------------------------------------------------------------- /notes/L1.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Spatial Microsimulation with R: Lecture 1" 3 | author: "Robin Lovelace" 4 | date: "09/17/2014" 5 | output: ioslides_presentation 6 | --- 7 | 8 | ## Spatial Microsimulation with R 9 | 10 | Aims: 11 | 12 | 1. To provide a solid understanding of the method and applications 13 | 2. To teach its implementation in R in general terms 14 | 3. To provide guidance on next steps 15 | 16 | ## Introduction 17 | 18 | - Housekeeping 19 | - About the course and its teachers 20 | - Lectures and practicals 21 | - Getting help 22 | 23 | ## This morning's agenda 24 | 25 | **9:30 - 11:00** 26 | 27 | - Lecture: what is spatial microsimulation?
28 | - Getting used to working with RStudio (and GitHub) 29 | - Demonstration of what we'll be working on 30 | - Loading the input data (Chapter 3) 31 | 32 | *Refreshments: 11 - 11:15* 33 | 34 | **11:15 - 1:00** 35 | 36 | - Working through Chapter 3 and 4 37 | - Performance 38 | - (Parallel processing in R) 39 | 40 | ## This afternoon 41 | 42 | **1:30 - 2:45** 43 | 44 | - Finishing up and questions about SimpleWorld 45 | - Lecture: Introduction to spatial microsimulation in the wild 46 | - Cleaning messy input data for spatial microsimulation (Chapter 5) 47 | 48 | **3 - 4:30** 49 | 50 | - Performing IPF on CakeMap Data (5.2) 51 | - Description and demonstration of integerisation (5.3) 52 | - Re-cap and questions on key concepts 53 | 54 | ## Tomorrow 55 | 56 | **9:30 - 11** 57 | 58 | - Demonstration analysis of CakeMap data 59 | - Model checking and validation 60 | 61 | **11:15 - 1:30** 62 | 63 | - Visualisations 64 | - Lecture: next steps 65 | - Applying the methods to your data 66 | 67 | 68 | ## The course materials 69 | 70 | - Major update of course materials from May 71 | - New improved code is much faster 72 | - And easier to write 73 | - Booklet -> Book 74 | 75 | ## What is spatial microsimulation? 76 | 77 | 1. A method 78 | 2. An approach 79 | 80 | ## Applications 81 | 82 | - Wide variety of potential applications 83 | - So far main applications have been in health, poverty mapping and transport 84 | - What do you want to use spatial microsimulation for? 85 | - Tomintz et al. (2008). The geography of smoking in Leeds: estimating individual smoking rates and the implications for the location of stop smoking services. Area, 40(3), 341–353. 86 | - Gleeson (2014) 87 | - My research 88 | 89 | 90 | ## R 91 | 92 | - Powerful *command-line interface* 93 | - Fast - if you know how 94 | - Steep learning curve but lots of help available 95 | 96 | ## A demonstration of R and RStudio 97 | 98 | - Creating, modifying and subsetting datasets 99 | - Functions 100 | - Features of RStudio 101 | 102 | # Demonstration of GitHub 103 | 104 | # Working through Chapter 3 105 | 106 | ## Day 2 107 | 108 | - Parallel processing in R for fast/Big applications 109 | - Exploring the results of spatial microsimulation 110 | 111 | Refreshments: 11:00 112 | 113 | - Discussion of limitations and underlying assumptions of spatial microsimulation 114 | - Applying the methods to your own data 115 | 116 | # Parallel processing in R for fast/big microsimulation 117 | 118 | # Exploring the results of spatial microsimulation 119 | 120 | 121 | -------------------------------------------------------------------------------- /notes/mipfp-notes.R: -------------------------------------------------------------------------------- 1 | # mipfp to do spatial microsimulation without input data 2 | 3 | global = read.delim("data/Belgium/BelgiqueConting.txt") 4 | in_age = read.delim("data/Belgium/ContrainteAge.txt") 5 | in_dip = read.delim("data/Belgium/ContrainteDipl.txt") 6 | in_sta = read.delim("data/Belgium/ContrainteStatut.txt") 7 | in_sex = read.delim("data/Belgium/ContrainteGenre.txt") 8 | 9 | # for one zone 10 | global_cons = xtabs(Freq ~ gener + dipl + statut + sex, data = global) 11 | 12 | i = 1 # zone number 13 | uz = unique(in_age$com) 14 | z = uz[i] 15 | z = "92094" 16 | # data preparation 17 | age = in_age$COUNT[in_age$com == z] 18 | edu = in_dip$COUNT[in_dip$com == z] 19 | ocu = in_sta$COUNT[in_sta$com == z] 20 | sex = in_sex$COUNT[in_sex$com == z] 21 | 22 | target = list(age, edu, ocu, sex) 23 | descript = list(1, 2, 3, 4) 
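# Note on the Ipfp() call below: mipfp::Ipfp() expects the seed array first
# (here the national cross-table global_cons), then a list giving the
# dimension(s) of the seed that each target margin constrains (descript:
# age -> 1, diploma -> 2, status -> 3, sex -> 4), then the target margins
# themselves (target). The fitted array is returned in res$x.hat, with
# margins matching this zone's constraints.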
24 | 25 | res = mipfp::Ipfp(global_cons, descript, target) 26 | identical(dimnames(res$x.hat), dimnames(global_cons)) 27 | expa = as.data.frame.table(res$x.hat) 28 | 29 | # Integerisation, see here for code: 30 | # https://github.com/Robinlovelace/spatial-microsim-book/blob/master/R/functions.R 31 | source("code/functions.R") # loads functions into memory 32 | expa$int = int_trs(expa$Freq) 33 | exp_indices = int_expand_vector(expa$int) 34 | synth = expa[exp_indices,] 35 | 36 | # for many zones 37 | list_output = vector(mode = "list", length = length(uz)) 38 | for(i in 1:length(uz)) { 39 | z = uz[i] 40 | # data preparation 41 | age = in_age$COUNT[in_age$com == z] 42 | edu = in_dip$COUNT[in_dip$com == z] 43 | ocu = in_sta$COUNT[in_sta$com == z] 44 | sex = in_sex$COUNT[in_sex$com == z] 45 | target = list(age, edu, ocu, sex) 46 | res = mipfp::Ipfp(global_cons, descript, target) 47 | expa = as.data.frame.table(res$x.hat) 48 | expa$int = rakeR::integerise(expa$Freq)[,1] 49 | exp_indices = int_expand_vector(expa$int) 50 | list_output[[i]] = expa[exp_indices,] 51 | } 52 | 53 | synth_namur = dplyr::bind_rows(list_output, .id = "id") 54 | library(dplyr) 55 | pmale = group_by(synth_namur, id) %>% 56 | summarise(pmale = sum(sex == "Hommes") / 57 | n()) 58 | -------------------------------------------------------------------------------- /notes/seville-notes.R: -------------------------------------------------------------------------------- 1 | # Welcome to the course's R notes 2 | # All course material found/linked to: 3 | # https://github.com/Robinlovelace/spatial-microsim-book 4 | # examples will go here 5 | 6 | # First challenge: get set up on the RStudio server 7 | # https://rstudio.jrc.es/ 8 | 9 | # test if your RStudio account works: 10 | # example of interactive plotting 11 | library(tmap) 12 | tmap_mode("view") 13 | example(qtm) 14 | 15 | # downloading and unzipping data 16 | url_msim = "https://github.com/Robinlovelace/spatial-microsim-book/archive/master.zip" 17 | download.file(url_msim, destfile = "master.zip") 18 | unzip("master.zip") 19 | 20 | # Notes on project management: 21 | # https://csgillespie.github.io/efficientR/ 22 | 23 | # for spatial data 24 | u = "https://github.com/Robinlovelace/vspd-base-shiny-data/archive/master.zip" 25 | download.file(u, destfile = "master.zip") 26 | unzip("master.zip") 27 | dir.create("data") 28 | f = list.files(path = "vspd-base-shiny-data-master/", 29 | full.names = T) 30 | file.copy(from = f, to = "data") # copy the downloaded files into data/ 31 | 32 | # plot x and y 33 | x = 1:99 34 | y = x^3 35 | plot(x, y) 36 | system.time({x = 1:99}) 37 | 38 | # example of tab autocompletion: 39 | # use tab inside function calls to find arguments 40 | system2(command = "ls", args = "-hal") 41 | 42 | # loading in data 43 | ind = read.csv("data/SimpleWorld/ind-full.csv") 44 | nrow(ind) 45 | head(ind) 46 | # look at the environment pane to see it 47 | # click on it or enter View(ind) to see it 48 | View(ind) 49 | 50 | # classes 51 | class(ind) 52 | class(ind$age) 53 | class(ind$sex) 54 | 55 | # subsetting data 56 | ind[5,] # select row 57 | ind[,3] 58 | ind[3] 59 | ind["sex"] 60 | ind$sex 61 | 62 | # Alternative way of data handling 63 | # dplyr rule: always returns a data frame 64 | # concept: type stability 65 | library(dplyr) 66 | slice(ind, 5) 67 | select(ind, sex) 68 | 69 | # class coercion 70 | ind_mat = as.matrix(ind) 71 | class(ind_mat[1,]) 72 | 73 | #################################################### 74 | # spatial data with R - CakeMap for
all zones 75 | 76 | ind <- read.csv("data/CakeMap/ind.csv") 77 | cons <- read.csv("data/CakeMap/cons.csv") 78 | # Load constraints separately - normally this would be first stage 79 | con1 <- cons[1:12] # load the age/sex constraint 80 | con2 <- cons[13:14] # load the car/no car constraint 81 | con3 <- cons[15:24] # socio-economic class 82 | 83 | # Rename the categories in "ind" to correspond to the ones in cons 84 | ind$Car <- sapply(ind$Car, FUN = switch, "Car", "NoCar") 85 | ind$Sex <- sapply(ind$Sex, FUN = switch, "m", "f") 86 | ind$NSSEC8 <- as.factor(ind$NSSEC8) 87 | levels(ind$NSSEC8) <- colnames(con3) 88 | ind$ageband4 <- 89 | gsub(pattern = "-", replacement = "_", x = ind$ageband4) 90 | 91 | # Initialise weights 92 | weight_init_1zone <- table(ind) 93 | init_cells <- rep(weight_init_1zone, each = nrow(cons)) 94 | 95 | # Define the names 96 | names <- c(list(rownames(cons)), 97 | as.list(dimnames(weight_init_1zone))) 98 | 99 | # Structure the data 100 | weight_all <- array(init_cells, dim = 101 | c(nrow(cons), dim(weight_init_1zone)), 102 | dimnames = names) 103 | 104 | # Transform con1 into a 3D array: con1_convert 105 | names <- c(list(rownames(cons)),dimnames(weight_all)[c(4,6)]) 106 | con1_convert <- array(NA, dim=c(nrow(cons),2,6), dimnames = names) 107 | 108 | for(zone in rownames(cons)){ 109 | for (sex in dimnames(con1_convert)$Sex){ 110 | for (age in dimnames(con1_convert)$ageband4){ 111 | con1_convert[zone,sex,age] <- con1[zone,paste(sex,age,sep="")] 112 | } 113 | } 114 | } 115 | 116 | # Rescale con3 since it has some inconsistent constraints 117 | con3_prop <- con3*rowSums(con2)/rowSums(con3) 118 | 119 | # Load mipfp package 120 | library(mipfp) 121 | 122 | # Loop over the zones, running mipfp for each one 123 | # To run in parallel: use foreach package 124 | con1m = con1_convert 125 | con2m = as.matrix(con2) 126 | con3m = as.matrix(con3_prop) 127 | descript <- list(c(3,5),2,4) 128 | 129 | for (i in 1:nrow(cons)){ 130 | target <- list(con1m[i,,], con2m[i,], con3m[i,]) 131 | res <- Ipfp(weight_init_1zone, descript,target) 132 | weight_all[i,,,,,] <- res$x.hat 133 | } 134 | 135 | # Results for zone 1 136 | weight_init_1zone <- weight_all[1,,,,,] 137 | 138 | # Validation 139 | aggr <- apply(weight_all,c(1,6,4),sum) 140 | aggr <- aggr[,,c(2,1)] # order of sex to fit cons 141 | aggr1 = as.data.frame(aggr) 142 | con2 = apply(weight_all,c(1,3),sum) 143 | con3 = apply(weight_all,c(1,5),sum) 144 | ind_agg <- cbind(aggr1,con2,con3) 145 | 146 | plot(as.matrix(ind_agg[1,]), as.matrix(cons[1,]), xlab = 'Simulated', ylab='Theoretical', main = 'Validation for zone 1') 147 | 148 | cor(as.vector(as.matrix(ind_agg)),as.vector(as.matrix(cons))) 149 | 150 | 151 | CorVec <- rep (0, nrow(cons)) 152 | 153 | for (i in 1:nrow(cons)){ 154 | CorVec[i] = cor(as.numeric(ind_agg[i,]),as.numeric(cons[i,])) 155 | } 156 | 157 | which(CorVec< 0.99) 158 | 159 | # integerisation 160 | expa = as.data.frame.table(weight_init_1zone, responseName = 'COUNT') 161 | 162 | truncated = expa 163 | truncated$COUNT = floor(expa$COUNT) 164 | p = expa$COUNT - truncated$COUNT 165 | n_missing = sum(p) 166 | index = sample(1:nrow(truncated), size = n_missing, prob = p,replace=FALSE) 167 | truncated$COUNT[index] = truncated$COUNT[index] + 1 168 | 169 | # see simPop-notes.R for notes on simPop 170 | 171 | 172 | # spatial data - using this repo 173 | # https://github.com/Robinlovelace/Creating-maps-in-R 174 | 175 | url_maps = 176 | unzip() 177 | library(raster) 178 | system.time( 179 | lnd <- shapefile("data/london_sport.shp")
180 | ) 181 | class(lnd) 182 | plot(lnd) 183 | library(sf) 184 | system.time( 185 | lnd_sf <- st_read("data/london_sport.shp") 186 | ) 187 | plot(lnd_sf) 188 | 189 | r = raster(lnd) 190 | values(r) = 1:100 191 | plot(r) 192 | plot(lnd, add = T) 193 | proj4string(lnd) 194 | lnd_geo = spTransform(lnd, CRS("+proj=longlat +datum=WGS84")) 195 | proj4string(lnd_geo) 196 | spDists(lnd_geo[1:3,]) 197 | spDists(lnd[1:3,]) 198 | raster::res(r) 199 | res(r) 200 | detach("package:raster") 201 | raster::res(r) 202 | res(r) 203 | library(raster) 204 | r_highes = r 205 | raster::res(r_highes) <- 1000 206 | values(r_highes) = 1:ncell(r_highes) 207 | plot(r_highes) 208 | 209 | # further resources: http://geostat-course.org/node 210 | 211 | # Generate spatial microdata 212 | source("notes/mipfp-notes.R") 213 | 214 | # Getting spatial data for Belgium 215 | u_bel = "http://biogeo.ucdavis.edu/data/gadm2.8/rds/BEL_adm4.rds" 216 | download.file(u_bel, "BEL_adm4.rds") 217 | bel = readRDS("BEL_adm4.rds") 218 | plot(bel) 219 | d = bel@data 220 | nam = bel[bel$NAME_2 == "Namur",] 221 | nam = nam[sample(length(nam), length(uz)),] 222 | plot(nam) 223 | d = nam@data 224 | # str(nam) # show structure 225 | uz = unique(synth_namur$id) 226 | nam$id = uz[sample(length(uz), length(uz))] 227 | # check the ids match 228 | summary(nam$id %in% pmale$id) 229 | nam@data = inner_join(nam@data, pmale) 230 | head(nam@data) 231 | tmap::qtm(nam, "pmale") 232 | 233 | library(tmap) 234 | tmap_mode("view") 235 | qtm(nam, "pmale", n = 3) 236 | tm_shape(nam) + 237 | tm_fill(col = "pmale", 238 | breaks = c(0, 0.5, 1)) 239 | 240 | # Challenges: 241 | # 1: Write a for loop to create a spatial microdataset 242 | # for all zones in namur (don't just copy my code!) 243 | # 2: Create a map of a different variable (not % male) 244 | # 3: Implement the methods on your own data 245 | -------------------------------------------------------------------------------- /notes/simPop-notes.R: -------------------------------------------------------------------------------- 1 | ## Notes on simPop 2 | # install.packages("simPop") 3 | library(simPop) 4 | data(eusilcS) 5 | nrow(eusilcS) / 6 | length(unique(eusilcS$db030)) 7 | inp = specifyInput(data = eusilcS, 8 | hhid = "db030", 9 | hhsize = "hsize", 10 | strata = "db040", 11 | weight = "rb050") 12 | data("totalsRG") 13 | tt = xtabs(Freq ~ ., totalsRG) 14 | # tableWt() 15 | class(tt) = "table" 16 | oldweights = inp@data$rb050 17 | addWeights(inp) = calibSample(inp, totals = tt) 18 | newweights = inp@data$rb050 19 | plot(oldweights, newweights) 20 | synthP = simStructure(dataS = inp, 21 | method = "direct", 22 | basicHHvars = c("age", "rb090", "db040")) 23 | s = synthP@pop@data 24 | 25 | # with SimpleWorld 26 | ind = read.csv("data/SimpleWorld/ind-full.csv") 27 | ind$hhid = sample(x = 1:3, size = nrow(ind), replace = T) 28 | ind$strata = sample(x = 1:3, size = nrow(ind), replace = T) 29 | ind$weight = 1 30 | i = specifyInput(ind, hhid = "hhid", pid = "id", strata = "strata", weight = "weight") 31 | con1 = read.csv("data/SimpleWorld/sex.csv") 32 | tots = data.frame(sex = c("m", "f"), 33 | Freq =colSums(con1)) 34 | ti = xtabs(Freq ~ ., tots) 35 | class(ti) = "table" 36 | addWeights(i) = calibSample(i, ti) 37 | s = simStructure(i, "direct", c("age", "sex", "hhid")) 38 | s_data = s@pop@data 39 | head(s_data) 40 | -------------------------------------------------------------------------------- /output/.gitignore: -------------------------------------------------------------------------------- 1 | synhhlddata.RData 
2 | -------------------------------------------------------------------------------- /output/ints_df.csv: -------------------------------------------------------------------------------- 1 | "","id","zone","age","sex","income" 2 | "1",1,1,59,"m",2868 3 | "2",2,1,54,"m",2474 4 | "3",3,1,35,"m",2231 5 | "4",3,1,35,"m",2231 6 | "5",3,1,35,"m",2231 7 | "6",4,1,73,"f",3152 8 | "7",5,1,49,"f",2473 9 | "8",5,1,49,"f",2473 10 | "9",5,1,49,"f",2473 11 | "10",5,1,49,"f",2473 12 | "11",3,1,35,"m",2231 13 | "12",4,1,73,"f",3152 14 | "13",1,2,59,"m",2868 15 | "14",2,2,54,"m",2474 16 | "15",4,2,73,"f",3152 17 | "16",4,2,73,"f",3152 18 | "17",4,2,73,"f",3152 19 | "18",4,2,73,"f",3152 20 | "19",5,2,49,"f",2473 21 | "20",2,2,54,"m",2474 22 | "21",4,2,73,"f",3152 23 | "22",1,2,59,"m",2868 24 | "23",3,3,35,"m",2231 25 | "24",4,3,73,"f",3152 26 | "25",4,3,73,"f",3152 27 | "26",5,3,49,"f",2473 28 | "27",5,3,49,"f",2473 29 | "28",5,3,49,"f",2473 30 | "29",5,3,49,"f",2473 31 | "30",5,3,49,"f",2473 32 | "31",4,3,73,"f",3152 33 | "32",1,3,59,"m",2868 34 | "33",3,3,35,"m",2231 35 | -------------------------------------------------------------------------------- /slides/SM-for-ABM.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Spatial micro-data for agent-based models" 3 | author: "Morgane Dumont" 4 | date: '`r Sys.Date()`' 5 | output: 6 | beamer_presentation: default 7 | ioslides_presentation: default 8 | slidy_presentation: default 9 | bibliography: ../bibliography.bib 10 | --- 11 | 12 | ```{r setup, include=FALSE} 13 | knitr::opts_chunk$set(echo = FALSE) 14 | ``` 15 | 16 | ## What is an Agent-based model? 17 | 18 | It consists of: 19 | 20 | > - Agents with characteristics (agents are autonomous decision-making units with diverse, heterogeneous characteristics); 21 | > - their environment; 22 | > - relations between agents; 23 | > - AND possibly relations between the agents and their environment. 24 | 25 | 26 | ## 27 | 28 | "Agent-based modelling can find new, better solutions to many 29 | problems important to our environment, health, and economy" (Grimm and Railsback 2011) 30 | 31 | 32 | ## 33 | 34 | An ABM can evolve through time and space, but needs as input: 35 | 36 | > - the complete population, 37 | > - with the pertinent characteristics for each person, 38 | > - located in their environment, 39 | > - the definition of the different relations. 40 | 41 | ## 42 | 43 | The individual-level data needed can be generated by spatial microsimulation. 44 | 45 | ## Examples of developed agent-based models 46 | 47 | > - NetLogo [@thiele_facilitating_2014] 48 | > - VirtualBelgium [@barthelemy_stochastic_2015] 49 | > - TransMob [@TransMob] 50 | 51 | ## 52 | 53 | NetLogo for SimpleWorld: at each *time tick* the inhabitants will: 54 | 55 | > 1. move to a random location within their zone. 56 | > 2. "look across the fence": check their field of vision for inhabitants from a neighbouring zone and select the closest one in view. 57 | > 3. try to "convince" them to come over to the other side: the inhabitant with more money (`income`) will *bribe* the other with 10% of their money to come over to their zone. 58 | 59 | The model will have the following adjustable parameters: 60 | 61 | > 1. The field of vision has two parameters: the viewing angle and the distance 62 | > 2. Average level of *bribeability* of inhabitants: if their level is less than 100%, a random number generator will be used to determine whether the agent accepts the bribe or not.
The distribution of bribeability is approximately normal with a mean and a standard deviation. 63 | 64 | ## 65 | 66 | 67 | \includegraphics[width=0.9\textwidth]{../figures/nl-plots.png} 68 | 69 | ## 70 | VirtualBelgium 71 | ![](../figures/Jojo_JASS.png) 72 | 73 | 74 | ## 75 | 76 | ![](../figures/Jojo_JASS2.png) 77 | 78 | ## 79 | Need for spatial microsimulation: 80 | 81 | - To create the initial individuals; 82 | - To have the household features; 83 | - To assign an activity to each person. 84 | 85 | ## 86 | If you want to create your own code for an ABM, think of: 87 | 88 | - Object-oriented programming (such as C++) 89 | - Repast (https://repast.github.io/index.html) 90 | 91 | ## 92 | If your aim is to build an ABM and you do not have enough input data, you can generate it with spatial microsimulation. 93 | 94 | But first you need to be sure of what will be needed. 95 | 96 | [A good introduction to agent-based modelling](http://link.springer.com/article/10.1057/jos.2010.3) 97 | 98 | An example of a combination of spatial microsimulation and agent-based modelling: Virtual Belgium In Health 99 | 100 | ## References 101 | 102 | 103 | -------------------------------------------------------------------------------- /slides/SM-without-microdata.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Spatial Microsimulation without microdata" 3 | author: "Morgane Dumont" 4 | date: '`r Sys.Date()`' 5 | output: 6 | beamer_presentation: default 7 | ioslides_presentation: default 8 | slidy_presentation: default 9 | bibliography: ../bibliography.bib 10 | --- 11 | 12 | ```{r setup, include=FALSE} 13 | knitr::opts_chunk$set(echo = FALSE) 14 | ``` 15 | 16 | ## Without microdata? 17 | 18 | If you have no microdata but would like to generate a microsimulation, you can use a wide range of methods, depending on the data you have. 19 | 20 | For example: 21 | 22 | > - Global cross-tables and local marginal distributions 23 | 24 | > - Two-level aggregated data 25 | 26 | > - Only a cross-table, but also the mean, standard deviation, ... of the characteristics you would like to add 27 | 28 | ## Global cross-tables and local marginal distributions 29 | 30 | The global cross-table can be the initial weight matrix. 31 | 32 | Or, thanks to *mipfp*, it can be the initial matrix AND a constraint. 33 | 34 | ## Two-level aggregated data 35 | 36 | For example (Barthélemy and Toint - 2013): 37 | 38 | At municipality level: 39 | 40 | > - the cross-table gender x age 41 | > - and the marginals of diploma level and activity status; 42 | 43 | 44 | 45 | At district level: 46 | 47 | > - gender x activity status, 48 | > - gender x diploma level, 49 | > - age x activity status 50 | > - and age x diploma level. 51 | 52 | ## Several steps 53 | 54 | > 1. Create at district level gender x activity status x age 55 | > 2. Create at district level gender x diploma level x age 56 | > 3. Create at district level gender x activity status x age x diploma level 57 | > 4. Use this data as the seed for IPF and the two municipality-level databases as constraints 58 | 59 | 60 | ## Only a cross-table, but also mean, standard deviation,... 61 | 62 | First, create the constraints by knowing: 63 | 64 | - the distribution of your variable 65 | - the total number of individuals you need at the end. 66 | 67 | 68 | 69 | ## Addition of the household level 70 | 71 | Depending on the data you have, there are several possibilities. One method is: 72 | 73 | > - Run an IPF to create an individual pool.
74 | > - Run an IPF to create a household pool. 75 | > - Try to complete the households with individuals (depending on the variables of both) 76 | 77 | ## 78 | 79 | If you have more precise data, such as age differences in couples, the type of household of each individual, their civil status, ... you can use combinatorial optimization to constrain the households. 80 | 81 | In the case of my current research, the data are: 82 | 83 | > - for each individual, a zone, age, sex, some characteristics AND the size and type of household; 84 | > - the distribution of age differences within couples and between mother and child; 85 | 86 | ## 87 | 88 | ![](../figures/HH-CO.png) 89 | 90 | ## 91 | 92 | ![](../figures/IllustrationCouples.png) 93 | 94 | ## 95 | 96 | [@lenormand_generating_2012] 97 | https://arxiv.org/pdf/1208.6403v2.pdf 98 | 99 | IPU (Guo and Bhat, 2007) 100 | 101 | ## Choice of data and methods 102 | 103 | > - Major tip: first 'make a plan' before beginning to code. 104 | > - Check the source of the data and the way it was collected. 105 | > - Does it represent your target data? 106 | > - The choice of method is important. What are the hypotheses of the method? The underlying assumptions? What are the strengths and weaknesses of the method? 107 | > - Spatial microsimulation is an approximation, so you need to be aware of the bias that the method could have. 108 | 109 | ## Coding 110 | 111 | Imagine that the aim is to create a population for a Belgian province (Namur) characterised by: 112 | 113 | > - A municipality (code INS), 114 | > - a diploma level, 115 | > - a professional status level, 116 | > - an age category (0.5 meaning from 0 to 5 years old), 117 | > - a gender. 118 | 119 | ## 120 | The data you have are stored in the *Belgium* folder. You have: 121 | 122 | > - ContrainteStatut.txt: per municipality and professional status, you have a count; 123 | > - ContrainteGenre.txt: per municipality and gender, you have a count; 124 | > - ContrainteDipl.txt: per municipality and diploma level, you have a count; 125 | > - ContrainteAge.txt: per municipality and age class, you have a count; 126 | > - BelgiqueConting.txt: per age class, gender, diploma and status, you have a count. 127 | 128 | 129 | ## References 130 | -------------------------------------------------------------------------------- /slides/introduction.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Introduction to Spatial Microsimulation with R" 3 | author: "Robin Lovelace" 4 | date: '`r Sys.Date()`' 5 | output: beamer_presentation 6 | bibliography: ../bibliography.bib 7 | --- 8 | 9 | ```{r setup, include=FALSE} 10 | knitr::opts_chunk$set(echo = TRUE) 11 | 12 | ``` 13 | 14 | ## Introduction 15 | 16 | - Housekeeping 17 | - About the course and its teachers 18 | - Lectures and practicals 19 | - Getting help 20 | 21 | ## Housekeeping 22 | 23 | - Thanks to the hosts, the JRC 24 | 25 | ## Spatial Microsimulation with R 26 | 27 | Aims: 28 | 29 | 1. To provide a solid understanding of the method and applications 30 | 2. To teach its implementation in R in general terms 31 | 3.
To provide guidance on next steps 32 | 33 | ## Objectives: 34 | 35 | - Become proficient with R and RStudio for handling data 36 | - Understand some applications where spatial microsimulation is useful 37 | - Realise the limitations of the method 38 | - Know about a range of packages for doing spatial microsimulation with R 39 | - Understand code for generating spatial microdata with **mipfp** 40 | - Have ideas for trying the methods on your own datasets 41 | 42 | # About the course and its teachers 43 | 44 | ## The request to teach at the EU 45 | 46 | - Links with much of the research taking place at the JRC 47 | - Big Data 48 | - Modelling 49 | - Social impact assessment 50 | - Scenarios of the future 51 | 52 | ## The course materials 53 | 54 | - Based on our book, [@lovelace_spatial_2016]. Digital versions available on-line 55 | - Slides available on-line 56 | - We'll be making small 'code chunks' and scripts available during the course 57 | - Any feedback welcome 58 | 59 | ## A bit about us 60 | 61 | Robin Lovelace 62 | 63 | > - Environmental Geographer turned Computational and Transport Geographer 64 | > - Now on a 5-year University Academic Fellowship (UAF) in Transport and Big Data at the Leeds Institute for Transport Studies (ITS) 65 | > - Creator of many online teaching materials - see [github.com/robinlovelace](https://github.com/Robinlovelace) 66 | > - Creator of the [**stplanr**](https://github.com/ropensci/stplanr) package 67 | > - Lead developer of the [Propensity to Cycle Tool](http://www.pct.bike/) (PCT) 68 | 69 | ## A bit about us 70 | 71 | Morgane Dumont 72 | 73 | > - Applied Mathematician with a coding, algorithmic and statistics background 74 | > - Now on a project of the Wallonia Region developing an evolutionary spatial microsimulation to forecast the health needs of the elderly in Belgium in 2030 75 | > - Teaches statistics in R to master's students in applied mathematics at the University of Namur 76 | 77 | 78 | ## A bit about you 79 | 80 | - Go-around - who you are, interests in the course 81 | - With your neighbour: 82 | 83 | > - Experience with R 84 | > - Geographical data 85 | 86 | - What you hope to get out of the course 87 | 88 | # What is spatial microsimulation and its applications 89 | 90 | ## What is spatial microsimulation? 91 | 92 | 1. A method 93 | 2. An approach 94 | 95 | ## Applications 96 | 97 | - Wide variety of potential applications 98 | - So far the main applications have been in health, poverty mapping and transport 99 | - What do you want to use spatial microsimulation for? 100 | - @tomintz_geography_2008 The geography of smoking in Leeds: estimating individual smoking rates and the implications for the location of stop smoking services.
101 | - Exploration of the energy costs of transport [@lovelace_oil_2014] 102 | 103 | ## Agriculture 104 | 105 | @hynes_modelling_2008 is a classic example 106 | 107 | Had the following datasets: 108 | 109 | - Individual-level data on farmers participating in an agri-environment scheme 110 | - Farm-level data with many attributes about the farms 111 | - Geographical data on farms at the Enumeration District (ED) level 112 | 113 | For confidentiality reasons, the individual-level datasets could not be linked 114 | 115 | Spatial microsimulation was used to create a synthetic dataset 116 | 117 | ## Agriculture II 118 | 119 | Results show the probability of participation across Ireland: 120 | 121 | ![](../figures/agri-example-hynes-2008.png) 122 | 123 | ## Tax policy 124 | 125 | Commonly used to evaluate the distributional impacts of tax policies [@agostini_were_2014] 126 | 127 | ![](../figures/austerity.png) 128 | 129 | ## Transport 130 | 131 | A simulation of car traffic for Namur [@barthelemy_parallelized_2014] 132 | 133 | \includegraphics[width=0.5\textwidth]{../figures/Trafic_jojo.png} 134 | \includegraphics[width=0.5\textwidth]{../figures/Trafic_Jojo2.png} 135 | 136 | 137 | Tools used: spatial microsimulation, agent-based modelling, activity chains, ... 138 | 139 | ## What's next: 140 | 141 | - The RStudio Graphical User Interface (GUI) 142 | - Using R 143 | - Project management 144 | - GitHub 145 | 146 | ## References -------------------------------------------------------------------------------- /slides/r-rstudio-practical.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Using R and RStudio for spatial microsimulation" 3 | author: "Robin Lovelace" 4 | date: "`r Sys.Date()`" 5 | output: ioslides_presentation 6 | bibliography: ../bibliography.bib 7 | --- 8 | 9 | ```{r setup, include=FALSE} 10 | knitr::opts_chunk$set(echo = TRUE) 11 | knitr::opts_knit$set(root.dir = "..") 12 | ``` 13 | 14 | ## Introduction 15 | 16 | - Primarily a practical session 17 | - Based on the [old](https://en.wikipedia.org/wiki/Docendo_discimus) (~2000 years old!) saying *docendo discimus*: 18 | 19 | > **by teaching we learn**. 20 | 21 | We'll use up-to-date materials: 22 | - A section from [Efficient R Programming](https://csgillespie.github.io/efficientR/set-up.html#rstudio) 23 | - And a live demo 24 | 25 | ## Practical demonstration using R and RStudio 26 | 27 | - Using the SimpleWorld data 28 | 29 | ```{r} 30 | source("code/SimpleWorld.R") 31 | ``` 32 | 33 | ## Loading data 34 | 35 | ```{r} 36 | con_age <- read.csv("data/SimpleWorld/age.csv") 37 | con_sex <- read.csv("data/SimpleWorld/sex.csv") 38 | ``` 39 | 40 | - R can read data from all major file formats 41 | - See the **rio** package for more information 42 | 43 | ```{r, eval=FALSE} 44 | install.packages("rio") 45 | ``` 46 | 47 | ## Subsetting data 48 | 49 | ```{r} 50 | con_age 51 | ``` 52 | 53 | 54 | ```{r, eval=FALSE} 55 | View(con_age) 56 | ``` 57 | 58 | ## Exploring RStudio 59 | 60 | ![](../figures/rstudio-environment.png) 61 | 62 | ## Autocompletion 63 | 64 | ![](../figures/rstudio-autocomplete.png) 65 | 66 | ## Exercises 67 | 68 | Work through [Section 2.5 of Efficient R Programming](https://csgillespie.github.io/efficientR/set-up.html#rstudio) 69 | 70 | - Answer the practicals.
71 | - If you finish early, work on the Spatial Microsimulation book's [appendix](https://github.com/Robinlovelace/spatial-microsim-book/blob/master/13-appendix.Rmd) -------------------------------------------------------------------------------- /slides/simpop-intro.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "simPop" 3 | author: "Robin Lovelace" 4 | date: '`r Sys.Date()`' 5 | output: 6 | ioslides_presentation: default 7 | beamer_presentation: default 8 | --- 9 | 10 | ## SimPop 11 | 12 | - simPop is a package for creating, analysing and modelling synthetic microdata 13 | - It is powerful and integrates a number of beneficial features: 14 | - Parallel processing 15 | - Real (EU SILC and other) test datasets 16 | - Wide range of functionality 17 | - Funded by respected organisations and implemented by skilled programmers 18 | 19 | ## Basic use 20 | 21 | ```{r, echo=TRUE} 22 | library(simPop) # loads lots of packages 23 | ``` 24 | 25 | ## simPop data 26 | 27 | ```{r, echo=TRUE} 28 | data(eusilcS) 29 | dplyr::glimpse(eusilcS[1:5]) 30 | ``` 31 | 32 | ## Create dataObj 33 | 34 | ```{r, echo=TRUE} 35 | inp = specifyInput(data = eusilcS, 36 | hhid = "db030", 37 | hhsize = "hsize", 38 | strata = "db040", 39 | weight = "rb050") 40 | class(inp) 41 | inp 42 | ``` 43 | 44 | ## Input data 45 | 46 | ```{r, echo=TRUE} 47 | head(eusilcS$db030) 48 | head(eusilcS$hsize) 49 | # number of people per household 50 | nrow(eusilcS) / 51 | length(unique(eusilcS$db030)) 52 | ``` 53 | 54 | ## Constraining the input data by cross-tabbed marginals 55 | 56 | ```{r, echo=TRUE} 57 | data(totalsRGtab) 58 | totalsRGtab 59 | rcons = colSums(totalsRGtab) / sum(totalsRGtab) 60 | rsurv = summary(eusilcS$db040) / nrow(eusilcS) 61 | ``` 62 | 63 | ## Differences in regional totals 64 | 65 | ```{r, echo=TRUE} 66 | plot(rcons) 67 | points(rsurv, pch = 3) 68 | text(1:length(rsurv), y = pmin(rsurv, rcons), labels = names(rcons)) 69 | ``` 70 | 71 | ## Add weights 72 | 73 | ```{r, echo=TRUE} 74 | addWeights(inp) = 75 | calibSample(inp = inp, totals = totalsRGtab) 76 | synthP = simStructure(dataS = inp, 77 | method = "direct", 78 | basicHHvars = c("age", "rb090", "db040")) 79 | ``` 80 | 81 | ## A look at the outputs 82 | 83 | ```{r, echo=TRUE} 84 | slotNames(synthP) 85 | nrow(synthP@pop@data) 86 | head(synthP@pop@data) 87 | rsynth = summary(synthP@pop@data$db040) / 88 | nrow(synthP@pop@data) 89 | ``` 90 | 91 | ## Comparison with marginals 92 | 93 | ```{r, echo=TRUE} 94 | plot(rcons) 95 | points(rsynth, pch = 3) 96 | text(1:length(rsurv), y = pmin(rsurv, rcons), labels = names(rcons)) 97 | ``` 98 | 99 | ## Visualisation | source: [publik.tuwien.ac.at](http://publik.tuwien.ac.at/files/PubDat_238106.pdf) 100 | 101 | ![](../figures/simPop-results-eg.png) 102 | 103 | ## Tasks 104 | 105 | - Practical (30 minutes) 106 | - Basic: read-up on **simPop** 107 | - Intermediate: build on the examples using `data("eusilcP")` and `data("eusilcS")` to explore the functionality of **simPop** 108 | - Advanced: take a look at the package's source code 109 | 110 | - Challenge (20 minutes) 111 | - Beginner: try to create a synthetic microdataset of [SimpleWorld](https://github.com/Robinlovelace/spatial-microsim-book/blob/master/02-SimpleWorld.Rmd) using **simPop** 112 | - Explore how to use `simContinuous()` to estimate mean income in the regions of Austria 113 | 114 | - Discussion: how could these methods be useful in your work? 
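A possible starting point for the 'Intermediate' task above (a minimal sketch: it only loads and inspects `eusilcP`, the full synthetic population shipped with **simPop**, before you build on the earlier examples):

```{r, eval=FALSE}
# hedged starter: look at the full synthetic population data set first,
# then extend the specifyInput()/calibSample()/simStructure() examples above
data("eusilcP")
dim(eusilcP)   # number of individuals and variables
str(eusilcP)   # variable names and types to build on
```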
115 | 116 | ```{r} 117 | args(simContinuous) 118 | ``` 119 | 120 | ## simPop resources: 121 | 122 | - Slides by Matthias Templ: http://publik.tuwien.ac.at/files/PubDat_238106.pdf 123 | - A youtube video on the topic: https://www.youtube.com/watch?v=fjZhAUq3JZ0 124 | - The package's documentation 125 | 126 | -------------------------------------------------------------------------------- /sms-book-citation.bib: -------------------------------------------------------------------------------- 1 | 2 | @book{lovelace_spatial_2016, 3 | title = {Spatial Microsimulation with {{R}}}, 4 | url = {http://robinlovelace.net/spatial-microsim-book/}, 5 | publisher = {{CRC Press}}, 6 | date = {2016}, 7 | author = {Lovelace, Robin and Dumont, Morgane} 8 | } 9 | 10 | 11 | -------------------------------------------------------------------------------- /spatial-microsim-book.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | BuildType: Website 16 | -------------------------------------------------------------------------------- /www/.gitignore: -------------------------------------------------------------------------------- 1 | bootstrap-2.3.2/ 2 | highlight/ 3 | jquery-1.11.0/ -------------------------------------------------------------------------------- /www/glyphicons-halflings-white.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/www/glyphicons-halflings-white.png -------------------------------------------------------------------------------- /www/glyphicons-halflings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/www/glyphicons-halflings.png -------------------------------------------------------------------------------- /www/highlight.css: -------------------------------------------------------------------------------- 1 | /* Affix navigation to top-left */ 2 | #nav.affix { 3 | position: static; 4 | } 5 | @media screen and (min-width: 720px) { 6 | #nav.affix, 7 | #nav.affix { 8 | position: fixed; 9 | top: 0px; 10 | } 11 | #nav.affix-bottom { 12 | position: absolute; 13 | } 14 | } 15 | 16 | /* Tweak code styling */ 17 | 18 | code { 19 | padding: 1px; 20 | } 21 | 22 | /* Tweak navigation list styling */ 23 | 24 | ul.toc { 25 | padding-left: 0px; 26 | } 27 | 28 | ul.toc .dropdown-header { 29 | padding: 5px 0 0 0; 30 | } 31 | 32 | ul .dropdown-header:first-child { 33 | margin-top: 2px; 34 | } 35 | 36 | .dropdown-header { 37 | font-weight: bold; 38 | color: #333; 39 | margin-top: 8px; 40 | } 41 | 42 | .dropdown-menu .dropdown-header { 43 | border-bottom: 1px solid #eee; 44 | } 45 | 46 | 47 | /* Syntax highlighting */ 48 | 49 | pre, code { 50 | font-family: 'Inconsolata', sans-serif; 51 | font-size: 1em; 52 | background-color: #fafafa; 53 | } 54 | pre { 55 | border-color: #ddd; 56 | } 57 | code { 58 | color: #333; 59 | white-space: normal; 60 | } 61 | 62 | table.sourceCode, tr.sourceCode, td.lineNumbers, td.sourceCode { 63 | margin: 0; padding: 0; vertical-align: baseline; border: none; } 64 | table.sourceCode { 
width: 100%; line-height: 100%; } 65 | td.lineNumbers { text-align: right; padding-right: 4px; padding-left: 4px; color: #aaaaaa; border-right: 1px solid #aaaaaa; } 66 | td.sourceCode { padding-left: 5px; } 67 | 68 | /* Class described in https://benjeffrey.com/posts/pandoc-syntax-highlighting-css 69 | Colours from https://gist.github.com/robsimmons/1172277 */ 70 | 71 | code > span.kw { color: #555; font-weight: bold; } /* Keyword */ 72 | code > span.dt { color: #902000; } /* DataType */ 73 | code > span.dv { color: #40a070; } /* DecVal (decimal values) */ 74 | code > span.bn { color: #d14; } /* BaseN */ 75 | code > span.fl { color: #d14; } /* Float */ 76 | code > span.ch { color: #d14; } /* Char */ 77 | code > span.st { color: #d14; } /* String */ 78 | code > span.co { color: #888888; font-style: italic; } /* Comment */ 79 | code > span.ot { color: #007020; } /* OtherToken */ 80 | code > span.al { color: #ff0000; font-weight: bold; } /* AlertToken */ 81 | code > span.fu { color: #900; font-weight: bold; } /* Function calls */ 82 | code > span.er { color: #a61717; background-color: #e3d2d2; } /* ErrorTok */ 83 | 84 | /* Tables */ 85 | 86 | table { 87 | width: 100%; 88 | margin-bottom: 20px; 89 | } 90 | 91 | table thead > tr > th, 92 | table tbody > tr > th, 93 | table tfoot > tr > th, 94 | table thead > tr > td, 95 | table tbody > tr > td, 96 | table tfoot > tr > td { 97 | padding: 8px; 98 | line-height: 1.428571429; 99 | vertical-align: top; 100 | border-top: 1px solid #dddddd; 101 | } 102 | 103 | table thead > tr > th { 104 | vertical-align: bottom; 105 | border-bottom: 2px solid #dddddd; 106 | } 107 | 108 | table tr.odd { 109 | background-color: #fafafa; 110 | } 111 | -------------------------------------------------------------------------------- /www/toc.js: -------------------------------------------------------------------------------- 1 | toc = $("ul#toc"); 2 | $("#content").find("h2").each(function() { 3 | h = $(this); 4 | toc.append("
  • " + h.text() + "
  • "); 5 | }); 6 | 7 | 8 | $('#nav').affix({ 9 | offset: $('#nav').position() 10 | }); --------------------------------------------------------------------------------