├── .Rbuildignore ├── .gitattribrutes ├── .gitignore ├── .travis.yml ├── 01-introduction.Rmd ├── 02-SimpleWorld.Rmd ├── 03-what-is-smsim.Rmd ├── 04-data-prep.Rmd ├── 05-smsim-in-R.R ├── 05-smsim-in-R.Rmd ├── 06-alternative-approaches.Rmd ├── 07-CakeMap.Rmd ├── 08-validation.Rmd ├── 09-no-microdata.Rmd ├── 10-household-allocation.Rmd ├── 11-Tresis_chapter.Rmd ├── 12-smsim-for-abm.Rmd ├── 13-appendix.Rmd ├── 14-glossary.Rmd ├── 15-references.Rmd ├── DESCRIPTION ├── LICENSE ├── Makefile ├── NAMESPACE ├── NetLogo ├── NLv01.R ├── SimpleWorldVersion1.nlogo ├── SimpleWorldVersion2.nlogo ├── SimpleWorldVersion3.nlogo ├── SimpleWorldVersion4.nlogo ├── agents.csv ├── ints_df.RData └── multiSimRun.R ├── README.Rmd ├── README.md ├── additional-material ├── 13-additional.Rmd ├── input-data-mipfp.csv ├── input-data-mipfp.csv.ods └── reformatting-mipfp-example.Rmd ├── backup-code ├── .Rapp.history ├── CakeMap-dataknut.R └── tests.R ├── bibliography.bib ├── build.R ├── cache-data-prep.RData ├── cache-smsim-in-R.RData ├── code ├── CakeMap.R ├── CakeMapInts.R ├── CakeMapMipfpCon1Convert.R ├── CakeMapMipfpData.R ├── CakeMapPlot.R ├── CakeMapTimeAnalysis.R ├── CakeMapWithMipfp.R ├── ConvertIpfpWeights.R ├── SimpleWorld.R ├── bbuild.R ├── book-functions.R ├── build-CRC-version.R ├── functions.R ├── gregwt.R ├── ipfpMultiDim.R ├── optim-cakeMap.R ├── optim-tests-SimpleWorld.R └── parallel-ipfp.R ├── courses └── course-info-3day.Rmd ├── data ├── Belgium │ ├── BelgiqueConting.txt │ ├── ContrainteAge.txt │ ├── ContrainteDipl.txt │ ├── ContrainteGenre.txt │ ├── ContrainteStatut.txt │ ├── HH_cons_INS92094 │ └── HH_sample ├── CakeMap │ ├── age-sex-raw.csv │ ├── area-cat.R │ ├── cars-raw.csv │ ├── categorise.R │ ├── cons.csv │ ├── inc-est-2001.csv │ ├── ind.csv │ ├── load-all.R │ ├── nssec-raw.csv │ ├── process-age.R │ ├── process-car.R │ ├── process-nssec.R │ └── wards.RData └── SimpleWorld │ ├── age.csv │ ├── ind-full.csv │ ├── ind.csv │ └── sex.csv ├── deploy.sh ├── elsevier-harvard.csl ├── figures ├── Belgium │ ├── BadSize.png │ ├── CM_ENF.png │ ├── Couples.png │ ├── NonAssigne.png │ ├── diplome.png │ ├── diplome_statut.png │ └── statut.png ├── CakeMap-lores.png ├── Couple_SE.png ├── HH-CO.png ├── HHCouplesBelgium.png ├── HHCouplesNamur.jpg ├── IllustrationCouples.png ├── Jojo.png ├── Jojo_JASS.png ├── Jojo_JASS2.png ├── RandomUnif100000.png ├── TAEOptim_GenSA_Mo.pdf ├── TAEOptim_GenSA_Mo.png ├── TRESISModels.png ├── TimeCakeMap.png ├── TimeOptim_GenSA_Mo.pdf ├── TimeOptim_GenSA_Mo.png ├── Trafic_Jojo2.png ├── Trafic_jojo.png ├── agri-example-hynes-2008.png ├── austerity.png ├── co-vs-ipf-schema.png ├── cover-image.jpg ├── fit-obs-sim-simple-5.png ├── fsimple1.png ├── history01.png ├── incomeCake.png ├── integerisation-algorithms.png ├── jtg.png ├── msim-flow.png ├── msim-schema.png ├── nl-chooser.png ├── nl-graphics-window.png ├── nl-income-boxplots.png ├── nl-plots.png ├── nl-simpleworld-negotiating.png ├── nl-simpleworld-populated.png ├── nl-simpleworld.png ├── nl-sliders.png ├── nl-ticks.png ├── nl-zones.png ├── optim-its.png ├── optim-time.png ├── raw-data-screenshot.jpeg ├── rstudio-autocomplete.png ├── rstudio-environment.png ├── simPop-results-eg.png ├── simpleworld-1.png ├── studio-basic.png ├── vingtile.png └── why-msim-maup.png ├── fractional_weights ├── BA-MakeCakeSimFractional.R ├── BA-process-final_micro_fractional_cakes_geo.do ├── README.md └── cakes_geo.csv ├── frontmatter ├── pream.tex └── preface.tex ├── index.Rmd ├── krantz.cls ├── notes ├── BA-notes.md ├── L1.Rmd ├── mipfp-notes.R ├── 
seville-notes.R └── simPop-notes.R ├── output ├── .gitignore └── ints_df.csv ├── slides ├── Applying-IPF-and-CO.Rmd ├── SM-for-ABM.Rmd ├── SM-without-microdata.Rmd ├── introduction.Rmd ├── r-rstudio-practical.Rmd ├── simpop-intro.Rmd └── spatial-microdata-in-r.Rmd ├── sms-book-citation.bib ├── spatial-microsim-book.Rproj └── www ├── .gitignore ├── bootstrap-theme.min.css ├── bootstrap.min.css ├── bootstrap.min.js ├── glyphicons-halflings-white.png ├── glyphicons-halflings.png ├── highlight.css └── toc.js /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | -------------------------------------------------------------------------------- /.gitattribrutes: -------------------------------------------------------------------------------- 1 | README.md merge=ours 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | spatial-microsim-book* 2 | backup-code/ 3 | figures/CakeMap.png 4 | *.tex 5 | *.aux 6 | *.orig 7 | *.docx 8 | .Rproj.user 9 | *.pdf 10 | .history 11 | *.backup 12 | *.md~ 13 | *.toc 14 | *.kilepr 15 | .Rhistory 16 | .RData 17 | *.gz 18 | ggmap* 19 | .Rproj.user 20 | *.cls 21 | book/ 22 | _site/ 23 | temp.* 24 | book.Rmd 25 | *.html 26 | drafts/ 27 | .dropbox 28 | *.out 29 | comments.ods 30 | cache-CakeMap.RData 31 | .~lock.book.docx# 32 | desktop.ini 33 | *.log 34 | book.odt 35 | frontmatter/rough-drafts/stackXquestion.R 36 | *.bak 37 | master.zip 38 | BEL_adm4.rds 39 | libs 40 | _book 41 | _bookdown_files 42 | _main* 43 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: r 2 | cache: packages 3 | dist: trusty 4 | sudo: required 5 | warnings_are_errors: false 6 | before_install: 7 | - sudo add-apt-repository ppa:ubuntugis/ubuntugis-unstable --yes 8 | - sudo add-apt-repository ppa:opencpu/jq --yes 9 | - sudo apt-get --yes --force-yes update -qq 10 | - sudo apt-get install --yes libudunits2-dev libproj-dev libgeos-dev libgdal-dev libv8-dev 11 | - sudo apt-get install --yes libjq-dev libprotobuf-dev libprotoc-dev protobuf-compiler 12 | 13 | r_packages: 14 | - rgdal 15 | - sf 16 | - devtools 17 | 18 | script: 19 | - R CMD INSTALL ../spatial-microsim-book/ 20 | - Rscript -e 'bookdown::render_book("index.Rmd", output_format = "bookdown::gitbook", clean = FALSE)' 21 | 22 | after_success: 23 | - test $TRAVIS_PULL_REQUEST == "false" && test $TRAVIS_BRANCH == "master" && bash deploy.sh 24 | 25 | notifications: 26 | email: 27 | on_success: change 28 | on_failure: change 29 | -------------------------------------------------------------------------------- /13-appendix.Rmd: -------------------------------------------------------------------------------- 1 | # Appendix: Getting up-to-speed with R {#apR} 2 | 3 | As mentioned in Chapter 1, R is a general purpose programming 4 | language focussed on data analysis and modelling. This small tutorial aims to 5 | teach the basics of R, from the perspective of spatial microsimulation research. 6 | It should also be useful to people with existing R skills, to re-affirm their 7 | knowledge base and see how it is applicable to spatial microsimulation. 8 | 9 | R's design is built on the idea that everything that exists is an object and everything 10 | that happens is a function. 
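Even arithmetic is a function call under the hood; as a quick aside (a two-line illustration, not one of the book's worked examples), the `+` operator can be called by name like any other function:

```{r}
1 + 2      # the familiar infix form
`+`(1, 2)  # exactly the same operation, written explicitly as a function call
```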
It is a *vectorised*, *object orientated* and 11 | *functional* programming language (Wickham 2014). This means that R 12 | understands vector algebra, that all data accessible to R resides in 13 | named objects, and that functions are used to modify those objects. We will 14 | look at each of these in some code below. 15 | 16 | ## R understands vector algebra {#vector-alg} 17 | 18 | A vector is simply an ordered list of numbers (Beezer 2008). 19 | Imagine two vectors, each consisting of 3 elements: 20 | 21 | $$a = (1,2,3); b = (9,8,6) $$ 22 | 23 | To say that R understands vector algebra is to say that it knows how to 24 | handle vectors in the same way a mathematician does: 25 | 26 | $$a + b = (a_1 + b_1, a_2 + b_2, a_3 + b_3) = (10,10,9) $$ 27 | 28 | This may not seem remarkable, but it is. Most programming 29 | languages are not vectorised, so they would see $a + b$ differently. 30 | In Python, for example, this is the answer we get:^[We can 31 | get the right answer in Python, by typing the following: 32 | `import numpy; a=numpy.array([1,2,3]); b=numpy.array([9,8,6]); a+b`.] 33 | 34 | ```{r, engine='python', eval=FALSE} 35 | a = [1,2,3] 36 | b = [9,8,6] 37 | print(a + b) 38 | ``` 39 | 40 | `## [1, 2, 3, 9, 8, 6]` 41 | 42 | In R, the operation *just works*, intuitively: 43 | 44 | ```{r} 45 | a <- c(1, 2, 3) 46 | b <- c(9, 8, 6) 47 | a + b 48 | ``` 49 | 50 | This conciseness is clearly very useful in spatial microsimulation, as numeric 51 | variables of the same length are common (e.g. the attributes of individuals in a 52 | zone) and can be acted on with a minimum of effort. 53 | 54 | ## R is object orientated {#R-object} 55 | 56 | In R, everything that exists is an object with a name and a class. This is 57 | useful, because R's functions know automatically how to behave differently on 58 | different objects depending on their class. 59 | 60 | To illustrate the point, let's create two objects, each with a different class 61 | and see how the function `summary` behaves differently, depending on the type. 62 | This behaviour is *polymorphism* [@Matloff2011]: 63 | 64 | ```{r} 65 | # Create a character and a numeric vector object 66 | char_obj <- c("red", "blue", "red", "green") 67 | num_obj <- c(1, 4, 2, 532.1) 68 | 69 | # Summary of each object 70 | summary(char_obj) 71 | summary(num_obj) 72 | 73 | # Summary of a factor object 74 | fac_obj <- factor(char_obj) 75 | summary(fac_obj) 76 | ``` 77 | 78 | In the example above, the output from `summary` for the numeric object `num_obj` 79 | was very different from that of the character vector `char_obj`. Note that 80 | although the same information was contained in `fac_obj` (a factor), the output 81 | from `summary` changes again. 82 | 83 | Note that objects can be called almost anything in R with the exceptions of 84 | names beginning with a number or containing operator symbols such as `-`, `^` 85 | and brackets. It is good practice to think about what the purpose of an object 86 | is before naming it: using clear and concise names can save you a huge amount of 87 | time in the long run. 88 | 89 | 90 | ## Subsetting in R {#subsetting} 91 | 92 | R has powerful, concise and (over time) intuitive methods for taking subsets of 93 | data. Using the SimpleWorld example we loaded in *Data preparation*, 94 | let's explore the `ind` object in more detail, to see 95 | how we can select the parts of an object we are most interested in.
As before, 96 | we need to load the data: 97 | 98 | ```{r} 99 | ind <- read.csv("data/SimpleWorld/ind.csv") 100 | ``` 101 | 102 | Now, it is easy from within R to call a single individual (e.g. individual 3) 103 | using the square bracket notation: 104 | 105 | ```{r} 106 | ind[3,] 107 | ``` 108 | 109 | The above example takes a subset of `ind` all elements present on the 3rd row: 110 | for a 2 dimensional table, anything to the left of the comma refers to rows and 111 | anything to the right refers to columns. Note that `ind[2:3,]` and 112 | `ind[c(3,5),]` also take subsets of the `ind` object: the square brackets can 113 | take *vector* inputs as well as single numbers. 114 | 115 | We can also subset by columns: the second dimension. Confusingly, this can be 116 | done in four ways, because `ind` is an R `data.frame`^[This can be ascertained 117 | by typing `class(ind)`. It is useful to know the class of different R objects, 118 | so make good use of the `class()` function.] and a data frame can behave 119 | simultaneously as a list, a matrix and a data frame (only the results of the 120 | first are shown): 121 | 122 | ```{r} 123 | ind$age # data.frame column name notation I 124 | # ind[, 2] # matrix notation 125 | # ind["age"] # column name notation II 126 | # ind[[2]] # list notation 127 | # ind[2] # numeric data frame notation 128 | ``` 129 | 130 | It is also possible to subset cells by both rows and columns simultaneously. 131 | Let us select query the gender of the 4th individual, as an example 132 | (pay attention to the relative location of the comma inside the square brackets): 133 | 134 | ```{r} 135 | ind[4, 3] # The attribute of the 4th individual in column 3 136 | ``` 137 | 138 | A commonly used trick in R that helps with the analysis of individual level data 139 | is to subset a data frame based on one or more of its variables. Let's subset 140 | first all females in our dataset and then all females over 50: 141 | 142 | ```{r} 143 | ind[ind$sex == "f", ] 144 | ind[ind$sex == "f" & ind$age > 50, ] 145 | ``` 146 | 147 | In the above code, R uses relational operators of equality (`==`) and inequality 148 | (`>`) which can be used in combination using the `&` symbol. This works because, 149 | as well as integer numbers, one can also place *boolean* variables into square 150 | brackets: `ind$sex == "f"` returns a binary vector consisting solely of `TRUE` 151 | and `FALSE` values.^[Thus, yet another way to invoke the 2nd column of `ind` is 152 | the following: `ind[c(F, T, F)]`! Here, `T` and `F` are shorthand for "TRUE" and 153 | "FALSE" respectively.] 154 | 155 | ## Further R resources {#further} 156 | 157 | The above tutorial should provide a sufficient grounding in R for beginners to 158 | understand the practical examples in the book. However, R is a deep language 159 | and there is much else to learn that will be of benefit to your modelling 160 | skills. There are many excellent books and tutorials that teach the fundamentals 161 | of R for a variety of applications. 162 | The following resources, in ascending order of difficulty, 163 | are highly recommended: 164 | 165 | - *Introduction to visualising spatial data in R* (Lovelace and Cheshire 2014) 166 | provides an introductory tutorial on handling spatial data in R, including the 167 | administrative zone data which often form the building blocks of spatial microsimulation 168 | models in R. 169 | - *Introduction to scientific programming and simulation using R* 170 | (Jones et al. 
2014) is an 171 | accessible and highly practical course that will form a solid foundation 172 | for a range of modelling applications, including spatial microsimulation. 173 | - *An Introduction to R* (Venables et al. 2014) 174 | is the foundational introductory R manual, written by the 175 | software's core developers and is available on-line for free. 176 | It is terse and covers some advanced topics, but 177 | provides a useful reference on the fundamentals of R as a language. 178 | - *Advanced R* 179 | (Wickham 2014) (http://www.crcpress.com/product/isbn/9781466586963) 180 | delves into the heart 181 | of the R language. It contains many advanced topics, but the introductory 182 | chapters are straightforward. Browsing some of the pages on 183 | Advanced R's website (http://adv-r.had.co.nz/) and 184 | trying to answer the questions that open each chapter 185 | provides a taste of the book and an excellent 186 | way of testing and improving one's understanding of the R language. 187 | 188 | ```{r, echo=F} 189 | # There are alternatives to R and in the next section we will consider a few of these. 190 | ``` 191 | -------------------------------------------------------------------------------- /14-glossary.Rmd: -------------------------------------------------------------------------------- 1 | # Glossary 2 | 3 | - **Algorithm**: a series of computer commands executed in a 4 | specific order for a pre-defined purpose. 5 | Algorithms process input data and produce outputs. 6 | 7 | - **Constraints** are variables used to estimate the number (or weight) 8 | of individuals in each zone. Also referred to by the longer name of 9 | **constraint variable**. We tend to use the term **linking variable** 10 | in this book because they *link* aggregate and individual level datasets. 11 | 12 | - **Combinatorial optimisation** is an approach to spatial 13 | microsimulation that generates spatial microdata by randomly 14 | selecting individuals from a survey dataset and measuring the fit 15 | between the simulated output and the constraint variables. If the 16 | fit improves after any particular change, the change is kept. 17 | Williamson (2007) provides a practical user manual. @Harland2013 18 | provides a practical demonstration of the method implemented in 19 | the Java-based Flexible Modelling Framework (FMF). 20 | 21 | - **Data frame**: a type of object (formally referred to as a class) 22 | in R, data frames are square tables composed of rows and columns of 23 | information. As with many things in R, the best way to understand 24 | data frames is to create them and experiment. The following creates 25 | a data frame with two variables: name and height: 26 | 27 | Note that each new variable is entered using the command `c()` this is 28 | how R creates objects with the *vector* data class, a one 29 | dimensional matrix — and that text data must be entered in quote 30 | marks. 31 | 32 | - **Deterministic reweighting** is an approach to generating spatial 33 | microdata that allocates fractional weights to individuals based on 34 | how representative they are of the target area. It differs from 35 | combinatorial optimisation approaches in that it requires no random 36 | numbers. The most frequently used method of deterministic 37 | reweighting is IPF. 38 | 39 | - **For loops** are instructions that tell the computer to run a 40 | certain set of command repeatedly. `for(i in 1:9) print(i)`, for 41 | example will print the value of i 9 times. 
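As a slightly fuller sketch (the numbers below are purely illustrative, loosely echoing the three zones of SimpleWorld), a loop over zones might look like this:

```{r, eval=FALSE}
# Illustrative sketch: loop over three zones and print a message for each
populations <- c(12, 10, 11) # made-up zone populations
for (zone in 1:3) {
  print(paste("Zone", zone, "has", populations[zone], "residents"))
}
```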
The best way to further 42 | understand for loops is to try them out. 43 | 44 | - **Iteration**: one instance of a process that is repeated many times 45 | until a predefined end point, often within an *algorithm*. 46 | 47 | - **Iterative proportional fitting** (IPF): an iterative process 48 | implemented in mathematics and algorithms to find the maximum 49 | likelihood of cells that are constrained by multiple sets of 50 | marginal totals. To make this abstract definition even more 51 | confusing, there are multiple terms which refer to the process, 52 | including ‘biproportional fitting’ and ‘matrix raking’. In plain 53 | English, IPF in the context of spatial microsimulation can be 54 | defined as *a statistical technique for allocating weights to 55 | individuals depending on how representative they are of different 56 | zones*. IPF is a type of deterministic reweighting, meaning that 57 | random numbers are not needed to generate the result and that the 58 | output weights are real (not integer) numbers. 59 | 60 | - A **linking variable** is a variable that is shared between individual and 61 | aggregate level data. Common examples include age and sex (the linking variables 62 | used in the SimpleWorld example): questions that are commonly asked in all 63 | kinds of survey. Linking variables are also referred to as 64 | **constraint variables** because they *constrain* the weights for individuals 65 | in each zone. 66 | 67 | - **Microdata** is the non-geographical individual level dataset from which 68 | synthetic **spatial microdata** are usually derived. This sample of the 69 | target population has also been labelled as the 'seed' 70 | (e.g. Barthelemy and Toint, 2012) and simply the 'survey data' in the academic 71 | literature. The term microdata is used in this book for its brevity and 72 | semantic link to spatial microdata. 73 | 74 | - The **population base** is roughly equivalent to the 'target population', 75 | used by statisticians to describe the population about whom they wish to 76 | draw conclusions based on a 'sample population'. 77 | The sample population is the group of individuals for whom 78 | we have individual level data. 79 | In aggregate level data, the **population base** is the 80 | complete set of individuals represented by the counts. 81 | A common example is the variable "Hours worked": 82 | only people aged 16 to 74 are generally thought of as working, so, if there is 83 | no `NA` (no answer) category, the population base is not the same as the total 84 | population of an area. A common problem faced by people using spatial microsimulation 85 | methods is incompatibility between aggregate constraints that use different 86 | population bases. 87 | 88 | - **Population synthesis** is the process of converting input data (generally 89 | non-geographical **microdata** and geographically aggregated 90 | **constraint variables**) into **spatial microdata**. 91 | 92 | - **Spatial microdata** is the name given to individual level data allocated 93 | to mutually exclusive geographical zones (see Figure 5.1 above). Spatial 94 | microdata is useful because it provides multi level information about the 95 | relationships between individuals and where they live. However, due to the 96 | high costs of large surveys and restrictions on the release of geocoded 97 | individual level data, spatial microdata is rarely available to researchers.
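A toy sketch of what such a table looks like (the attribute values echo the SimpleWorld individuals used throughout the book, but the zone assignments here are purely illustrative):

```{r, eval=FALSE}
# Toy spatial microdata: individual level attributes plus a zone identifier
data.frame(id = c(1, 2, 3), age = c(59, 54, 35), sex = c("m", "m", "m"),
           zone = c(1, 1, 2))
```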
98 | To overcome this issue, most spatial microsimulation research employs methods 99 | of **population synthesis** to generate representative spatial microdata. 100 | 101 | - **Spatial microsimulation** is the name given to an approach to modelling that 102 | comprises a series of techniques that 103 | generate, analyse and model individual level data allocated to small 104 | administrative zones. Spatial microsimulation is an approach for 105 | understanding processes that operate on individual and geographical levels. 106 | 107 | - A **weight matrix** is a 2 dimensional array that links non-spatial 108 | *microdata* to geographical zones. Each row in the weight matrix represents 109 | an individual and each column represents a zone. Thus, in R notation, 110 | the weight matrix `w` has dimensions of `nrow(ind)` rows by `nrow(cons)` 111 | where `ind` and `cons` are the microdata and constraints respectively. 112 | The value of `w[i,j]` represents the extent to which individual `i` is 113 | representative of zone `j`. `sum(w)` is the total population of the study area. 114 | The weight matrix is an efficient way of storing spatial microdata because 115 | it does not require a new row for every additional individual in the study 116 | area. For a weight matrix to be converted into spatial microdata, all the 117 | values of the weights must be integers. The conversion of a non-integer weight 118 | matrix into an integer weight matrix is known as *integerisation*. 119 | 120 | ```{r, echo=FALSE} 121 | # Any words that are highlighted in the main text can go in here 122 | ``` 123 | -------------------------------------------------------------------------------- /15-references.Rmd: -------------------------------------------------------------------------------- 1 | # Bibliography {#bibliography} 2 | 3 | ```{r, echo=FALSE} 4 | # How to create: %s/\n/\r\r/gc in vim 5 | ``` 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: smsbook 2 | Title: Spatial Microsimulation with R: a book 3 | Version: 0.0.1 4 | Authors@R: c(person("Robin", "Lovelace", role = c("aut", "cre"), email = "rob00x@gmail.com"), 5 | person("Morgane", "Dumont", role = c("aut"))) 6 | Imports: 7 | bookdown, 8 | knitr, 9 | rmarkdown, 10 | png, 11 | ggmap, 12 | GREGWT, 13 | dplyr, 14 | ipfp, 15 | rgeos, 16 | mipfp, 17 | rgdal, 18 | gridExtra, 19 | maptools, 20 | jpeg, 21 | tmap, 22 | tidyr, 23 | mlogit, 24 | simPop, 25 | reticulate 26 | Remotes: 27 | emunozh/GREGWT 28 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 Robin 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software.
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | html: 2 | Rscript -e 'bookdown::render_book("index.Rmd", output_format = "bookdown::gitbook", clean = FALSE)' 3 | cp -fvr _main.utf8.md _book/main.md 4 | # cp -fvr css/style.css _book/ 5 | # cp -fvr images _book/ 6 | 7 | build: 8 | make html 9 | Rscript -e 'browseURL("_book/index.html")' 10 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/NAMESPACE -------------------------------------------------------------------------------- /NetLogo/NLv01.R: -------------------------------------------------------------------------------- 1 | require(RNetLogo) 2 | require(dplyr) 3 | require(ggplot2) 4 | require(extrafont) 5 | loadfonts() 6 | 7 | ############ 8 | ## section 1 9 | ############ 10 | NLStart("C:\\Program Files (x86)\\NetLogo 5.1.0") 11 | NLLoadModel("C:/Users/sfos0247/Copy/Dropbox/XtraWork/spatial-microsim-book/NetLogo/SimpleWorldVersion3.nlogo") 12 | 13 | NLStart("/usr/local/netlogo-5.1.0") 14 | NLLoadModel("/home/mz/Documents/Copy/Dropbox/XtraWork/spatial-microsim-book/NetLogo/SimpleWorldVersion3.nlogo") 15 | 16 | NLCommand("setup") 17 | NLReport("ticks") 18 | NLCommand("go") 19 | NLReport("ticks") 20 | NLDoCommand(50,"go") 21 | NLReport("ticks") 22 | 23 | test <- NLDoReport(10,"go", c(" ticks", 24 | "count inhabitants with [zone = 1]", 25 | "count inhabitants with [zone = 2]", 26 | "count inhabitants with [zone = 3]"), 27 | as.data.frame = TRUE) 28 | head(test) 29 | current.state <- NLGetAgentSet(c("who","income", "zone"), 30 | "inhabitants") 31 | boxplot(current.state$income~current.state$zone, 32 | xlab="Zone", ylab="Income", main=paste("Income distribution after", 33 | NLReport("ticks"), "ticks" )) 34 | 35 | NLDoCommandWhile (" (ticks <= 100) " , "go") 36 | NLReport("ticks") 37 | 38 | NLQuit() 39 | 40 | ############ 41 | ## section 2 42 | ############ 43 | 44 | NLStart("/usr/local/netlogo-5.1.0", gui=FALSE) 45 | NLLoadModel("/home/mz/Documents/Copy/Dropbox/XtraWork/spatial-microsim-book/NetLogo/SimpleWorldVersion3.nlogo") 46 | 47 | SimpleWorld <- function(time.stable = 100) { 48 | NLCommand("setup") 49 | NLDoCommandWhile (paste("(count inhabitants with [zone = 1] < 33) and", 50 | "(count inhabitants with [zone = 2] < 33) and", 51 | "(count inhabitants with [zone = 3] < 33) and", 52 | "(time-stable <= ", time.stable, ") ") , "go") 53 | NLGetAgentSet("history", "inhabitants") 54 | } 55 | 56 | NLCommand("random-seed 42") 57 | inhabitant.histories <- SimpleWorld(50) 58 | NLQuit() 59 | 60 | ## analysis 61 | dim(inhabitant.histories)[1]*dim(inhabitant.histories)[2] 62 | history <- as.data.frame(matrix(unlist(inhabitant.histories), ncol=4, byrow=TRUE)) 63 | colnames(history) <- c("id", 
"tick","income", "zone") 64 | 65 | require(dplyr) 66 | changes <- group_by(history, id) %>% 67 | mutate( change=c(0,diff(zone))) %>% 68 | summarize(start.income = income[1], 69 | end.income = tail(income,1), 70 | income.change = end.income - start.income, 71 | zone.changes = sum(change != 0) 72 | ) 73 | 74 | par(oma=c(0.5,4,0.5,4), mar=c(4,1,1,2), mfrow=c(1,3), xpd=FALSE) 75 | plot(zone.changes ~ start.income , data=changes, axes=FALSE, xlab="") 76 | axis(1) 77 | axis(2) 78 | mtext("Starting income", 1, line=3) 79 | mtext( "Number of zone changes", 2, line=3) 80 | abline(lm(zone.changes ~ start.income , data=changes)) 81 | plot(zone.changes ~ end.income , data=changes, axes=FALSE, xlab="") 82 | axis(1) 83 | mtext( "Final income", 1, line=3) 84 | abline(lm(zone.changes ~ end.income , data=changes)) 85 | plot(zone.changes ~ income.change , data=changes, xlab="", axes=FALSE) 86 | axis(1) 87 | axis(4) 88 | mtext( "Net income gain", 1, line=3) 89 | mtext( "Number of zone changes", 4, line=3) 90 | abline(lm(zone.changes ~ income.change , data=changes)) 91 | 92 | 93 | ## section 3 94 | require(RNetLogo) 95 | require(dplyr) 96 | require(ggplot2) 97 | require(extrafonts) 98 | loadfonts() 99 | 100 | NLStart("C:\\Program Files (x86)\\NetLogo 5.1.0", gui=FALSE) 101 | NLLoadModel("C:/Users/sfos0247/Copy/Dropbox/XtraWork/spatial-microsim-book/NetLogo/SimpleWorldVersion3.nlogo") 102 | setwd("C:/Users/sfos0247/Copy/Dropbox/XtraWork/spatial-microsim-book/NetLogo") 103 | 104 | 105 | SimpleWorld <- function(angle.of.vision=360, distance.of.vision=10, time.stable = 200) { 106 | NLCommand (paste("set average-bribeability", 100)) 107 | NLCommand (paste("set stdev-bribeability", 0)) 108 | NLCommand (paste("set angle-of-vision", angle.of.vision)) 109 | NLCommand (paste("set distance-of-vision", distance.of.vision)) 110 | NLCommand("setup") 111 | NLDoCommandWhile (paste("(time-stable <= ", time.stable, ") ") , "go") 112 | c(NLReport(c("ticks - time-stable", nrow(unique(NLGetAgentSet( "zone", "inhabitants")))))) 113 | } 114 | 115 | MultipleSimulations <- function (reps=1, a.o.v = 360, d.o.v = c(5,10)){ 116 | p.s <- expand.grid(rep = seq(1, reps), a.o.v = a.o.v, d.o.v = d.o.v) 117 | reslut.list <- lapply(as.list(1:nrow(p.s)), function(i) 118 | setNames(cbind(p.s[i,], SimpleWorld(p.s[i,2], p.s[i,3])), c("rep", "a.o.v", "d.o.v", "ticks", "zones"))) 119 | do.call(rbind, reslut.list) 120 | } 121 | 122 | MultipleSimulations(2,360,c(5,10)) 123 | 124 | # results.df <- MultipleSimulations2(20,seq(60,360,30),seq(1,10)) 125 | #save(results.df, file="multiSimRun.R") 126 | load("multiSimRun.R") 127 | head(results.df) 128 | 129 | 130 | 131 | # summaries for plots 132 | av.ticks2 <- results.df %>% 133 | group_by(a.o.v, d.o.v) %>% 134 | # filter(zones == 1) %>% 135 | summarize(mean.ticks = mean(ticks, na.rm=TRUE)) 136 | 137 | zones <- results.df %>% 138 | group_by(a.o.v, d.o.v, zones) %>% 139 | summarize(height = n()/20) %>% 140 | group_by(a.o.v, d.o.v) %>% 141 | arrange(desc(zones)) %>% 142 | mutate(shift=-0.5 + cumsum(height)-height + height/2) 143 | 144 | 145 | ## fig 10 146 | png(file="zones.png", height=450, width=750, family="Garamond") 147 | 148 | ggplot(zones, aes(a.o.v,y=d.o.v + shift, fill=as.factor(zones), height=height)) + 149 | geom_tile(col="white") + xlab('aov') + ylab('dov') + 150 | scale_fill_manual(values=c("gray30", "gray50", 151 | "gray80"), name="No of zones") + 152 | theme_bw() + theme(panel.border = element_blank(), panel.grid.major = element_blank(), 153 | panel.grid.minor = 
element_blank(),axis.text=element_text(size=16), 154 | title=element_text(size=16), 155 | legend.text=element_text(size=12), 156 | legend.key = element_rect(colour = "white")) + 157 | scale_x_continuous(breaks=seq(60,360,30) ) + 158 | scale_y_continuous(breaks=seq(1,10,1) ) + 159 | xlab("Angle of vision") + 160 | ylab("Distance of vision") + 161 | guides(fill = guide_legend(override.aes = list(colour = NULL))) 162 | dev.off() 163 | 164 | ##fig 11 165 | png(file="ticks.png", height=450, width=750, family="Garamond") 166 | ggplot(av.ticks, aes(a.o.v,y=d.o.v , fill=mean.ticks)) + 167 | geom_tile(col="white")+ 168 | theme_bw() + theme(panel.border = element_blank(), panel.grid.major = element_blank(), 169 | panel.grid.minor = element_blank(), axis.text=element_text(size=16), 170 | title=element_text(size=16), 171 | legend.text=element_text(size=12)) + 172 | scale_x_continuous(breaks=seq(60,360,30) ) + 173 | scale_y_continuous(breaks=seq(1,10) ) + 174 | scale_fill_gradient(name = "Tick count", trans = "log",low="gray80",high="gray20", 175 | breaks=c(50,100, 500, 1000, 5000)) + 176 | xlab("Angle of vision") + 177 | ylab("Distance of vision") 178 | dev.off() 179 | 180 | -------------------------------------------------------------------------------- /NetLogo/agents.csv: -------------------------------------------------------------------------------- 1 | 1,1,59,"m",2868 2 | 2,1,54,"m",2474 3 | 3,1,35,"m",2231 4 | 3,1,35,"m",2231 5 | 3,1,35,"m",2231 6 | 4,1,73,"f",3152 7 | 5,1,49,"f",2473 8 | 5,1,49,"f",2473 9 | 5,1,49,"f",2473 10 | 5,1,49,"f",2473 11 | 4,1,73,"f",3152 12 | 3,1,35,"m",2231 13 | 1,2,59,"m",2868 14 | 2,2,54,"m",2474 15 | 4,2,73,"f",3152 16 | 4,2,73,"f",3152 17 | 4,2,73,"f",3152 18 | 4,2,73,"f",3152 19 | 5,2,49,"f",2473 20 | 1,2,59,"m",2868 21 | 3,2,35,"m",2231 22 | 2,2,54,"m",2474 23 | 3,3,35,"m",2231 24 | 4,3,73,"f",3152 25 | 4,3,73,"f",3152 26 | 5,3,49,"f",2473 27 | 5,3,49,"f",2473 28 | 5,3,49,"f",2473 29 | 5,3,49,"f",2473 30 | 5,3,49,"f",2473 31 | 4,3,73,"f",3152 32 | 1,3,59,"m",2868 33 | 3,3,35,"m",2231 34 | -------------------------------------------------------------------------------- /NetLogo/ints_df.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/NetLogo/ints_df.RData -------------------------------------------------------------------------------- /NetLogo/multiSimRun.R: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/NetLogo/multiSimRun.R -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: 3 | md_document: 4 | variant: markdown_github 5 | --- 6 | 7 | 8 | 9 | ```{r, echo = FALSE} 10 | knitr::opts_chunk$set( 11 | collapse = TRUE, 12 | comment = "#>", 13 | fig.path = "figures/" 14 | ) 15 | is_online = curl::has_internet() 16 | ``` 17 | 18 | # Spatial Microsimulation with R 19 | 20 | [![DOI](https://zenodo.org/badge/20914/Robinlovelace/spatial-microsim-book.svg)](https://zenodo.org/badge/latestdoi/20914/Robinlovelace/spatial-microsim-book) [![Build Status](https://travis-ci.org/Robinlovelace/spatial-microsim-book.svg?branch=master)](https://travis-ci.org/Robinlovelace/spatial-microsim-book) 21 | 22 | This repository hosts the code and data used in 
*Spatial Microsimulation with R*, a book by [Robin Lovelace](http://robinlovelace.net/) and [Morgane Dumont](https://directory.unamur.be/staff/modumont), (with chapters [11](http://spatial-microsim-book.robinlovelace.net/tresis.html) and [12](http://spatial-microsim-book.robinlovelace.net/abm) contributed by [Richard Ellison](http://sydney.edu.au/business/staff/richard.ellison) and [Maja Založnik](https://www.oxfordmartin.ox.ac.uk/people/565) respectively). 23 | 24 | The book is now published and is available from [CRC Press](https://www.crcpress.com/Spatial-Microsimulation-with-R/Lovelace-Dumont/p/book/9781498711548). 25 | 26 | We hope to continue to update the book as methods evolve and we encourage contributions on any part of the book, including: 27 | 28 | - Improvements to the text, e.g. clarifying unclear sentences, fixing typos (see guidance from [Yihui Xie](https://yihui.name/en/2013/06/fix-typo-in-documentation/)). 29 | - Changes to the code, e.g. to do things in a more efficient way. 30 | - Suggestions on content (see the project's [issue tracker](https://github.com/Robinlovelace/spatial-microsim-book/issues)). 31 | 32 | The latest version of the book can be viewed at 33 | [the book's homepage at spatial-microsim-book.robinlovelace.net](http://spatial-microsim-book.robinlovelace.net/). 34 | Anyone can contribute to this book [here](https://github.com/Robinlovelace/spatial-microsim-book). 35 | 36 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | Spatial Microsimulation with R 3 | ============================== 4 | 5 | [![DOI](https://zenodo.org/badge/20914/Robinlovelace/spatial-microsim-book.svg)](https://zenodo.org/badge/latestdoi/20914/Robinlovelace/spatial-microsim-book) [![Build Status](https://travis-ci.org/Robinlovelace/spatial-microsim-book.svg?branch=master)](https://travis-ci.org/Robinlovelace/spatial-microsim-book) 6 | 7 | This repository hosts the code and data used in *Spatial Microsimulation with R*, a book by [Robin Lovelace](http://robinlovelace.net/) and [Morgane Dumont](https://directory.unamur.be/staff/modumont), (with chapter [10](http://spatial-microsim-book.robinlovelace.net/ha.html) contributed by [Johan Barthélemy](https://smart.uow.edu.au/people/UOW192467.html), chapter [11](http://spatial-microsim-book.robinlovelace.net/tresis.html) contributed by [Richard Ellison](http://sydney.edu.au/business/staff/richard.ellison) and [David Hensher](http://sydney.edu.au/business/staff/david.hensher) and chapter [12](http://spatial-microsim-book.robinlovelace.net/abm) contributed by [Maja Založnik](https://www.oxfordmartin.ox.ac.uk/people/565)). 8 | 9 | The book is now published and is available from [CRC Press](https://www.crcpress.com/Spatial-Microsimulation-with-R/Lovelace-Dumont/p/book/9781498711548). 10 | 11 | We hope to continue to update the book as methods evolve and we encourage contributions on any part of the book, including: 12 | 13 | - Improvements to the text, e.g. clarifying unclear sentences, fixing typos (see guidance from [Yihui Xie](https://yihui.name/en/2013/06/fix-typo-in-documentation/)). 14 | - Changes to the code, e.g. to do things in a more efficient way. 15 | - Suggestions on content (see the project's [issue tracker](https://github.com/Robinlovelace/spatial-microsim-book/issues)). 
16 | 17 | The latest version of the book can be viewed at [the book's homepage at spatial-microsim-book.robinlovelace.net](http://spatial-microsim-book.robinlovelace.net/). Anyone can contribute to this book [here](https://github.com/Robinlovelace/spatial-microsim-book). 18 | -------------------------------------------------------------------------------- /additional-material/13-additional.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Additional tools and techniques" 3 | layout: default 4 | bibliography: bibliography.bib 5 | --- 6 | 7 | # Additional tools and techniques {#additional} 8 | 9 | ```{r, echo=FALSE} 10 | 11 | ``` 12 | 13 | ## R packages for spatial microsimulation 14 | 15 | Most of the code presented so far --- 16 | with the exception of plotting commands based on **ggplot2**, 17 | data manipulation tools from **dplyr** and some packages offering 18 | niche functionality such as **ipfp** and **RNetLogo** --- has been implemented in R's base functions. 19 | This is deliberate: base R offers robustness of 20 | code and minimisation of installation dependencies. 21 | There are often dozens of ways of doing one thing 22 | in R, and a bewildering number of packages offering to help. 23 | The way that will be understood by the greatest number 24 | of people and work on the maximum number of computers 25 | is often preferable, however, unless there is a 26 | clear advantage to using additional packages. 27 | The behaviour of contributed packages 28 | (excepting the `r-recommended` [packages](see http://cran.r-project.org/bin/linux/debian/README)) 29 | may change unexpectedly, 30 | whereas core R functions are likely to remain stable over many decades to come. 31 | 32 | With this caveat in mind, let's begin our tour of 33 | some contributed R that are of use for 34 | spatial microsimulation. 35 | 36 | ### **GREGWT** 37 | 38 | ### **sms** 39 | 40 | ### **multilevel** 41 | 42 | ## The Flexible Modelling Framework (FMF) 43 | 44 | ## Allocation of home-work locations 45 | 46 | A question that arises after spatial microdata have been allocated to 47 | geographical zones is: where exactly in the zone do the people inhabit? 48 | For some applications this may not matter but for others, such as disaster 49 | management and 50 | or transport planning, the precise location of an individual 51 | is important [@Smith2014; @Hanaoka2014]. 52 | 53 | ```{r, echo=FALSE} 54 | # TODO: add reference for above 55 | ``` 56 | 57 | In this section we will see how spatial microdata can be allocated first 58 | to 'urban areas' (to prevent people being placed in the sea, for example) 59 | and then to individual buildings based on freely available Open Street Map 60 | data. This process also makes sense from the perspective of visualisation: 61 | the typical choropleth map outputs of spatial microsimulation models 62 | over-represent low density rural areas and under-represent dense 63 | urban areas in terms of visual 'real-estate'. Plotting attributes only 64 | in the buildings where they occur can help overcome this issue. 65 | 66 | ```{r, echo=FALSE} 67 | # TODO: add figure from O'Brien's online map 68 | ``` 69 | 70 | ## Spatial interaction modelling 71 | 72 | In this example we will demonstrate a method for 73 | evaluating the distributional impact of a new pathway 74 | in an urban setting. 
75 | 76 | ```{r} 77 | ## Advanced applications in agent-based modelling 78 | ``` 79 | 80 | -------------------------------------------------------------------------------- /additional-material/input-data-mipfp.csv: -------------------------------------------------------------------------------- 1 | Health,Age Group, White, Mixed, Asian and Asian British, Black or Black British, Chinese or Other ethnic group,Total 2 | Very good health, 0 to 15,67315,4151,7283,3844,987,83580 3 | , 16 to 24,35665,1403,4387,1706,540,43701 4 | , 25 to 34,34106,948,4919,1865,753,42591 5 | , 35 to 44,32047,516,2759,1650,445,37417 6 | , 45 to 54,26311,317,1184,980,171,28963 7 | , 55 to 64,16263,88,525,293,92,17261 8 | , 65 to 74,7897,38,143,89,18,8185 9 | , 75 and over,3170,12,55,31,5,3273 10 | ,Sum,222774,7473,21255,10458,3011,264971 11 | Good health, 0 to 15,14044,1047,2709,1046,321,19167 12 | , 16 to 24,16018,687,2146,710,260,19821 13 | , 25 to 34,21479,748,3444,1065,457,27193 14 | , 35 to 44,25922,433,2950,1190,407,30902 15 | , 45 to 54,28949,345,1982,1044,253,32573 16 | , 55 to 64,26483,130,1178,355,117,28263 17 | , 65 to 74,19747,76,474,221,67,20585 18 | , 75 and over,12868,42,201,110,19,13240 19 | ,Sum,165510,3508,15084,5741,1901,191744 20 | Fair health, 0 to 15,1699,153,323,144,41,2360 21 | , 16 to 24,2438,115,280,120,33,2986 22 | , 25 to 34,3602,140,563,171,78,4554 23 | , 35 to 44,6017,161,760,323,112,7373 24 | , 45 to 54,9288,137,897,384,112,10818 25 | , 55 to 64,12566,80,787,191,92,13716 26 | , 65 to 74,13323,53,540,252,57,14225 27 | , 75 and over,17776,55,359,223,35,18448 28 | ,Sum,66709,894,4509,1808,560,74480 29 | Bad health, 0 to 15,366,36,72,30,9,513 30 | , 16 to 24,443,15,55,18,4,535 31 | , 25 to 34,917,25,105,54,36,1137 32 | , 35 to 44,1951,45,191,90,43,2320 33 | , 45 to 54,3408,66,249,124,53,3900 34 | , 55 to 64,4554,27,355,76,23,5035 35 | , 65 to 74,4123,17,235,92,18,4485 36 | , 75 and over,6307,25,178,99,24,6633 37 | ,Sum,22069,256,1440,583,210,24558 38 | Very bad health, 0 to 15,148,5,24,13,4,194 39 | , 16 to 24,132,5,19,10,4,170 40 | , 25 to 34,223,12,34,11,8,288 41 | , 35 to 44,520,7,50,27,11,615 42 | , 45 to 54,970,20,81,44,11,1126 43 | , 55 to 64,1250,18,82,23,14,1387 44 | , 65 to 74,1201,4,57,24,11,1297 45 | , 75 and over,1971,7,77,44,8,2107 46 | ,Sum,6415,78,424,196,71,7184 47 | Total, 0 to 15,83572,5392,10411,5077,1362,105814 48 | , 16 to 24,54696,2225,6887,2564,841,67213 49 | , 25 to 34,60327,1873,9065,3166,1332,75763 50 | , 35 to 44,66457,1162,6710,3280,1018,78627 51 | , 45 to 54,68926,885,4393,2576,600,77380 52 | , 55 to 64,61116,343,2927,938,338,65662 53 | , 65 to 74,46291,188,1449,678,171,48777 54 | , 75 and over,42092,141,870,507,91,43701 55 | ,Sum,483477,12209,42712,18786,5753,562937 56 | -------------------------------------------------------------------------------- /additional-material/input-data-mipfp.csv.ods: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/additional-material/input-data-mipfp.csv.ods -------------------------------------------------------------------------------- /additional-material/reformatting-mipfp-example.Rmd: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Load data 4 | 5 | ```{r} 6 | ctabs <- read.csv("additional-material/input-data-mipfp.csv") 7 | ctabs 8 | ``` 9 | 10 | 11 | 12 | ```{r, echo=FALSE} 13 | pkgs <- c("readxl") 14 | lapply(pkgs, library, 
character.only = T) 15 | 16 | df <- read_excel("/tmp/Case Study 3 Data.xlsx", sheet = 2) 17 | df <- df[1:48,] 18 | head(df) 19 | 20 | n1 <- as.character(df[2,3:7]) # first set of names 21 | 22 | 23 | # remove sums 24 | d <- df 25 | sel <- df[,2] == "Sum" 26 | d <- d[!sel,] 27 | d <- d[-c(1:2)] 28 | d <- d[1:5] 29 | d <- d[-c(1, nrow(d)),] 30 | d <- d[-1,] 31 | 32 | # convert to multi-dimensional array 33 | dnames <- c("age", "eth", "health") 34 | names 35 | dims <- c(8, 5, 5) 36 | ``` 37 | 38 | ```{r, eval=FALSE} 39 | # dvec <- as.numeric(d) 40 | # apply(dvec, MARGIN = 2, FUN = class) 41 | dvec <- apply(X = d, MARGIN = 2, FUN = as.numeric) 42 | 43 | seed <- array(data = dvec, dim = dims, dimnames = dnames) 44 | 45 | # marginal 46 | 47 | # read-in the data 48 | 49 | # for zone 1 50 | target.data <- list(c1, c2, c3) # each c is marginal for 1 constraint and 1 zone 51 | Ipfp() # for zone 1 52 | 53 | result <- as.list(1:348) 54 | result[[1]] # 55 | for(i in 1:nrow(constraints)){ 56 | target.data <- list(c1 = ..., 57 | ) 58 | result[[i]] <- Ipfp() 59 | } 60 | 61 | ``` 62 | 63 | 64 | 65 | 66 | -------------------------------------------------------------------------------- /backup-code/.Rapp.history: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/backup-code/.Rapp.history -------------------------------------------------------------------------------- /backup-code/CakeMap-dataknut.R: -------------------------------------------------------------------------------- 1 | ############################################ 2 | #### From the spatial-microsim-book project 3 | #### https://github.com/Robinlovelace/spatial-microsim-book 4 | ############################################ 5 | 6 | # Additions from Ben Anderson (@dataknut) 7 | 8 | # Loading the data: Ensure R is in the right working directory 9 | ind <- read.csv("data/CakeMap/ind.csv") 10 | cons <- read.csv("data/CakeMap/cons.csv") 11 | 12 | # Take a quick look at the data 13 | head(ind) 14 | head(cons) 15 | 16 | # load constraints separately - normally this would be first stage 17 | con1 <- cons[1:12] # load the age/sex constraint 18 | con2 <- cons[13:14] # load the car/no car constraint 19 | con3 <- cons[15:24] # socio-economic class 20 | 21 | cat_labs <- names(cons) # category names, from correct from cons.R 22 | 23 | # set-up aggregate values - column for each category 24 | source("data/CakeMap/categorise.R") # this script must be customised to input data 25 | 26 | # check constraint totals - should be true 27 | sum(ind_cat[,1:ncol(con1)]) == nrow(ind) # is the number in each category correct? 28 | sum(ind_cat[,ncol(con1)+1:ncol(con2)]) == nrow(ind) 29 | 30 | # create 2D weight matrix (individuals, areas) 31 | weights <- array(NA, dim=c(nrow(ind),nrow(cons))) 32 | 33 | # convert survey data into aggregates to compare with census (3D matix) 34 | ind_agg <- matrix(colSums(ind_cat), nrow(cons), ncol(cons), byrow = T) 35 | ind_agg[1:5,1:10] # look at what we've created - n. 
individuals replicated throughout 36 | 37 | ############## The IPF part ############# 38 | 39 | library(ipfp) 40 | cons <- apply(cons, 2, as.numeric) 41 | ind_catt <- t(ind_cat) 42 | # set up initial vector as a load of 1s 43 | x0 <- rep(1, nrow(ind)) 44 | # you can use x0 as a way to start from the original survey weights 45 | # as it just has to be a numeric initial vector (length ncol) 46 | # this might be useful if you have a small number of constraints but 47 | # if you have many the effect of the IPF will tend to drown them out 48 | 49 | weights <- apply(cons, 1, function(x) ipfp(x, ind_catt, x0, 20)) 50 | 51 | ### Convert back to aggregates for testing 52 | for (i in 1:nrow(cons)){ # convert con1 weights back into aggregates 53 | ind_agg[i,] <- colSums(ind_cat * weights[,i])} 54 | 55 | # test results for first row (not necessary for model) 56 | # you could iterate over this to test each zone 57 | ind_agg[1,1:15] - cons[1,1:15] # should be zero for final column - last constraint 58 | # which should remind us that IPF works to an order - so the last constraint is 59 | # fitted perfectly. This might matter if you think other constraints should be fitted perfectly... 60 | cor(as.numeric(ind_agg), as.numeric(cons)) # fit between contraints and estimate 61 | 62 | # at this point RL wants to integrise to create a spatial microdataset of whole 'units' 63 | # But we don't have to - for many applications we may want to keep all the survey units (people or households) 64 | # with their fractional weights to avoid losing information. It also helps if we're interested in distributional 65 | # statistics for each area. 66 | 67 | # to do this simply join the weights matrix back on to the original individual data 68 | # we have to assume R has kept them in the correct order! 69 | 70 | # just do a column bind 71 | ind_final <- cbind(ind,weights) 72 | View(ind_final) 73 | # so now we have a weight for each individual for each zone and from here on we can do 74 | # a range of weighted statistics or collapse to tables by zone etc etc 75 | # Would be a good idea at this point to rename the zone columns to their actual geography. 
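# Added sketch (not part of the original script): one example of such a weighted
# statistic is the weighted count of each NS-SEC category per zone. Each column
# of 'weights' corresponds to one zone, so grouped weighted sums give a zone-level
# class profile. NSSEC8 is assumed to be a column of ind here (check names(ind));
# swap in any other categorical variable of interest.
nssec_by_zone <- apply(weights, 2, function(w) tapply(w, ind$NSSEC8, sum))
round(nssec_by_zone[, 1:3], 1) # weighted NS-SEC profile of the first three zones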
-------------------------------------------------------------------------------- /backup-code/tests.R: -------------------------------------------------------------------------------- 1 | con_age <- read.csv("data/SimpleWorld/age.csv") 2 | con_sex <- read.csv("data/SimpleWorld/sex.csv") 3 | ind <- read.csv("data/SimpleWorld/ind.csv") 4 | 5 | (ind$age <- cut(ind$age, breaks = c(0, 49, 120), labels = c("a0_49", "a50+"))) 6 | 7 | names(con_age) <- levels(ind$age) # rename aggregate variables 8 | h 9 | # make the number of constraints larger - to see benefit of parallel processing 10 | cons <- cbind(con_age, con_sex) 11 | 12 | cat_age <- model.matrix(~ ind$age - 1) 13 | cat_sex <- model.matrix(~ ind$sex - 1)[, c(2, 1)] 14 | (ind_cat <- cbind(cat_age, cat_sex)) # combine flat representations of the data 15 | 16 | cons <- apply(cons, 2, as.numeric) # convert matrix to numeric data type 17 | # cons <- cons[sample(3, size = 500, replace = T),] 18 | 19 | weights <- matrix(data = NA, nrow = nrow(ind), ncol = nrow(cons)) 20 | 21 | ind_catt <- t(ind_cat) 22 | x0 <- rep(1, nrow(ind)) 23 | 24 | weights <- apply(cons, MARGIN = 1, FUN = function(x) ipfp(as.numeric(x), ind_catt, x0, tol = 1, maxit = 20)) 25 | 26 | # Tests of parallel implementation of ipf 27 | library(parallel) 28 | 29 | detectCores() 30 | cl <- makeCluster(getOption("cl.cores", 4)) 31 | cl <- clusterExport(cl, varlist = c("ipfp", "ind_cat", "ind")) 32 | 33 | parApply(cons, MARGIN = 1, FUN = function(x) ipfp(as.numeric(x), t(ind_cat), x0 = rep(1,nrow(ind)))) 34 | 35 | 36 | xArray <- array(NA, dim = c(3,3)) 37 | xMatrix <- matrix(NA, nrow = 3, ncol = 3) 38 | identical(xArray, xMatrix) 39 | 40 | for(i in 1:ncol(weights)){ 41 | weights[,i] <- ipfp(cons[i,], ind_catt, x0, maxit = 20) 42 | } 43 | 44 | # Demonstration of incorrect ipfp 45 | weights1 <- apply(cons, 1, function(x) ipfp(x, ind_catt, x0, tol = 1, maxit = 20)) 46 | weights2 <- apply(cons, 1, function(x) ipfp(x, ind_catt, x0, 20)) 47 | 48 | -------------------------------------------------------------------------------- /build.R: -------------------------------------------------------------------------------- 1 | # TODO for the book project overall 2 | # Individual chapters on eprints 3 | # Implement regex to make bibliography happen in CRC press style 4 | # Add urls to all the references and packages 5 | # Propensity to cycle 6 | # IPF in R/loglin/mipfp/GREGWT 7 | # Reference UrbanSim 8 | # Chapter summaries at outset? 
9 | # Mention of collaborative project early on 10 | 11 | pkgs <- c("knitr", "rmarkdown", "png", "ggmap", "dplyr", "ipfp", "rgeos", "mipfp", "rgdal", "gridExtra", "maptools", "jpeg", "tmap", "tidyr", "mlogit", "simPop") 12 | wpacks <- pkgs %in% installed.packages() 13 | install.packages(pkgs[!wpacks]) 14 | 15 | # file.copy(from = "~/Documents/smr.bib", to = "bibliography.bib", overwrite = T) 16 | 17 | # # View the order chapters will be knitted (see code/book-functions.R) 18 | # # chap_ord <- c(7,16,10,5,12,2,8,4,13,14,15,11,1,3,6,9) 19 | # cfiles <- list.files(pattern = "*.Rmd$") 20 | # # cfiles <- cfiles[chap_ord] # chapter order 21 | # cfiles 22 | # 23 | # # Add book header 24 | # book_header = readLines(textConnection('--- 25 | # title: "Spatial microsimulation with R" 26 | # output: 27 | # \ \ pdf_document: 28 | # \ \ \ \ fig_caption: yes 29 | # \ \ \ \ highlight: monochrome 30 | # \ \ \ \ includes: null 31 | # \ \ \ \ keep_tex: yes 32 | # \ \ \ \ number_sections: yes 33 | # \ \ \ \ toc: yes 34 | # bibliography: bibliography.bib 35 | # csl: elsevier-harvard.csl 36 | # layout: default 37 | # ---')) 38 | # 39 | # source("code/book-functions.R") 40 | # # file.remove("book.Rmd") 41 | # # Rmd_bind(book_header = book_header) 42 | # Rmd_bind_mod(book_header = book_header) 43 | # 44 | # # Packages needed to build the book 45 | # # install.packages("knitr", "rmarkdown", "png", "ggmap", "dplyr", "ipfp") 46 | # library(knitr) 47 | # library(rmarkdown) 48 | # 49 | # # Build the book: 50 | # render("book.Rmd", output_format = "pdf_document") 51 | 52 | 53 | # Build the CRC-formated version - requires local files 54 | # need to build the .tex manually for references to compile 55 | # source("code/build-CRC-version.R") 56 | # # Make latex-specific changes automated 57 | # booktex <- readLines("spatial-microsim-book.tex") 58 | # booktex[grep("\\{Glossary\\}", booktex)] 59 | # booktex <- gsub(pattern = "chapter\\{Glossary\\}", "chapter*\\{Glossary\\}\n\\\\addcontentsline{toc}{chapter}{Glossary} 60 | # ", booktex) 61 | # writeLines(booktex, "spatial-microsim-book.tex") 62 | # in case index does not build - run again! 
63 | # system("pdflatex --interaction=nonstopmode spatial-microsim-book.tex") 64 | 65 | # For website build see gh-pages version 66 | 67 | # Files to move to gh-pages branch 68 | # file.remove("book.Rmd") 69 | 70 | # Remove latex-specific document links for website 71 | # cfiles <- list.files("/tmp", pattern = "*.Rmd", full.names = T) 72 | # for(i in cfiles){ 73 | # text <- readLines(i) 74 | # sel <- grepl("\\(\\#", text) 75 | # text <- text[!sel] 76 | # writeLines(text, con = i) 77 | # } 78 | 79 | # # # regex with R - convert book ready for regexxing 80 | # d <- readLines("introduction.Rmd") 81 | # sel <- grep("@", d) 82 | # s <- d[sel] 83 | # gsub(".+?(?=a)", replacement = "", s, perl = T) # test of greedy matching 84 | # 85 | # # select quotes 86 | # 87 | # s <- grep(" @", d) 88 | # s <- grep("\\ @|\\[@", d) 89 | # d[s] 90 | 91 | # backup 92 | # system("cp -rv ~/Dropbox/spatial-microsim-book /media/robin/data/backups/") 93 | 94 | # command-line tools for dif tracking 95 | # latexdiff book-b4-comments.tex book.tex > dif.tex 96 | # pdflatex dif.tex 97 | 98 | -------------------------------------------------------------------------------- /cache-data-prep.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/cache-data-prep.RData -------------------------------------------------------------------------------- /cache-smsim-in-R.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/cache-smsim-in-R.RData -------------------------------------------------------------------------------- /code/CakeMap.R: -------------------------------------------------------------------------------- 1 | ############################################ 2 | #### From the spatial-microsim-book project 3 | #### https://github.com/Robinlovelace/spatial-microsim-book 4 | ############################################ 5 | 6 | library(dplyr) # load dplyr package for joining datasets 7 | 8 | # Loading the data: Ensure R is in the right working directory 9 | ind <- read.csv("data/CakeMap/ind.csv") 10 | cons <- read.csv("data/CakeMap/cons.csv") 11 | 12 | # Take a quick look at the data 13 | head(ind) 14 | head(cons) 15 | 16 | # Load constraints separately - normally this would be first stage 17 | con1 <- cons[1:12] # load the age/sex constraint 18 | con2 <- cons[13:14] # load the car/no car constraint 19 | con3 <- cons[15:24] # socio-economic class 20 | 21 | cat_labs <- names(cons) # category names, from correct from cons.R 22 | 23 | # Set-up aggregate values - column for each category 24 | source("data/CakeMap/categorise.R") # script to create binary dummy variables 25 | # Check constraint totals - should be true 26 | sum(ind_cat[,1:ncol(con1)]) == nrow(ind) # is the number in each category correct? 
27 | sum(ind_cat[,ncol(con1)+1:ncol(con2)]) == nrow(ind) 28 | 29 | # Create 2D weight matrix (individuals, areas) 30 | weights <- array(NA, dim=c(nrow(ind),nrow(cons))) 31 | 32 | # Convert survey data into aggregates to compare with census 33 | ind_agg <- matrix(colSums(ind_cat), nrow(cons), ncol(cons), byrow = T) 34 | 35 | # Iterative proportional fitting (IPF) stage 36 | library(ipfp) # load the ipfp package - may need install.packages("ipfp") 37 | cons <- apply(cons, 2, as.numeric) # convert the constraints to 'numeric' 38 | ind_catt <- t(ind_cat) # transpose the dummy variables for ipfp 39 | x0 <- rep(1, nrow(ind)) # set the initial weights 40 | weights <- apply(cons, 1, function(x) ipfp(x, ind_catt, x0, maxit = 20)) 41 | 42 | ### Convert back to aggregates 43 | ind_agg <- t(apply(weights, 2, function(x) colSums(x * ind_cat))) 44 | 45 | # test results for first row (not necessary for model) 46 | ind_agg[1,1:15] - cons[1,1:15] # should be zero or close to zero 47 | cor(as.numeric(ind_agg), as.numeric(cons)) # fit between constraints and estimate 48 | 49 | # Integerise if integer results are required - open code/CakeMapInts.R to see how 50 | source("code/CakeMapInts.R") 51 | 52 | # Benchmarking 53 | # library(microbenchmark) 54 | # microbenchmark(source("CakeMap.R"), times = 1) # 2.05 s 55 | # # How long does this operation take in pure R? 56 | # old <- setwd("~/repos/smsim-course/") 57 | # microbenchmark(source("cMap.R"), times = 1) # 76.72 s 58 | # setwd(old) -------------------------------------------------------------------------------- /code/CakeMapInts.R: -------------------------------------------------------------------------------- 1 | # Script 'integerising' CakeMap weights, then generating and exploring spatial microdata 2 | 3 | source("code/functions.R") # functions for spatial microsimulation, inc. int_trs 4 | 5 | ints <- unlist(apply(weights, 2, function(x) int_expand_vector(int_trs(x)))) # generate integerised result 6 | ints_df <- data.frame(id = ints, zone = rep(1:nrow(cons), round(colSums(weights)))) 7 | ind$id <- 1:nrow(ind) # assign each individual an id 8 | 9 | # Create spatial microdata, by joining the ids with associated attributes 10 | ints_df <- inner_join(ints_df, ind) # join the individual-level attributes 11 | 12 | # Exploration of individual-level variability in class by zone 13 | class(ints_df$NSSEC8) # what class is the class variable?
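# NB: if NSSEC8 is stored as a factor, as.numeric() below returns the underlying
# integer level codes rather than the labels themselves - hence the class() check above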
14 | ints_df$NSSEC <- as.numeric(ints_df$NSSEC8) # convert to numeric class 15 | ints_df$NSSEC[ ints_df$NSSEC > 10] <- NA # dealing with NA data 16 | sd_nssec <- aggregate(ints_df$NSSEC, by = list(ints_df$zone), FUN = sd, na.rm = TRUE) 17 | which.max(sd_nssec$x) # which zone has the greatest variability in NS-SEC groups? 18 | -------------------------------------------------------------------------------- /code/CakeMapMipfpCon1Convert.R: -------------------------------------------------------------------------------- 1 | # Transform con1 into a 3D array: con1_convert 2 | 3 | names <- c(list(rownames(cons)),dimnames(weight_init)[c(4,6)]) 4 | con1_convert <- array(NA, dim=c(nrow(cons),2,6), dimnames = names) 5 | 6 | for(zone in rownames(cons)){ 7 | for (sex in dimnames(con1_convert)$Sex){ 8 | for (age in dimnames(con1_convert)$ageband4){ 9 | con1_convert[zone,sex,age] <- con1[zone,paste(sex,age,sep="")] 10 | } 11 | } 12 | } 13 | 14 | # check margins per zone: 15 | table(rowSums(con1)==apply(con1_convert, 1, sum)) -------------------------------------------------------------------------------- /code/CakeMapMipfpData.R: -------------------------------------------------------------------------------- 1 | # Loading the CakeMap data 2 | # Changing the category names so they match between ind and cons 3 | 4 | 5 | # Loading the data: Ensure R is in the right working directory 6 | ind <- read.csv("data/CakeMap/ind.csv") 7 | cons <- read.csv("data/CakeMap/cons.csv") 8 | 9 | # Load constraints separately - normally this would be the first stage 10 | con1 <- cons[1:12] # load the age/sex constraint 11 | con2 <- cons[13:14] # load the car/no car constraint 12 | con3 <- cons[15:24] # socio-economic class 13 | 14 | # Rename the categories in "ind" to correspond to those of cons 15 | ind$Car <- sapply(ind$Car, FUN = switch, "Car", "NoCar") 16 | ind$Sex <- sapply(ind$Sex, FUN = switch, "m", "f") 17 | ind$NSSEC8 <- as.factor(ind$NSSEC8) 18 | levels(ind$NSSEC8) <- colnames(con3) 19 | ind$ageband4 <- gsub(pattern = "-", replacement = "_", x = ind$ageband4) 20 | 21 | -------------------------------------------------------------------------------- /code/CakeMapPlot.R: -------------------------------------------------------------------------------- 1 | # Script for plotting the output of cMap 2 | # Must be run after cMap.R and TRS-integerisation.R 3 | 4 | # Load the prerequisite packages - you may need to install these 5 | # E.g. install.packages("ggplot2") 6 | x <- c("dplyr", "rgeos", "rgdal", "ggmap") 7 | lapply(x, library, character.only = T) 8 | 9 | # save geographic names to the cakes output 10 | geonames <- read.csv("data/CakeMap/cars-raw.csv") 11 | head(geonames) 12 | geonames <- as.character(geonames[3:126,2]) 13 | # NB: CakeMap.R must be run first 14 | source("code/CakeMap.R") 15 | cakes = as_data_frame(ind_agg) 16 | geocakes <- bind_cols(id = geonames, cakes) 17 | head(geocakes) 18 | 19 | # load the geographic data 20 | load("data/CakeMap/wards.RData") 21 | wards <- spTransform(wards, CRSobj=CRS("+init=epsg:4326")) # transform CRS 22 | 23 | # prepare to join the geographic data with cake data 24 | names(wards) 25 | names(geocakes)[1] <- names(wards)[1] <- "id" # rename geocakes' geonames for join 26 | head(geocakes) 27 | head(wards@data) 28 | head(left_join(wards@data, geocakes)) 29 | wards@data <- left_join(wards@data, geocakes) 30 | 31 | # fortify the data for ggplot2 32 | # you'll need to install.packages("rgeos") if not already installed 33 | wardsF <- fortify(wards, region="id") 34 | head(wardsF)
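# join the ward attributes (including the simulated cake-consumption estimates)
# onto the fortified polygons via the shared 'id' column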
35 | names(wards) 36 | wardsF <- inner_join(wardsF, wards@data, by = "id") 37 | head(wardsF) # see average cake consumption added 38 | 39 | # set up bounding box 40 | bb <- b <- bbox(wards) 41 | bb[1, ] <- (b[1, ] - mean(b[1, ])) * 1.05 + mean(b[1, ]) 42 | bb[2, ] <- (b[2, ] - mean(b[2, ])) * 1.05 + mean(b[2, ]) 43 | b[1, ] <- (b[1, ] - mean(b[1, ])) * 1.7 + mean(b[1, ]) 44 | b[2, ] <- (b[2, ] - mean(b[2, ])) * 1.7 + mean(b[2, ]) 45 | 46 | # map the result! 47 | ggplot() + 48 | geom_polygon(data=wardsF, aes(long, lat, group=group, fill=avCake), color = "black", alpha=0.2) 49 | baseMap <- get_map(location=bb, maptype="terrain") 50 | # baseMap <- get_map(location=b, zoom=10, source='osm') 51 | # baseMap <- get_map(location=b, source='stamen') 52 | 53 | # ggmap(baseMap) + 54 | ggplot() + 55 | geom_polygon(data=wardsF, aes(long, lat, group=group, fill=avCake), alpha=0.5) + 56 | geom_path(data=wardsF, aes(long, lat, group=group), color="black", alpha=0.3) + 57 | scale_fill_continuous(low = "green", high = "red", name = "Simulated\naverage\nfrequency\nof cake\nconsumption\n(times/wk)") + xlim(bb[1,]) + ylim(bb[2,]) + 58 | theme_minimal() 59 | ## ggsave("figures/CakeMap.png") 60 | 61 | # analysis 62 | imd <- read.csv("data/CakeMap/inc-est-2001.csv") 63 | summary(imd$NAME %in% wards$NAME) 64 | summary(pmatch(wards$NAME, imd$NAME)) 65 | which(imd$NAME %in% wards$NAME) %in% pmatch(wards$NAME, imd$NAME) 66 | head(inner_join(wards@data, imd)) 67 | wards@data <- inner_join(wards@data, imd) 68 | plot(wards$Avinc, wards$avCake) 69 | cor(wards$Avinc, wards$avCake, use='complete.obs') 70 | 71 | # individual level analysis 72 | levels(ind$NCakes) 73 | ind$NCakes <- factor(ind$NCakes, c("<0.5", levels(ind$NCakes)[c(1,2,3,4)])) 74 | levels(ind$NCakes) 75 | ind$avnumcakes <- 1 76 | ind$avnumcakes[ind$NCakes == levels(ind$NCakes)[1]] <- 0.1 77 | ind$avnumcakes[ind$NCakes == levels(ind$NCakes)[2]] <- 0.5 78 | ind$avnumcakes[ind$NCakes == levels(ind$NCakes)[3]] <- 1.5 79 | ind$avnumcakes[ind$NCakes == levels(ind$NCakes)[4]] <- 4 80 | ind$avnumcakes[ind$NCakes == levels(ind$NCakes)[5]] <- 8 81 | summary(ind$avnumcakes) 82 | ind$NSSEC8 <- as.character(ind$NSSEC8) 83 | aggregate(ind$avnumcakes ~ ind$NSSEC8, FUN=mean) 84 | summary(ind$avnumcakes) 85 | mean(ind$avnumcakes[ ind$NSSEC8 == "1.1" | ind$NSSEC8 == "1.2" | ind$NSSEC8 == "2" ]) 86 | mean(ind$avnumcakes[ ind$NSSEC8 == "8" | ind$NSSEC8 == "7" | ind$NSSEC8 == "6" ]) 87 | 88 | (hm <- table(ind$NCakes, ind$NSSEC8)) 89 | heatmap(hm) 90 | heatmap(hm, Rowv=NA, Colv=NA) 91 | 92 | library(gplots) # for another kind of heat map 93 | 94 | heatmap.2(hm, Rowv=NA, Colv=NA, xlab = "Socio-economic class", ylab = "Frequency of cake consumption") 95 | hmm <- reshape2::melt(hm) # melt() comes from the reshape2 package 96 | ggplot(hmm) + geom_tile(aes(Var1, as.character(Var2), fill = value)) + 97 | scale_fill_continuous(low="green", high="red") -------------------------------------------------------------------------------- /code/CakeMapTimeAnalysis.R: -------------------------------------------------------------------------------- 1 | # comparison of times to perform ipfp and mipfp 2 | 3 | Neval = 1 4 | 5 | times <- array(0,dim=c(2,Neval)) 6 | 7 | for (i in 1:Neval){ 8 | print(i) 9 | times[1,i] <- system.time(apply(cons_prop, 1, function(x) ipfp(x, ind_catt, x0, tol = 1e-12)))[1] 10 | 11 | times[2,i] <- system.time(Ipfp( weight_init, descript, target, print = FALSE, tol=1e-12))[1] 12 | } 13 | 14 | # with a problem of this size, ipfp seems to be better 15 | # we want to see how the run time evolves as the number of individuals
available increases 16 | 17 | ind_catt2 <- cbind(ind_catt, ind_catt, ind_catt) 18 | x02 <- cbind(x0,x0,x0) 19 | weight_init2 <- weight_init * 3 20 | 21 | 22 | minInd <- 200 23 | maxInd <- 2000 24 | pas <- 100 25 | 26 | times2 <- array(0,dim=c(3,ceiling((maxInd-minInd)/pas)+1)) 27 | i=1 28 | 29 | for (indiv in seq(minInd,maxInd,pas)){ 30 | print(indiv) 31 | times2[1,i] <- indiv 32 | times2[2,i] <- system.time(apply(cons_prop, 1, function(x) ipfp(x, ind_catt2[,1:indiv], x02[1:indiv], tol = 1e-12)))[1] 33 | 34 | times2[3,i] <- system.time(Ipfp( weight_init*indiv/916, descript, target, print = FALSE, tol=1e-12))[1] 35 | i=i+1 36 | } 37 | 38 | plot(times2[1,],times2[2,],pch=c(1),ylim=c(min(times2[2,])-1,max(times2[2,])+1), main= "Time to generate a weight matrix \n with tol=1e-12 ",xlab="Number of individuals in the microdata",ylab="Time") 39 | par(new=TRUE) 40 | plot(times2[1,],times2[3,],pch=c(3),ylim=c(min(times2[2,])-1,max(times2[2,])+1),axes=F,ann=F) 41 | legend("topleft",c("ipfp","mipfp"),pch = c(1,3)) 42 | -------------------------------------------------------------------------------- /code/CakeMapWithMipfp.R: -------------------------------------------------------------------------------- 1 | library(mipfp) 2 | 3 | 4 | source("code/CakeMapMipfpData.R") 5 | 6 | 7 | # Initial weight matrix 8 | weight_init_onezone <- table(ind) 9 | # Check order of the variables 10 | dimnames(weight_init_onezone) 11 | 12 | ######################################## 13 | # All zones together 14 | ######################################## 15 | # Repeat the initial matrix n_zone times 16 | init_cells <- rep(weight_init_onezone, each = nrow(cons)) 17 | 18 | # Define the names 19 | names <- c(list(rownames(cons)),as.list(dimnames(weight_init_onezone))) 20 | 21 | # Structure the data 22 | weight_init <- array(init_cells, dim = 23 | c(nrow(cons),dim(weight_init_onezone)), 24 | dimnames = names) 25 | 26 | ######################################## 27 | # Check constraint totals 28 | ######################################## 29 | 30 | # check the totals per zone 31 | table(rowSums(con2)==rowSums(con1)) 32 | table(rowSums(con3)==rowSums(con1)) 33 | table(rowSums(con2)==rowSums(con3)) 34 | 35 | # 1 and 2 are the same, 3 is different 36 | 37 | # Observe the global total 38 | sum(con1) 39 | sum(con2) 40 | sum(con3) 41 | # 1 and 2 are the same, 3 is different 42 | 43 | ######################################## 44 | # convert constraint 1 into a form readable by mipfp 45 | ######################################## 46 | 47 | source("code/CakeMapMipfpCon1Convert.R") 48 | 49 | ######################################## 50 | # To perform the Ipfp process correctly, 51 | # we have to use coherent constraints, 52 | # with the same marginals per zone. 53 | # Since NSSEC contains fewer individuals, possibly because of 54 | # "NA" values, we take con1 and con2 as the reference.
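# Illustrative aside (toy numbers, not from the CakeMap data): rescaling one
# constraint so its zone totals match another's, as done for con3 just below
# toy_a <- matrix(c(10, 20, 30, 40), nrow = 2) # zone totals: 40 and 60
# toy_b <- matrix(c(4, 10, 8, 10), nrow = 2)   # zone totals: 12 and 20
# toy_b * rowSums(toy_a) / rowSums(toy_b)      # rescaled; zone totals become 40 and 60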
55 | con3_prop <- con3*rowSums(con2)/rowSums(con3) 56 | 57 | # Check the new marginals per zone 58 | table(rowSums(con2)==rowSums(con3_prop)) 59 | # This is now ok 60 | 61 | # Perform the Ipfp function 62 | target <- list(con1_convert,as.matrix(con2),as.matrix(con3_prop)) 63 | descript <- list(c(1,4,6), c(1,3),c(1,5)) 64 | 65 | 66 | 67 | weight_mipfp <- Ipfp( weight_init, descript, target, 68 | print = TRUE,tol=1e-12) 69 | ######################################## 70 | # Quality of this IPF 71 | ######################################## 72 | # con1 73 | max(abs(con1_convert-apply(weight_mipfp$x.hat,c(1,4,6),sum))) 74 | 75 | # con2 76 | max(abs(con2-apply(weight_mipfp$x.hat,c(1,3),sum))) 77 | 78 | # con3 79 | max(abs(con3_prop-apply(weight_mipfp$x.hat,c(1,5),sum))) 80 | 81 | # con3 is well fitted for all zones, but con1 and 82 | # con2 have some municipalities with large errors 83 | 84 | 85 | ######################################## 86 | # Convert ipfp result for comparison 87 | ######################################## 88 | # first execute CakeMap.R 89 | weight_ipfp <- array(0, dim=dim(weight_init), dimnames = dimnames(weight_init)) 90 | 91 | for (indiv in 1:nrow(ind)){ 92 | temp <- weight_ipfp[,c(as.character(ind[indiv,1])),c(as.character(ind[indiv,2])),c(as.character(ind[indiv,3])),c(as.character(ind[indiv,4])),c(as.character(ind[indiv,5]))] 93 | weight_ipfp[,c(as.character(ind[indiv,1])),c(as.character(ind[indiv,2])),c(as.character(ind[indiv,3])),c(as.character(ind[indiv,4])),c(as.character(ind[indiv,5]))] <- temp +weights[indiv,] 94 | } 95 | 96 | # compare results 97 | 98 | which.max(abs(weight_ipfp-weight_mipfp$x.hat)) 99 | sum(weight_ipfp) 100 | sum(weight_mipfp$x.hat) 101 | 102 | plot(weight_ipfp,weight_mipfp$x.hat) 103 | max(apply(weight_mipfp$x.hat,1,sum)-apply(weight_ipfp,1,sum)) 104 | # The total of the ipfp weights matches that of the third constraint...
105 | # This is due to the order of the constraints 106 | # Indeed, if we change the order of the constraints and re-calculate the ipfp: 107 | 108 | cons <- cons[,c(15:24,1:14)] 109 | ind_cat <- ind_cat[,c(15:24,1:14)] 110 | 111 | 112 | # Iterative proportional fitting (IPF) stage 113 | library(ipfp) # load the ipfp package -may need install.packages("ipfp") 114 | cons <- apply(cons, 2, as.numeric) # convert the constraints to 'numeric' 115 | ind_catt <- t(ind_cat) # transpose the dummy variables for ipfp 116 | x0 <- rep(1, nrow(ind)) # set the initial weights 117 | weights <- apply(cons, 1, function(x) ipfp(x, ind_catt, x0, maxit = 20)) 118 | 119 | # And the sum is now the same and the results also 120 | sum(weights) 121 | 122 | 123 | ################################################ 124 | # Convert ipfp result for comparison 125 | ################################################ 126 | 127 | weight_ipfp <- array(0, dim=dim(weight_init), dimnames = dimnames(weight_init)) 128 | 129 | for (indiv in 1:nrow(ind)){ 130 | temp <- weight_ipfp[,c(as.character(ind[indiv,1])),c(as.character(ind[indiv,2])),c(as.character(ind[indiv,3])),c(as.character(ind[indiv,4])),c(as.character(ind[indiv,5]))] 131 | weight_ipfp[,c(as.character(ind[indiv,1])),c(as.character(ind[indiv,2])),c(as.character(ind[indiv,3])),c(as.character(ind[indiv,4])),c(as.character(ind[indiv,5]))] <- temp +weights[indiv,] 132 | } 133 | 134 | 135 | sum(weight_ipfp-weight_mipfp$x.hat) 136 | -------------------------------------------------------------------------------- /code/ConvertIpfpWeights.R: -------------------------------------------------------------------------------- 1 | # first execute the CakeMap.R 2 | weight_ipfp <- array(0, dim=dim(weight_init), dimnames = dimnames(weight_init)) 3 | 4 | for (indiv in 1:nrow(ind)){ 5 | temp <- weight_ipfp[,c(as.character(ind[indiv,1])),c(as.character(ind[indiv,2])),c(as.character(ind[indiv,3])),c(as.character(ind[indiv,4])),c(as.character(ind[indiv,5]))] 6 | weight_ipfp[,c(as.character(ind[indiv,1])),c(as.character(ind[indiv,2])),c(as.character(ind[indiv,3])),c(as.character(ind[indiv,4])),c(as.character(ind[indiv,5]))] <- temp +weights[indiv,] 7 | } -------------------------------------------------------------------------------- /code/SimpleWorld.R: -------------------------------------------------------------------------------- 1 | # SimpleWorld.R 2 | # Raw code needed to run the SimpleWorld example 3 | ind <- read.csv("data/SimpleWorld/ind.csv") 4 | class(ind) # verify the data type of the object 5 | ind # print the individual-level data 6 | 7 | con_age <- read.csv("data/SimpleWorld/age.csv") 8 | con_sex <- read.csv("data/SimpleWorld/sex.csv") 9 | 10 | # Convert age into a categorical variable with user-chosen labels 11 | (ind$age <- cut(ind$age, breaks = c(0, 49, 120), labels = c("a0_49", "a50+"))) 12 | names(con_age) <- levels(ind$age) # rename aggregate variables 13 | cons <- cbind(con_age, con_sex) 14 | 15 | cat_age <- model.matrix(~ ind$age - 1) 16 | cat_sex <- model.matrix(~ ind$sex - 1)[, c(2, 1)] 17 | (ind_cat <- cbind(cat_age, cat_sex)) # combine flat representations of the data 18 | 19 | colSums(ind_cat) # view the aggregated version of ind 20 | ind_agg <- colSums(ind_cat) # save the result 21 | 22 | rbind(cons[1,], ind_agg) # test compatibility between ind_agg and cons objects 23 | 24 | weights <- matrix(data = NA, nrow = nrow(ind), ncol = nrow(cons)) 25 | dim(weights) # the dimension of the weight matrix: 5 rows by 3 columns 26 | 27 | library(ipfp) # load the ipfp library after: 
install.packages("ipfp") 28 | cons <- apply(cons, 2, as.numeric) # convert matrix to numeric data type 29 | ind_catt <- t(ind_cat) # save transposed version of ind_cat 30 | x0 <- rep(1, nrow(ind)) # save the initial vector 31 | weights <- apply(cons, 1, function(x) ipfp(x, ind_catt, x0, maxit = 20)) 32 | 33 | source("code/functions.R") 34 | 35 | set.seed(0) 36 | int_pp(x = c(0.333, 0.667, 3)) 37 | int_pp(x = c(1.333, 1.333, 1.333)) 38 | 39 | 40 | # Method 2: using apply 41 | ints <- apply(weights, 2, int_trs) # generate integerised result 42 | indices <- NULL 43 | ints <- for(i in 1:ncol(ints)){ 44 | indices <- c(indices, int_expand_vector(ints[,i])) 45 | } 46 | 47 | ints_df <- data.frame(id = indices, zone = rep(1:nrow(cons), colSums(weights))) 48 | 49 | ind_full <- read.csv("data/SimpleWorld/ind-full.csv") 50 | library(dplyr) # install.packages(dplyr) if not installed 51 | ints_df <- inner_join(ints_df, ind_full) 52 | 53 | 54 | ## ------------------------------------------------------------------------ 55 | ints_df[ints_df$zone == 2, ] 56 | -------------------------------------------------------------------------------- /code/bbuild.R: -------------------------------------------------------------------------------- 1 | # code to build the book 2 | b <- readLines("book.tex") # read in tex file 3 | 4 | bgn <- grep("Introduction", b)[1] 5 | b <- b[bgn:(length(b) - 2)] 6 | b <- gsub("\\\\section\\{", "\\\\chapter\\{", x = b) 7 | b <- gsub("subsection\\{", "section\\{", x = b) 8 | 9 | # Take a subset of b (to test where fails occur) 10 | # b <- b[1:400] 11 | 12 | # Additional material to include 13 | # a <- "\\usepackage{hyperref}" 14 | 15 | # kp <- 9 # where do package descriptions end? 16 | # kf <- grep("mainmatter", k) # frontmatter up to and including here 17 | kf <- readLines("frontmatter/pream.tex") 18 | 19 | # kp <- k[1:kp] 20 | # kf <- k[(length(kp) + 1):kf] # frontmatter 21 | 22 | # kp <- c(kp, a) 23 | 24 | k <- c(kf, b, c("\\printindex", "\\end{document}")) 25 | 26 | 27 | biblilines <- grep("section\\*\\{Bibliography\\}|\\{section\\}\\{Bibliography\\}", x = k) 28 | kbl <- k[biblilines] 29 | kbl <- gsub(pattern = "section", replacement = "chapter", x = kbl) 30 | 31 | # glosline <- grep("chapter\\{Glossary\\}", x = k) 32 | # k[(glosline - 1):(glosline + 2)] 33 | # k[glosline + 1] <- "\\addcontentsline{toc}{chapter}{Glossary}" 34 | 35 | k[biblilines] <- kbl 36 | 37 | # add part 1 38 | p1 <- "\\part{Introducing spatial microsimulation with R}" 39 | p2 <- "\\part{Generating spatial microdata}" 40 | p3 <- "\\part{Modelling spatial microdata}" 41 | 42 | ps <- grep(pattern = "\\chapter\\{Intro|\\chapter\\{Data|\\chapter\\{The T", x = k) 43 | 44 | k <- c( 45 | k[1:(ps[1] -1)], 46 | p1, k[ps[1]:(ps[2] -1)], 47 | p2, k[ps[2]:(ps[3] -1)], 48 | p3, k[ps[3]:length(k)] 49 | ) 50 | 51 | 52 | writeLines(k, con = "spatial-microsim-book.tex") 53 | 54 | # out-takes - code not used 55 | # b[1] <- gsub("documentclass\\[\\]\\{article\\}", "documentclass\\[krantz1,ChapterTOCs\\]\\{krantz\\}", x = b[1]) # change 1st line 56 | # gsub("\\\\section\\{", "\\\\chapter\\{", x = b[108]) # test gsub -------------------------------------------------------------------------------- /code/book-functions.R: -------------------------------------------------------------------------------- 1 | # Book building functions 2 | Rmd_bind <- function(dir = ".", 3 | book_header = readLines(textConnection("---\ntitle: 'Title'\n---")), 4 | chap_ord = NULL){ 5 | old <- setwd(dir) 6 | if(length(grep("book.Rmd$", list.files())) > 0){ 7 
| warning("book.Rmd already exists") 8 | } 9 | cfiles <- list.files(pattern = "*.Rmd", ) 10 | cfiles <- cfiles[-grep("book", cfiles)] 11 | if(!is.null(chap_ord)) cfiles <- cfiles[chap_ord] # chapter order 12 | write(book_header, file = "book.Rmd", ) 13 | ttext <- NULL 14 | for(i in 1:length(cfiles)){ 15 | text <- readLines(cfiles[i]) 16 | hspan <- grep("---", text) 17 | text <- text[-c(hspan[1]:hspan[2])] 18 | write(text, sep = "\n", file = "book.Rmd", append = T) 19 | } 20 | # render("book.Rmd", output_format = "pdf_document") 21 | setwd(old) 22 | } 23 | 24 | Rmd_bind_mod <- function(dir = ".", 25 | book_header = readLines(textConnection("---\ntitle: 'Title'\n---")), 26 | chap_ord = NULL){ 27 | old <- setwd(dir) 28 | if(length(grep("book.Rmd", list.files())) > 0){ 29 | warning("book.Rmd already exists") 30 | } 31 | cfiles <- list.files(pattern = "*.Rmd$", ) 32 | cfiles <- cfiles[-grep("book", cfiles)] 33 | if(!is.null(chap_ord)) cfiles <- cfiles[chap_ord] # chapter order 34 | write(book_header, file = "book.Rmd", ) 35 | ttext <- NULL 36 | for(i in 1:length(cfiles)){ 37 | text <- readLines(cfiles[i]) 38 | hspan <- grep("---", text) 39 | text <- text[-c(hspan[1]:hspan[2])] 40 | refs <- grepl("# References", text) # Remove references section from each chapter 41 | text <- text[!refs] 42 | write(text, sep = "\n", file = "book.Rmd", append = T) 43 | } 44 | # render("book.Rmd", output_format = "pdf_document") 45 | setwd(old) 46 | } 47 | -------------------------------------------------------------------------------- /code/build-CRC-version.R: -------------------------------------------------------------------------------- 1 | # Build the CRC-formatted version of the book 2 | source("code/bbuild.R") 3 | system("pdflatex --interaction=nonstopmode spatial-microsim-book.tex") 4 | system("pdflatex --interaction=nonstopmode spatial-microsim-book.tex") 5 | 6 | # tidy up the mess 7 | tt <- list.files(pattern = "*.aux|*.toc|*.log|*.lot|*.gz|*.idx|*.ilg|*.ind|*.ggmap", ) 8 | 9 | for(i in tt){ 10 | system(paste('rm', i)) 11 | } 12 | -------------------------------------------------------------------------------- /code/functions.R: -------------------------------------------------------------------------------- 1 | # Functions useful for spatial microsimulation 2 | # What others would be useful? 3 | # Could any of these be improved? 
4 | # Let me know if so - rob00x@gmail.com 5 | 6 | # 'Proportional probabilities' (PP) method of integerisation 7 | # (see http://www.sciencedirect.com/science/article/pii/S0198971513000240): 8 | int_pp <- function(x){ 9 | xv <- as.vector(x) 10 | xint <- rep(0, length(x)) 11 | xs <- sample(length(x), size = round(sum(x)), prob = x, replace = T) 12 | xsumm <- summary(as.factor(xs)) 13 | topup <- as.numeric(names(xsumm)) 14 | xint[topup] <- xsumm 15 | dim(xint) <- dim(x) 16 | xint 17 | } 18 | 19 | # 'Truncate, replicate, sample' (TRS) method of integerisation 20 | # (see http://www.sciencedirect.com/science/article/pii/S0198971513000240): 21 | int_trs <- function(x){ 22 | xv <- as.vector(x) 23 | xint <- floor(xv) 24 | r <- xv - xint 25 | def <- round(sum(r)) # the deficit population 26 | # the weights to be 'topped up' (+ 1 applied) 27 | topup <- sample(length(x), size = def, prob = r) 28 | xint[topup] <- xint[topup] + 1 29 | dim(xint) <- dim(x) 30 | dimnames(xint) <- dimnames(x) 31 | xint 32 | } 33 | 34 | int_expand_vector <- function(x){ 35 | index <- 1:length(x) 36 | rep(index, round(x)) 37 | } 38 | 39 | int_expand_array <- function(x){ 40 | # Transform the array into a dataframe 41 | count_data <- as.data.frame.table(x) 42 | # Store the indices of categories for the final population 43 | indices <- rep(1:nrow(count_data), count_data$Freq) 44 | # Create the final individuals 45 | ind_data <- count_data[indices,] 46 | ind_data 47 | } 48 | 49 | 50 | 51 | # Total absolute error 52 | tae <- function(observed, simulated){ 53 | obs_vec <- as.numeric(observed) 54 | sim_vec <- as.numeric(simulated) 55 | sum(abs(obs_vec - sim_vec)) 56 | } 57 | 58 | # Number of times each unique matrix row appears 59 | umat_count <- function(x) { 60 | xp <- apply(x, 1, paste0, collapse = "") # "pasted" version of constraints 61 | freq <- table(xp) # frequency of occurrence of each individual 62 | xu <- unique(x) # save only unique individuals 63 | rns <- as.integer(row.names(xu)) # save the row names of unique values of ind 64 | xpu <- xp[rns] 65 | o <- order(xpu, decreasing = TRUE) # the order of the output (to rectify table) 66 | cbind(xu, data.frame(ind_num = freq[o], rns = rns)) # output 67 | } 68 | 69 | # Generates list of outputs - requires dplyr (and plyr, for join) 70 | umat_count_dplyr <- function(x){ 71 | x$p <- apply(x, 1, paste0, collapse = "") 72 | up <- data.frame(p = unique(x$p)) # unique values in order they appeared 73 | y <- dplyr::count(x, p) # fast freq table 74 | umat <- inner_join(up, y) # quite fast 75 | umat <- join(umat, x, match = "first") 76 | list(u = umat, p = x$p) # return unique individuals and attributes 77 | } 78 | -------------------------------------------------------------------------------- /code/gregwt.R: -------------------------------------------------------------------------------- 1 | #library(devtools) 2 | devtools::install_github("emunozh/GREGWT") 3 | library('GREGWT') 4 | 5 | # Load the data from csv files stored under ../data 6 | age = read.csv("../data/SimpleWorld/age.csv") 7 | sex = read.csv("../data/SimpleWorld/sex.csv") 8 | ind = read.csv("../data/SimpleWorld/ind-full.csv") 9 | # Make categories for age 10 | ind$age <- cut(ind$age, breaks=c(0,49,Inf), labels = c("a0.49", "a.50.")) 11 | # Add initial weights to survey 12 | ind$w <- vector(mode = "numeric", length=dim(ind)[1]) + 1 13 | 14 | # prepare simulation data using GREGWT::prepareData 15 | data_in <- prepareData(cbind(age, sex), ind, census_area_id = F, breaks = c(2)) 16 | 17 | # prepare a data.frame to store the result 18 | fweights
<- NULL 19 | Result <- as.data.frame(matrix(NA, ncol=3, nrow=dim(age)[1])) 20 | names(Result) <- c("area", "income", "cap.income") 21 | 22 | # now we loop through all areas 23 | for(area in seq(dim(age)[1])){ 24 | gregwt = GREGWT(data_in, area_code = area) 25 | fw <- gregwt$final_weights 26 | fweights <- c(fweights, fw) 27 | ## Estimate income 28 | sum.income <- sum(fw * ind$income) 29 | cap.income <- sum(fw * ind$income / sum(fw)) 30 | Result[area,] <- c(area, sum.income, cap.income) 31 | } 32 | 33 | fweights <- matrix(fweights, nrow = nrow(ind)) 34 | fweights 35 | 36 | #ind_agg <- t(apply(fweights, 2, function(x) colSums(x * ind_cat))) 37 | #ind_agg 38 | -------------------------------------------------------------------------------- /code/ipfpMultiDim.R: -------------------------------------------------------------------------------- 1 | require(cmm) 2 | 3 | Ipfp <- function(seed, target.list, target.data, print = FALSE, iter = 1000, 4 | tol = 1e-10, na.target = FALSE) { 5 | # Update an array using the iterative proportional fitting procedure. 6 | # 7 | # Args: 8 | # seed: The initial multi-dimensional array to be updated. Each cell must 9 | # be greater than 0. 10 | # target.list: A list of the target margins provided in target.data. Each 11 | # component of the list is an array whose cells indicate 12 | # which dimension the corresponding margin relates to. 13 | # target.data: A list containing the data of the target margins. Each 14 | # component of the list is an array storing a margin. 15 | # The list order must follow the one defined in target.list. 16 | # Note that the cells of the arrays must be greater than 0. 17 | # print: Verbose parameter: if TRUE prints the current iteration number 18 | # and the value of the stopping criterion. 19 | # iter: The maximum number of iterations allowed; must be greater than 0. 20 | # tol: If the maximum absolute difference between two iterations is lower 21 | # than the value specified by tol, then ipfp has reached convergence 22 | # (stopping criterion); must be greater than 0. 23 | # na.target: if set to TRUE, allows the targets to have NA cells. In that 24 | # case the margins consistency is not checked. 25 | # 26 | # Returns: An array whose margins fit the target margins and of the same 27 | # dimension as seed. 28 | 29 | # checking if NA in target cells if na.target is set to FALSE 30 | if (is.na(min(sapply(target.data, min))) & !na.target) { 31 | stop('Error: NA values present in the margins - use na.target = TRUE!') 32 | } 33 | 34 | # checking non negativity condition for the seed and the target 35 | if (min(sapply(target.data, min), na.rm = na.target) < 0 | min(seed) < 0) { 36 | stop('Error: Target and Seed cells must be non-negative!') 37 | } 38 | 39 | # checking the strict positivity of tol and iter 40 | if (iter < 1 | tol <= 0) { 41 | stop('Error: tol and iter must be strictly positive!') 42 | } 43 | 44 | # checking the margins consistency if no missing values in the targets 45 | check.margins <- TRUE 46 | 47 | if (na.target == FALSE) { 48 | if (length(target.data) > 1) { 49 | for (m in 2:length(target.data)) { 50 | if (abs(sum(target.data[[m-1]]) - sum(target.data[[m]])) > 1e-10) { 51 | check.margins <- FALSE 52 | warning('Targets not consistent - shifting to probabilities! 53 | Check input data!\n') 54 | break 55 | } 56 | } 57 | } 58 | } else { 59 | if (print) { 60 | cat('NOTE: Missing values present in target cells.
') 61 | cat('Margins consistency not checked!\n') 62 | } 63 | } 64 | 65 | # if margins are not consistent, shifting from frequencies to probabilities 66 | if (!check.margins) { 67 | seed <- seed / sum(seed) 68 | for (m in 1:length(target.data)) { 69 | target.data[[m]] <- target.data[[m]] / sum(target.data[[m]]) 70 | } 71 | } 72 | 73 | if (print & check.margins & !na.target) { 74 | cat('Margins consistency checked!\n') 75 | } 76 | 77 | # initial value is the seed 78 | result <- seed 79 | converged <- FALSE 80 | tmp.evol.stp.crit <- vector(mode="numeric", length = iter) 81 | 82 | # ipfp iterations 83 | for (i in 1:iter) { 84 | 85 | if (print) { 86 | cat('... ITER', i, '\n') 87 | } 88 | 89 | # saving previous iteration result (for testing convergence) 90 | result.temp <- result 91 | 92 | # loop over the constraints 93 | for (j in 1:length(target.list)) { 94 | # ... extracting current margins 95 | temp.sum <- apply(result, target.list[[j]], sum) 96 | # ... computation of the update factor, taking care of 0 and NA cells 97 | update.factor <- ifelse(target.data[[j]] == 0 | temp.sum == 0, 0, 98 | target.data[[j]] / temp.sum) 99 | if (na.target == TRUE) { 100 | update.factor[is.na(update.factor)] <- 1; 101 | } 102 | # ... apply the update factor 103 | result <- sweep(result,target.list[[j]], update.factor, FUN = "*") 104 | } 105 | 106 | # stopping criterion 107 | stp.crit <- max(abs(result - result.temp)) 108 | tmp.evol.stp.crit[i] <- stp.crit 109 | if (stp.crit < tol) { 110 | converged <- TRUE 111 | if (print) { 112 | cat('Convergence reached after', i, 'iterations!\n') 113 | } 114 | break 115 | } 116 | 117 | if (print) { 118 | cat (' stopping criterion:', stp.crit, '\n') 119 | } 120 | 121 | } 122 | 123 | # checking the convergence 124 | if (converged == FALSE) { 125 | warning('IPFP did not converge after ', iter, ' iteration(s)!
126 | This might be due to 0 cells in the seed, the maximum number 127 | of iterations being too low or the tolerance too small\n') 128 | } 129 | 130 | # computing final max difference between generated and target margins 131 | diff.margins <- vector(mode = "numeric", length = length(target.list)) 132 | if (na.target == FALSE) { 133 | for (j in 1:length(target.list)) { 134 | diff.margins[j] = max(abs(target.data[[j]] 135 | - apply(result, target.list[[j]], sum))) 136 | } 137 | } 138 | 139 | # storing the evolution of the stopping criterion 140 | evol.stp.crit <- tmp.evol.stp.crit[1:i] 141 | 142 | # gathering the results in a list 143 | result.list <- list("estimates" = result, "stp.crit" = stp.crit, 144 | "conv" = converged, "dif.margins" = diff.margins, 145 | "evol.stp.crit" = evol.stp.crit); 146 | 147 | # returning the result 148 | return(result.list) 149 | 150 | } 151 | 152 | # code from Thomas 153 | array2vector<-function(a) { 154 | #transform array a to vector, where last index moves fastest 155 | 156 | dim.array <- dim(a) 157 | a <- aperm(a, seq(length(dim.array), 1, by = -1)) 158 | return(c(a)) 159 | 160 | } 161 | 162 | vector2array<-function(vector, dim) { 163 | #transform vector to array, where last index moves fastest 164 | 165 | a <- array(vector, dim) 166 | a <- aperm(a, seq(length(dim), 1, by = -1)) 167 | return(a) 168 | 169 | } 170 | 171 | covar <- function(estimate, sample, target.list) { 172 | # Compute variance-covariance matrix of the estimators 173 | # using the formula from Little and Wu (1991) 174 | 175 | n <- sum(sample) 176 | sample.prob <- array2vector(sample / sum(sample)) 177 | estimate.prob <- array2vector(estimate / sum(estimate)) 178 | 179 | D.sample <- diag(1 / sample.prob) 180 | D.estimate <- diag(1 / estimate.prob) 181 | 182 | # computation of A such that A * vector(estimate) = vector(target.data) 183 | 184 | # ... one line filled with ones 185 | A.transp <- matrix(1, nrow = 1, ncol = length(estimate.prob)) 186 | 187 | # ...
constrainst (removing the first one since it is redundant information) 188 | for (j in 1:length(target.list)) { 189 | marg.mat <- MarginalMatrix(var = 1:length(dim(sample)), marg = target.list[[j]], dim = dim(sample))[-1,] 190 | A.transp <- rbind(marg.mat, A.transp, deparse.level = FALSE) 191 | } 192 | 193 | A <- t(A.transp) 194 | 195 | # computation of the orthogonal complement of A (using QR decomposition) 196 | K <- qr.Q(qr(A), complete = TRUE)[,(dim(A)[2]+1):dim(A)[1]] 197 | 198 | # computation of the variance 199 | estimate.var <- (1 / n) * K %*% solve((t(K) %*% D.estimate %*% K)) %*% t(K) %*% D.sample %*% K %*% solve(t(K) %*% D.estimate %*% K) %*% t(K) 200 | 201 | # returning the result 202 | return(estimate.var) 203 | 204 | } 205 | -------------------------------------------------------------------------------- /code/optim-cakeMap.R: -------------------------------------------------------------------------------- 1 | # optim test CakeMap 2 | 3 | source("code/CakeMap.R") # load cakemap data 4 | indu <- unique(ind_cat) 5 | rns <- as.integer(row.names(indu)) 6 | 7 | library(dplyr) 8 | ind_cat$p <- apply(ind_cat, 1, paste0, collapse = "") 9 | umat <- count(ind_cat, p, sort = TRUE)$n 10 | 11 | ind_num <- apply(indu, 2, function(x) x * umat) # ind_num: unique row numbers to optimise 12 | -------------------------------------------------------------------------------- /code/optim-tests-SimpleWorld.R: -------------------------------------------------------------------------------- 1 | # Optimisation experiments 2 | source("code/SimpleWorld.R") 3 | library(microbenchmark) 4 | library(ggplot2) 5 | 6 | # Look at the set-up data 7 | x0 8 | cons 9 | ind 10 | ind_cat 11 | 12 | # Creating the function to optimise 13 | # Setting up the input data 14 | # ind_cat <- rbind(ind_cat, ind_cat[1,]) # add extra rows - just for testing 15 | # indu <- unique(ind_cat) # save only unique individuals - dplyr::distinct forgets row.names 16 | 17 | umat_count <- function(x) { 18 | xp <- apply(x, 1, paste0, collapse = "") # "pasted" version of constraints 19 | freq <- table(xp) # frequency of occurence of each individual 20 | xu <- unique(x) # save only unique individuals 21 | rns <- as.integer(row.names(xu)) # save the row names of unique values of ind 22 | xpu <- xp[rns] 23 | o <- order(xpu, decreasing = TRUE) # the order of the output (to rectify table) 24 | cbind(xu, data.frame(ind_n = freq[o], rns = rns)) # outputs 25 | } 26 | 27 | umat <- umat_count(ind_cat) 28 | indu <- apply(umat[1:ncol(ind_cat)], 2, 29 | function(x) x * umat$ind_n.Freq) 30 | 31 | sim <- colSums(indu * c(1.2,3.5,1.5,4.5)) # test it on approximate dataset 32 | sim - cons[1,] # test the function works 33 | 34 | fun <- function(par, ind_n.Freq, con){ 35 | sim <- colSums(par * ind_n.Freq) 36 | ae <- abs(sim - con) # Absolute error per category 37 | sum(ae) # the Total Absolute Error (TAE) 38 | } 39 | par <- c(1.2,3.5,1.5,4.5) 40 | fun(par, indu, cons[1,]) # Shows the function in action 41 | 42 | # Test the function on the weights obtained by IPF 43 | # fun(weights[rns,1], indu, cons[1,]) # the weights generated by ipfp result in a tae of 0, better than optim 44 | 45 | ores <- optim(par = rep(1, nrow(indu)), fn = fun, gr = "CG", ind_n.Freq = indu, con = cons[1,]) 46 | ores$par 47 | fun(ores$par, indu, cons[1,]) # check TAE is low 48 | fw <- ores$par[rep(1:nrow(umat), times = umat$ind_n.Freq)] # final weights 49 | 50 | umat[1:ncol(ind_cat)][rep(1:nrow(umat), umat$ind_n.Freq),] # we've returned full circle to the correct population 51 | 52 | # 
Next stage: try optimising the fit using diferent algorithms and do tests! 53 | 54 | optim_optim_CG <- function(){ 55 | optim(par = rep(1, nrow(indu)), fn = fun, gr = "CG", ind_n.Freq = indu, con = cons[1,]) 56 | } 57 | 58 | # GenSA test 59 | library(GenSA) # the library to test 60 | out <- GenSA(par = rep(1, nrow(indu)), fn = fun, lower = rep(0, nrow(indu)), upper = rep(10^5, nrow(indu)), control = list(maxit = 10), ind_n.Freq = indu, con = cons[1,]) 61 | out$value 62 | 63 | # rgenoud 64 | # install.packages("rgenoud") 65 | library(rgenoud) 66 | set.seed(2014) 67 | out <- genoud(nvars = nrow(indu), fn = fun, ind_n.Freq = indu, con = cons[1,], control = list(maxit = 1000), data.type.int = TRUE, Domains = matrix(c(rep(0, nrow(indu)),rep(100000, nrow(indu))), ncol = 2)) 68 | out$par 69 | fun(par = out$par, ind_n.Freq = indu, con = cons) 70 | fun(par = c(2,2,1,6), ind_n.Freq = indu, con = cons) 71 | 72 | opt_res <- data.frame(algorithm = NA, 73 | maxit = NA, 74 | fit = NA, 75 | time = NA) 76 | init<-fun(par = rep(1,nrow(indu)), ind_n.Freq = indu, con = cons) 77 | opt_res <- rbind(opt_res, c("optim_Nelder", 0,init, NA)) 78 | opt_res <- rbind(opt_res, c("optim_SANN", 0,init, NA)) 79 | opt_res <- rbind(opt_res, c("optim_BFGS", 0,init, NA)) 80 | opt_res <- rbind(opt_res, c("optim_CG", 0,init, NA)) 81 | opt_res <- rbind(opt_res, c("ipf", 0,init, NA)) 82 | opt_res <- rbind(opt_res, c("GenSA", 0,init, NA)) 83 | 84 | Nb = 11 # default iteration number 85 | set.seed(2014) 86 | for(i in 1:Nb){ 87 | tmp_res <- optim(par = rep(1, nrow(indu)), fn = fun, method = "Nelder-Mead", ind_n.Freq = indu, con = cons[1,], control = list(maxit = i)) 88 | opt_res <- rbind(opt_res, c("optim_Nelder", i, tmp_res$value, NA)) 89 | tmp_res <- optim(par = rep(1, nrow(indu)), fn = fun, method = "SANN", ind_n.Freq = indu, con = cons[1,], control = list(maxit = i )) 90 | opt_res <- rbind(opt_res, c("optim_SANN", i, tmp_res$value, NA)) 91 | tmp_res <- optim(par = rep(1, nrow(indu)), fn = fun, method = "BFGS", ind_n.Freq = indu, con = cons[1,], control = list(maxit = i)) 92 | opt_res <- rbind(opt_res, c("optim_BFGS", i, tmp_res$value, NA)) 93 | tmp_res <- optim(par = rep(1, nrow(indu)), fn = fun, method = "CG", ind_n.Freq = indu, con = cons[1,], control = list(maxit = i)) 94 | opt_res <- rbind(opt_res, c("optim_CG", i, tmp_res$value, NA)) 95 | weights <- apply(cons, 1, function(x) ipfp(x, ind_catt, x0, maxit = i)) 96 | tae <- sum(abs(colSums(weights[,1] * ind_cat) - cons[1,])) 97 | opt_res <- rbind(opt_res, c("ipf", i, tae, NA)) 98 | tmp_res <- GenSA(par = rep(1, nrow(indu)), fn = fun, lower = rep(0, nrow(indu)), upper = rep(10^2, nrow(indu)), control = list(maxit = i), ind_n.Freq = indu, con = cons[1,]) 99 | opt_res <- rbind(opt_res, c("GenSA", i, tmp_res$value, NA)) 100 | } 101 | 102 | opt_res$fit <- as.numeric(opt_res$fit) 103 | opt_res$maxit <- as.numeric(opt_res$maxit) 104 | 105 | qplot(data = opt_res, maxit, fit, linetype = algorithm, geom="line") + 106 | ylab("Total Absolute Error") + xlab("Iterations") + 107 | scale_linetype(name = "Algorithm") + 108 | #scale_color_brewer(palette = 2, type = "qual") + 109 | theme_classic() 110 | # Save the plots! 
111 | # ggsave("figures/optim-its.png") # (original plot) 112 | # ggsave("figures/TAEOptim_GenSA_Mo.png") 113 | # ggsave("figures/TAEOptim_GenSA_Mo.pdf") 114 | 115 | 116 | 117 | # Regenerate results for timings plot 118 | opt_res <- data.frame(algorithm = NA, 119 | maxit = NA, 120 | fit = NA, 121 | time = NA) 122 | 123 | 124 | Nb = 11 # default iteration number 125 | set.seed(2014) 126 | for(i in 1:Nb){ 127 | tmp_res <- optim(par = rep(1, nrow(indu)), fn = fun, method = "Nelder-Mead", ind_n.Freq = indu, con = cons[1,], control = list(maxit = i)) 128 | opt_res <- rbind(opt_res, c("optim_Nelder", i, tmp_res$value, NA)) 129 | tmp_res <- optim(par = rep(1, nrow(indu)), fn = fun, method = "SANN", ind_n.Freq = indu, con = cons[1,], control = list(maxit = i)) 130 | opt_res <- rbind(opt_res, c("optim_SANN", i , tmp_res$value, NA)) 131 | tmp_res <- optim(par = rep(1, nrow(indu)), fn = fun, method = "BFGS", ind_n.Freq = indu, con = cons[1,], control = list(maxit = i)) 132 | opt_res <- rbind(opt_res, c("optim_BFGS", i, tmp_res$value, NA)) 133 | tmp_res <- optim(par = rep(1, nrow(indu)), fn = fun, method = "CG", ind_n.Freq = indu, con = cons[1,], control = list(maxit = i)) 134 | opt_res <- rbind(opt_res, c("optim_CG", i, tmp_res$value, NA)) 135 | weights <- apply(cons, 1, function(x) ipfp(x, ind_catt, x0, maxit = i)) 136 | tae <- sum(abs(colSums(weights[,1] * ind_cat) - cons[1,])) 137 | opt_res <- rbind(opt_res, c("ipf", i, tae, NA)) 138 | tmp_res <- GenSA(par = rep(1, nrow(indu)), fn = fun, lower = rep(0, nrow(indu)), upper = rep(10^2, nrow(indu)), control = list(maxit = i), ind_n.Freq = indu, con = cons[1,]) 139 | opt_res <- rbind(opt_res, c("GenSA", i, tmp_res$value, NA)) 140 | } 141 | 142 | opt_res$fit <- as.numeric(opt_res$fit) 143 | opt_res$maxit <- as.numeric(opt_res$maxit) 144 | 145 | ### Timings 146 | mb <- NULL 147 | for(i in 1:Nb){ 148 | Nelder <- function(){optim(par = rep(1, nrow(indu)), fn = fun, method = "Nelder-Mead", ind_n.Freq = indu, con = cons[1,], control = list(maxit = i ))} 149 | SANN <- function(){optim(par = rep(1, nrow(indu)), fn = fun, method = "SANN", ind_n.Freq = indu, con = cons[1,], control = list(maxit = i ))} 150 | BFGS <- function(){optim(par = rep(1, nrow(indu)), fn = fun, method = "BFGS", ind_n.Freq = indu, con = cons[1,], control = list(maxit = i))} 151 | CG <- function(){optim(par = rep(1, nrow(indu)), fn = fun, method = "CG", ind_n.Freq = indu, con = cons[1,], control = list(maxit = i))} 152 | IPF <- function(){weights <- apply(cons, 1, function(x) ipfp(x, ind_catt, x0, maxit = i )) 153 | tae <- sum(abs(colSums(weights[,1] * ind_cat) - cons[1,]))} 154 | GENSA <- function(){GenSA(par = rep(1, nrow(indu)), fn = fun, lower = rep(0, nrow(indu)), upper = rep(10^2, nrow(indu)), control = list(maxit = i), ind_n.Freq = indu, con = cons[1,])} 155 | mb <- rbind(mb, print(microbenchmark(Nelder(), SANN(), BFGS(), CG(), IPF(), GENSA(), times = 20))) 156 | } 157 | 158 | opt_res[2:dim(opt_res)[1],]$time <- mb$mean 159 | opt_res$time<-as.numeric(opt_res$time) 160 | 161 | qplot(data = opt_res, maxit, time, linetype = algorithm, geom="line") + 162 | # ylim(NA, 3000) + 163 | coord_cartesian(ylim = c(0, 2000)) + 164 | ylab("Time (microseconds)") + 165 | xlab("Number of iterations") + 166 | scale_colour_brewer(palette = 2, type = "div") + 167 | scale_linetype(name = "Algorithm") + 168 | theme_classic() 169 | # Save the plots! 
170 | # ggsave("figures/optim-time.png") 171 | # ggsave("figures/TimeOptim_GenSA_Mo.png") 172 | # ggsave("figures/TimeOptim_GenSA_Mo.pdf") 173 | ### Background 174 | 175 | # plot of time vs TAE 176 | qplot(data = opt_res, time, fit, linetype = algorithm, geom="line") + 177 | ylab("TAE") + xlab("Time (microseconds)") + scale_color_brewer(palette = 2, type = "qual") + theme_classic() 178 | 179 | 180 | ### Stack overflow - simplest form - representation of the above 181 | # See http://stackoverflow.com/questions/26160079/fast-concise-way-to-generate-ordered-frequency-count-of-unique-matrix-rows 182 | 183 | -------------------------------------------------------------------------------- /code/parallel-ipfp.R: -------------------------------------------------------------------------------- 1 | con_age <- read.csv("data/SimpleWorld/age.csv") 2 | con_sex <- read.csv("data/SimpleWorld/sex.csv") 3 | ind <- read.csv("data/SimpleWorld/ind.csv") 4 | 5 | (ind$age <- cut(ind$age, breaks = c(0, 49, 120), labels = c("a0_49", "a50+"))) 6 | 7 | names(con_age) <- levels(ind$age) # rename aggregate variables 8 | 9 | # make the number of constraints larger - to see benefit of parallel processing 10 | cons <- cbind(con_age, con_sex) 11 | 12 | cat_age <- model.matrix(~ ind$age - 1) 13 | cat_sex <- model.matrix(~ ind$sex - 1)[, c(2, 1)] 14 | (ind_cat <- cbind(cat_age, cat_sex)) # combine flat representations of the data 15 | 16 | library(ipfp) # load the ipfp library after: install.packages("ipfp") 17 | cons <- apply(cons, 2, as.numeric) # convert matrix to numeric data type 18 | cons <- cons[sample(3, size = 500, replace = T),] 19 | 20 | weights <- matrix(data = NA, nrow = nrow(ind), ncol = nrow(cons)) 21 | 22 | ind_catt <- t(ind_cat) 23 | x0 <- rep(1, nrow(ind)) 24 | 25 | # Tests of the speed of the for solution vs the apply solution 26 | ipfp_for <- function(){ 27 | for(i in 1:ncol(weights)){ 28 | weights[,i] <- ipfp(cons[i,], t(ind_cat), x0 = rep(1, nrow(ind))) 29 | } 30 | } 31 | 32 | ipfp_apply <- function(){ 33 | weights <- apply(cons, MARGIN = 1, FUN = function(x) ipfp(as.numeric(x), t(ind_cat), x0 = rep(1,nrow(ind)))) 34 | } 35 | 36 | ipfp_for20 <- function(){ 37 | for(i in 1:ncol(weights)){ 38 | weights[,i] <- ipfp(cons[i,], t(ind_cat), x0 = rep(1, nrow(ind)), maxit = 20) 39 | } 40 | } 41 | 42 | ipfp_apply20 <- function(){ 43 | weights <- apply(cons, MARGIN = 1, FUN = function(x) ipfp(as.numeric(x), t(ind_cat), x0 = rep(1,nrow(ind)), maxit = 20)) 44 | } 45 | 46 | ipfp_foric <- function(){ 47 | for(i in 1:ncol(weights)){ 48 | weights[,i] <- ipfp(cons[i,], ind_catt, x0 = rep(1, nrow(ind))) 49 | } 50 | } 51 | 52 | ipfp_applyic <- function(){ 53 | weights <- apply(cons, MARGIN = 1, FUN = function(x) ipfp(as.numeric(x), ind_catt, x0 = rep(1,nrow(ind)))) 54 | } 55 | 56 | ipfp_for20icx <- function(){ 57 | for(i in 1:ncol(weights)){ 58 | weights[,i] <- ipfp(cons[i,], ind_catt, x0, maxit = 20) 59 | } 60 | } 61 | 62 | ipfp_apply20icx <- function(){ 63 | weights <- apply(cons, MARGIN = 1, FUN = function(x) ipfp(as.numeric(x), ind_catt, x0 , maxit = 20)) 64 | } 65 | library(microbenchmark) 66 | microbenchmark(ipfp_for(), ipfp_apply(), ipfp_for20(), ipfp_apply20(), ipfp_foric(), ipfp_applyic(), ipfp_for20icx(), ipfp_apply20icx(), times = 5) 67 | 68 | library(parallel) 69 | detectCores() # how many cores on the system? 
70 | cl <- makeCluster(getOption("cl.cores", 4)) # make the cluster 71 | clusterExport(cl,c("ipfp","ind_catt", "x0")) # export the objects each node needs 72 | 73 | 74 | 75 | ind_catt <- t(ind_cat) 76 | 77 | f3 <- function(cl){ 78 | weights_apply <- parApply(cl = cl, cons, 1, function(x) ipfp(x, ind_catt, x0)) 79 | } 80 | 81 | library(microbenchmark) 82 | microbenchmark(ipfp_for(), ipfp_apply(), ipfp_for20(), ipfp_apply20(), ipfp_foric(), ipfp_applyic(), ipfp_for20icx(), ipfp_apply20icx(), f3(cl), times = 3 ) 83 | 84 | stopCluster(cl) # stop the cluster -------------------------------------------------------------------------------- /courses/course-info-3day.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Course information" 3 | author: "Robin Lovelace and Morgane Dumont" 4 | output: html_document 5 | --- 6 | 7 | ```{r setup, include=FALSE} 8 | knitr::opts_chunk$set(echo = TRUE) 9 | ``` 10 | 11 | Course overview for Spatial Microsimulation course in Seville, 7^th^ - 9^th^ November. 12 | 13 | ## Day 1 (9:30 to 17:30 with one hour break) 14 | 15 | - [Introduction to spatial microsimulation in R](http://robinlovelace.net/spatial-microsim-book/slides/introduction.pdf) (RL 09:30 - 10:30) 16 | - Course overview, aims and objectives (RL 9:30 - 10:00) 17 | - Go-round of participants: course aspirations (RL 10:00 - 10:30) 18 | 19 | - What is Spatial Microsimulation and its applications (with an emphasis on EU projects) (10:30 - 11:30) 20 | - Agriculture (RL) 21 | - Wealth distribution (RL) 22 | - Transport (MD - 20 min) 23 | 24 | - [Using R and RStudio](http://rpubs.com/RobinLovelace/146447) for spatial microsimulation (RL 11:30 - 13:00 See book appendix) 25 | - Project management 26 | - GitHub 27 | - The RStudio Graphical User Interface (GUI) 28 | - Using R 29 | 30 | - [Creating spatial microdata in R](http://robinlovelace.net/spatial-microsim-book/slides/spatial-microdata-in-r.html) (MD 14:00 - 17:30) 31 | - Loading the data 32 | - Preparing the data 33 | - Reweighting procedures 34 | 35 | ## Day 2 36 | 37 | - [Applying the methods of IPF and Combinatorial Optimisation](https://github.com/Robinlovelace/spatial-microsim-book/blob/master/slides/Applying-IPF-and-CO.Rmd) (9:30 - 12:00) 38 | - Internal and External Validation (MD) 39 | - Population synthesis with integerisation (MD) 40 | - Introduction to Combinatorial Optimisation (MD) 41 | 42 | - [simPop](https://github.com/Robinlovelace/spatial-microsim-book/blob/master/slides/simpop-intro.Rmd) (RL 12:00 - 13:00) 43 | 44 | - Spatial data with R (14:00 - 16:00) 45 | - [Spatial data classes](http://rpubs.com/RobinLovelace/217921) (30 min) 46 | - Practical work on the [Creating-maps-in-R](https://github.com/Robinlovelace/Creating-maps-in-R) repository (45 min Q & A) 47 | - [Visualisation](https://github.com/Robinlovelace/Creating-maps-in-R/blob/master/vignettes/vspd-base-shiny.md) (30 min) 48 | - [Simple features](https://github.com/edzer/sfr) (**sf** package) (15 min if time) 49 | 50 | 51 | 52 | ## Day 3 53 | 54 | 55 | - Related topics (MD 9:30 - 10:30) 56 | - Spatial micro-data for agent-based models 57 | - Spatial microsimulation without micro-data 58 | - Adding the household information 59 | - Choice of data and methods 60 | 61 | - Practical application (MD - 10:45 - 12:30) 62 | 63 | - [Interactive mapping](https://github.com/Robinlovelace/Creating-maps-in-R/blob/master/vignettes/vspd-base-shiny.md) with **tmap**, **leaflet** and **shiny** (if time) (RL 12:30 - 13:00) 64 | 65 | - Apply what you've learned on your own
data (14:00 - 16:45) 66 | 67 | - Conclusion to the course (16:45 - 17:00) 68 | 69 | -------------------------------------------------------------------------------- /data/Belgium/ContrainteDipl.txt: -------------------------------------------------------------------------------- 1 | "com" "dipl" "COUNT" 2 | 91005 "Aucun" 172 3 | 91013 "Aucun" 218 4 | 91015 "Aucun" 123 5 | 91030 "Aucun" 491 6 | 91034 "Aucun" 349 7 | 91054 "Aucun" 182 8 | 91059 "Aucun" 173 9 | 91064 "Aucun" 113 10 | 91072 "Aucun" 140 11 | 91103 "Aucun" 69 12 | 91114 "Aucun" 300 13 | 91120 "Aucun" 127 14 | 91141 "Aucun" 175 15 | 91142 "Aucun" 222 16 | 91143 "Aucun" 109 17 | 92003 "Aucun" 743 18 | 92006 "Aucun" 96 19 | 92035 "Aucun" 381 20 | 92045 "Aucun" 173 21 | 92048 "Aucun" 234 22 | 92054 "Aucun" 124 23 | 92087 "Aucun" 352 24 | 92094 "Aucun" 2519 25 | 92097 "Aucun" 71 26 | 92101 "Aucun" 202 27 | 92114 "Aucun" 206 28 | 92137 "Aucun" 1064 29 | 92138 "Aucun" 133 30 | 92140 "Aucun" 545 31 | 92141 "Aucun" 155 32 | 92142 "Aucun" 414 33 | 93010 "Aucun" 115 34 | 93014 "Aucun" 584 35 | 93018 "Aucun" 129 36 | 93022 "Aucun" 324 37 | 93056 "Aucun" 263 38 | 93088 "Aucun" 468 39 | 93090 "Aucun" 229 40 | 91005 "CITE1" 678 41 | 91013 "CITE1" 917 42 | 91015 "CITE1" 431 43 | 91030 "CITE1" 1441 44 | 91034 "CITE1" 1432 45 | 91054 "CITE1" 555 46 | 91059 "CITE1" 545 47 | 91064 "CITE1" 434 48 | 91072 "CITE1" 435 49 | 91103 "CITE1" 320 50 | 91114 "CITE1" 1192 51 | 91120 "CITE1" 478 52 | 91141 "CITE1" 723 53 | 91142 "CITE1" 858 54 | 91143 "CITE1" 453 55 | 92003 "CITE1" 2593 56 | 92006 "CITE1" 421 57 | 92035 "CITE1" 1240 58 | 92045 "CITE1" 644 59 | 92048 "CITE1" 956 60 | 92054 "CITE1" 415 61 | 92087 "CITE1" 1208 62 | 92094 "CITE1" 10056 63 | 92097 "CITE1" 304 64 | 92101 "CITE1" 776 65 | 92114 "CITE1" 794 66 | 92137 "CITE1" 3347 67 | 92138 "CITE1" 532 68 | 92140 "CITE1" 2029 69 | 92141 "CITE1" 587 70 | 92142 "CITE1" 1668 71 | 93010 "CITE1" 460 72 | 93014 "CITE1" 1657 73 | 93018 "CITE1" 338 74 | 93022 "CITE1" 1111 75 | 93056 "CITE1" 956 76 | 93088 "CITE1" 1546 77 | 93090 "CITE1" 777 78 | 91005 "CITE2" 1636 79 | 91013 "CITE2" 2142 80 | 91015 "CITE2" 758 81 | 91030 "CITE2" 3719 82 | 91034 "CITE2" 3290 83 | 91054 "CITE2" 1118 84 | 91059 "CITE2" 1463 85 | 91064 "CITE2" 1083 86 | 91072 "CITE2" 1072 87 | 91103 "CITE2" 732 88 | 91114 "CITE2" 2998 89 | 91120 "CITE2" 1199 90 | 91141 "CITE2" 1840 91 | 91142 "CITE2" 1577 92 | 91143 "CITE2" 716 93 | 92003 "CITE2" 6305 94 | 92006 "CITE2" 1311 95 | 92035 "CITE2" 3260 96 | 92045 "CITE2" 1670 97 | 92048 "CITE2" 2355 98 | 92054 "CITE2" 1324 99 | 92087 "CITE2" 2873 100 | 92094 "CITE2" 23911 101 | 92097 "CITE2" 1073 102 | 92101 "CITE2" 2305 103 | 92114 "CITE2" 1671 104 | 92137 "CITE2" 7019 105 | 92138 "CITE2" 1437 106 | 92140 "CITE2" 4319 107 | 92141 "CITE2" 1685 108 | 92142 "CITE2" 4689 109 | 93010 "CITE2" 1182 110 | 93014 "CITE2" 3427 111 | 93018 "CITE2" 680 112 | 93022 "CITE2" 2737 113 | 93056 "CITE2" 2283 114 | 93088 "CITE2" 4234 115 | 93090 "CITE2" 1454 116 | 91005 "CITE3" 1966 117 | 91013 "CITE3" 2223 118 | 91015 "CITE3" 817 119 | 91030 "CITE3" 4092 120 | 91034 "CITE3" 3590 121 | 91054 "CITE3" 1052 122 | 91059 "CITE3" 1818 123 | 91064 "CITE3" 1257 124 | 91072 "CITE3" 1242 125 | 91103 "CITE3" 842 126 | 91114 "CITE3" 3405 127 | 91120 "CITE3" 1273 128 | 91141 "CITE3" 2114 129 | 91142 "CITE3" 1435 130 | 91143 "CITE3" 652 131 | 92003 "CITE3" 6590 132 | 92006 "CITE3" 1702 133 | 92035 "CITE3" 3746 134 | 92045 "CITE3" 1893 135 | 92048 "CITE3" 2499 136 | 92054 "CITE3" 1643 137 | 92087 "CITE3" 3261 138 | 92094 "CITE3" 
27136 139 | 92097 "CITE3" 1251 140 | 92101 "CITE3" 2907 141 | 92114 "CITE3" 1841 142 | 92137 "CITE3" 6885 143 | 92138 "CITE3" 1742 144 | 92140 "CITE3" 4632 145 | 92141 "CITE3" 2063 146 | 92142 "CITE3" 5568 147 | 93010 "CITE3" 1264 148 | 93014 "CITE3" 3440 149 | 93018 "CITE3" 729 150 | 93022 "CITE3" 3062 151 | 93056 "CITE3" 2373 152 | 93088 "CITE3" 4713 153 | 93090 "CITE3" 1550 154 | 91005 "CITE4" 112 155 | 91013 "CITE4" 183 156 | 91015 "CITE4" 48 157 | 91030 "CITE4" 231 158 | 91034 "CITE4" 193 159 | 91054 "CITE4" 69 160 | 91059 "CITE4" 93 161 | 91064 "CITE4" 82 162 | 91072 "CITE4" 93 163 | 91103 "CITE4" 55 164 | 91114 "CITE4" 144 165 | 91120 "CITE4" 82 166 | 91141 "CITE4" 133 167 | 91142 "CITE4" 87 168 | 91143 "CITE4" 42 169 | 92003 "CITE4" 344 170 | 92006 "CITE4" 91 171 | 92035 "CITE4" 202 172 | 92045 "CITE4" 112 173 | 92048 "CITE4" 162 174 | 92054 "CITE4" 113 175 | 92087 "CITE4" 196 176 | 92094 "CITE4" 1568 177 | 92097 "CITE4" 68 178 | 92101 "CITE4" 168 179 | 92114 "CITE4" 127 180 | 92137 "CITE4" 408 181 | 92138 "CITE4" 95 182 | 92140 "CITE4" 248 183 | 92141 "CITE4" 135 184 | 92142 "CITE4" 318 185 | 93010 "CITE4" 71 186 | 93014 "CITE4" 232 187 | 93018 "CITE4" 51 188 | 93022 "CITE4" 167 189 | 93056 "CITE4" 135 190 | 93088 "CITE4" 264 191 | 93090 "CITE4" 90 192 | 91005 "CITE5" 1208 193 | 91013 "CITE5" 1467 194 | 91015 "CITE5" 460 195 | 91030 "CITE5" 3026 196 | 91034 "CITE5" 2277 197 | 91054 "CITE5" 720 198 | 91059 "CITE5" 1462 199 | 91064 "CITE5" 1029 200 | 91072 "CITE5" 782 201 | 91103 "CITE5" 529 202 | 91114 "CITE5" 2016 203 | 91120 "CITE5" 856 204 | 91141 "CITE5" 2148 205 | 91142 "CITE5" 620 206 | 91143 "CITE5" 381 207 | 92003 "CITE5" 4062 208 | 92006 "CITE5" 1609 209 | 92035 "CITE5" 3375 210 | 92045 "CITE5" 1764 211 | 92048 "CITE5" 1796 212 | 92054 "CITE5" 1675 213 | 92087 "CITE5" 2419 214 | 92094 "CITE5" 26403 215 | 92097 "CITE5" 949 216 | 92101 "CITE5" 2980 217 | 92114 "CITE5" 1798 218 | 92137 "CITE5" 3966 219 | 92138 "CITE5" 1670 220 | 92140 "CITE5" 3059 221 | 92141 "CITE5" 2322 222 | 92142 "CITE5" 6509 223 | 93010 "CITE5" 766 224 | 93014 "CITE5" 2063 225 | 93018 "CITE5" 443 226 | 93022 "CITE5" 1821 227 | 93056 "CITE5" 1424 228 | 93088 "CITE5" 3678 229 | 93090 "CITE5" 727 230 | 91005 "CITE6" 22 231 | 91013 "CITE6" 25 232 | 91015 "CITE6" 10 233 | 91030 "CITE6" 51 234 | 91034 "CITE6" 62 235 | 91054 "CITE6" 5 236 | 91059 "CITE6" 24 237 | 91064 "CITE6" 23 238 | 91072 "CITE6" 19 239 | 91103 "CITE6" 15 240 | 91114 "CITE6" 33 241 | 91120 "CITE6" 14 242 | 91141 "CITE6" 47 243 | 91142 "CITE6" 9 244 | 91143 "CITE6" 11 245 | 92003 "CITE6" 76 246 | 92006 "CITE6" 52 247 | 92035 "CITE6" 101 248 | 92045 "CITE6" 39 249 | 92048 "CITE6" 24 250 | 92054 "CITE6" 58 251 | 92087 "CITE6" 24 252 | 92094 "CITE6" 741 253 | 92097 "CITE6" 21 254 | 92101 "CITE6" 75 255 | 92114 "CITE6" 49 256 | 92137 "CITE6" 46 257 | 92138 "CITE6" 69 258 | 92140 "CITE6" 55 259 | 92141 "CITE6" 81 260 | 92142 "CITE6" 248 261 | 93010 "CITE6" 10 262 | 93014 "CITE6" 30 263 | 93018 "CITE6" 6 264 | 93022 "CITE6" 25 265 | 93056 "CITE6" 21 266 | 93088 "CITE6" 37 267 | 93090 "CITE6" 22 268 | 91005 "NonConcerne" 1238 269 | 91013 "NonConcerne" 1653 270 | 91015 "NonConcerne" 524 271 | 91030 "NonConcerne" 2661 272 | 91034 "NonConcerne" 2327 273 | 91054 "NonConcerne" 764 274 | 91059 "NonConcerne" 1404 275 | 91064 "NonConcerne" 1036 276 | 91072 "NonConcerne" 774 277 | 91103 "NonConcerne" 562 278 | 91114 "NonConcerne" 2290 279 | 91120 "NonConcerne" 929 280 | 91141 "NonConcerne" 1701 281 | 91142 "NonConcerne" 850 282 | 91143 "NonConcerne" 412 283 
| 92003 "NonConcerne" 4798 284 | 92006 "NonConcerne" 1328 285 | 92035 "NonConcerne" 2962 286 | 92045 "NonConcerne" 1515 287 | 92048 "NonConcerne" 1878 288 | 92054 "NonConcerne" 1364 289 | 92087 "NonConcerne" 2349 290 | 92094 "NonConcerne" 17431 291 | 92097 "NonConcerne" 960 292 | 92101 "NonConcerne" 2112 293 | 92114 "NonConcerne" 1646 294 | 92137 "NonConcerne" 4743 295 | 92138 "NonConcerne" 1464 296 | 92140 "NonConcerne" 3603 297 | 92141 "NonConcerne" 1825 298 | 92142 "NonConcerne" 4291 299 | 93010 "NonConcerne" 952 300 | 93014 "NonConcerne" 2335 301 | 93018 "NonConcerne" 509 302 | 93022 "NonConcerne" 1901 303 | 93056 "NonConcerne" 1510 304 | 93088 "NonConcerne" 3152 305 | 93090 "NonConcerne" 922 306 | -------------------------------------------------------------------------------- /data/Belgium/ContrainteGenre.txt: -------------------------------------------------------------------------------- 1 | "com" "gender" "COUNT" 2 | 91005 "Femmes" 3600 3 | 91013 "Femmes" 4577 4 | 91015 "Femmes" 1584 5 | 91030 "Femmes" 8169 6 | 91034 "Femmes" 6990 7 | 91054 "Femmes" 2294 8 | 91059 "Femmes" 3485 9 | 91064 "Femmes" 2561 10 | 91072 "Femmes" 2314 11 | 91103 "Femmes" 1575 12 | 91114 "Femmes" 6301 13 | 91120 "Femmes" 2486 14 | 91141 "Femmes" 4492 15 | 91142 "Femmes" 2793 16 | 91143 "Femmes" 1409 17 | 92003 "Femmes" 13057 18 | 92006 "Femmes" 3312 19 | 92035 "Femmes" 7777 20 | 92045 "Femmes" 3994 21 | 92048 "Femmes" 4899 22 | 92054 "Femmes" 3372 23 | 92087 "Femmes" 6457 24 | 92094 "Femmes" 57004 25 | 92097 "Femmes" 2375 26 | 92101 "Femmes" 5878 27 | 92114 "Femmes" 4127 28 | 92137 "Femmes" 14325 29 | 92138 "Femmes" 3606 30 | 92140 "Femmes" 9450 31 | 92141 "Femmes" 4523 32 | 92142 "Femmes" 11951 33 | 93010 "Femmes" 2413 34 | 93014 "Femmes" 7103 35 | 93018 "Femmes" 1472 36 | 93022 "Femmes" 5668 37 | 93056 "Femmes" 4520 38 | 93088 "Femmes" 9290 39 | 93090 "Femmes" 3000 40 | 91005 "Hommes" 3432 41 | 91013 "Hommes" 4251 42 | 91015 "Hommes" 1587 43 | 91030 "Hommes" 7543 44 | 91034 "Hommes" 6530 45 | 91054 "Hommes" 2171 46 | 91059 "Hommes" 3497 47 | 91064 "Hommes" 2496 48 | 91072 "Hommes" 2243 49 | 91103 "Hommes" 1549 50 | 91114 "Hommes" 6077 51 | 91120 "Hommes" 2472 52 | 91141 "Hommes" 4389 53 | 91142 "Hommes" 2865 54 | 91143 "Hommes" 1367 55 | 92003 "Hommes" 12454 56 | 92006 "Hommes" 3298 57 | 92035 "Hommes" 7490 58 | 92045 "Hommes" 3816 59 | 92048 "Hommes" 5005 60 | 92054 "Hommes" 3344 61 | 92087 "Hommes" 6225 62 | 92094 "Hommes" 52761 63 | 92097 "Hommes" 2322 64 | 92101 "Hommes" 5647 65 | 92114 "Hommes" 4005 66 | 92137 "Hommes" 13153 67 | 92138 "Hommes" 3536 68 | 92140 "Hommes" 9040 69 | 92141 "Hommes" 4330 70 | 92142 "Hommes" 11754 71 | 93010 "Hommes" 2407 72 | 93014 "Hommes" 6665 73 | 93018 "Hommes" 1413 74 | 93022 "Hommes" 5480 75 | 93056 "Hommes" 4445 76 | 93088 "Hommes" 8802 77 | 93090 "Hommes" 2771 78 | -------------------------------------------------------------------------------- /data/Belgium/ContrainteStatut.txt: -------------------------------------------------------------------------------- 1 | "com" "statut" "COUNT" 2 | 91005 "Chômeurs" 298 3 | 91013 "Chômeurs" 493 4 | 91015 "Chômeurs" 157 5 | 91030 "Chômeurs" 699 6 | 91034 "Chômeurs" 798 7 | 91054 "Chômeurs" 224 8 | 91059 "Chômeurs" 250 9 | 91064 "Chômeurs" 192 10 | 91072 "Chômeurs" 196 11 | 91103 "Chômeurs" 183 12 | 91114 "Chômeurs" 599 13 | 91120 "Chômeurs" 218 14 | 91141 "Chômeurs" 314 15 | 91142 "Chômeurs" 474 16 | 91143 "Chômeurs" 146 17 | 92003 "Chômeurs" 1377 18 | 92006 "Chômeurs" 200 19 | 92035 "Chômeurs" 501 20 | 92045 "Chômeurs" 270 
21 | 92048 "Chômeurs" 510 22 | 92054 "Chômeurs" 232 23 | 92087 "Chômeurs" 559 24 | 92094 "Chômeurs" 5638 25 | 92097 "Chômeurs" 200 26 | 92101 "Chômeurs" 428 27 | 92114 "Chômeurs" 332 28 | 92137 "Chômeurs" 1806 29 | 92138 "Chômeurs" 209 30 | 92140 "Chômeurs" 1007 31 | 92141 "Chômeurs" 250 32 | 92142 "Chômeurs" 921 33 | 93010 "Chômeurs" 301 34 | 93014 "Chômeurs" 976 35 | 93018 "Chômeurs" 160 36 | 93022 "Chômeurs" 579 37 | 93056 "Chômeurs" 519 38 | 93088 "Chômeurs" 825 39 | 93090 "Chômeurs" 427 40 | 91005 "Inactifs" 3842 41 | 91013 "Inactifs" 5074 42 | 91015 "Inactifs" 1734 43 | 91030 "Inactifs" 8535 44 | 91034 "Inactifs" 7834 45 | 91054 "Inactifs" 2566 46 | 91059 "Inactifs" 3694 47 | 91064 "Inactifs" 2786 48 | 91072 "Inactifs" 2521 49 | 91103 "Inactifs" 1708 50 | 91114 "Inactifs" 7008 51 | 91120 "Inactifs" 2698 52 | 91141 "Inactifs" 4951 53 | 91142 "Inactifs" 3405 54 | 91143 "Inactifs" 1748 55 | 92003 "Inactifs" 14338 56 | 92006 "Inactifs" 3405 57 | 92035 "Inactifs" 8052 58 | 92045 "Inactifs" 4148 59 | 92048 "Inactifs" 5429 60 | 92054 "Inactifs" 3490 61 | 92087 "Inactifs" 6818 62 | 92094 "Inactifs" 62471 63 | 92097 "Inactifs" 2396 64 | 92101 "Inactifs" 6132 65 | 92114 "Inactifs" 4358 66 | 92137 "Inactifs" 15883 67 | 92138 "Inactifs" 3733 68 | 92140 "Inactifs" 10288 69 | 92141 "Inactifs" 4607 70 | 92142 "Inactifs" 12608 71 | 93010 "Inactifs" 2654 72 | 93014 "Inactifs" 8119 73 | 93018 "Inactifs" 1700 74 | 93022 "Inactifs" 6355 75 | 93056 "Inactifs" 5045 76 | 93088 "Inactifs" 9755 77 | 93090 "Inactifs" 3503 78 | 91005 "Travailleurs" 2892 79 | 91013 "Travailleurs" 3261 80 | 91015 "Travailleurs" 1280 81 | 91030 "Travailleurs" 6478 82 | 91034 "Travailleurs" 4888 83 | 91054 "Travailleurs" 1675 84 | 91059 "Travailleurs" 3038 85 | 91064 "Travailleurs" 2079 86 | 91072 "Travailleurs" 1840 87 | 91103 "Travailleurs" 1233 88 | 91114 "Travailleurs" 4771 89 | 91120 "Travailleurs" 2042 90 | 91141 "Travailleurs" 3616 91 | 91142 "Travailleurs" 1779 92 | 91143 "Travailleurs" 882 93 | 92003 "Travailleurs" 9796 94 | 92006 "Travailleurs" 3005 95 | 92035 "Travailleurs" 6714 96 | 92045 "Travailleurs" 3392 97 | 92048 "Travailleurs" 3965 98 | 92054 "Travailleurs" 2994 99 | 92087 "Travailleurs" 5305 100 | 92094 "Travailleurs" 41656 101 | 92097 "Travailleurs" 2101 102 | 92101 "Travailleurs" 4965 103 | 92114 "Travailleurs" 3442 104 | 92137 "Travailleurs" 9789 105 | 92138 "Travailleurs" 3200 106 | 92140 "Travailleurs" 7195 107 | 92141 "Travailleurs" 3996 108 | 92142 "Travailleurs" 10176 109 | 93010 "Travailleurs" 1865 110 | 93014 "Travailleurs" 4673 111 | 93018 "Travailleurs" 1025 112 | 93022 "Travailleurs" 4214 113 | 93056 "Travailleurs" 3401 114 | 93088 "Travailleurs" 7512 115 | 93090 "Travailleurs" 1841 116 | -------------------------------------------------------------------------------- /data/Belgium/HH_cons_INS92094: -------------------------------------------------------------------------------- 1 | HHsize count 2 | 2 27871 3 | 3 11257 4 | 4 5063 5 | -------------------------------------------------------------------------------- /data/Belgium/HH_sample: -------------------------------------------------------------------------------- 1 | HHID HHsize HHtype 2 | 1 2 Couple 3 | 2 3 NoCouple 4 | 3 3 Couple 5 | 4 2 NoCouple 6 | 5 4 NoCouple 7 | 6 2 Couple 8 | 7 3 Couple 9 | 8 4 Couple 10 | 9 2 NoCouple 11 | 10 2 NoCouple 12 | 11 3 Couple 13 | 12 1 NoCouple 14 | 13 2 Couple 15 | 14 1 NoCouple 16 | 15 2 Couple 17 | -------------------------------------------------------------------------------- /data/CakeMap/area-cat.R: 
-------------------------------------------------------------------------------- 1 | ## runs with integerisation code - produces categorised output with per area loop 2 | 3 | # create new age/sex variable 4 | AS <- paste0(intall[[i]]$Sex, intall[[i]]$ageband4) 5 | unique(AS) 6 | 7 | # matrix for constraint 1 - age/sex 8 | m1 <- model.matrix(~AS-1) 9 | 10 | # matrix for con2 (car ownership) 11 | intall[[i]]$Car <- as.character(intall[[i]]$Car) 12 | m2 <- model.matrix(~intall[[i]]$Car-1) 13 | 14 | # matrix for con3 (nssec) 15 | intall[[i]]$NSSEC8 <- as.character(intall[[i]]$NSSEC8) 16 | m3 <- model.matrix(~intall[[i]]$NSSEC8-1) 17 | 18 | summary(intall[[i]]$NCakes) 19 | levels(ind$NCakes) 20 | intall[[i]]$avnumcakes <- 1 21 | intall[[i]]$avnumcakes[intall[[i]]$NCakes == levels(ind$NCakes)[1]] <- 0.5 22 | intall[[i]]$avnumcakes[intall[[i]]$NCakes == levels(ind$NCakes)[2]] <- 1.5 23 | intall[[i]]$avnumcakes[intall[[i]]$NCakes == levels(ind$NCakes)[3]] <- 4 24 | intall[[i]]$avnumcakes[intall[[i]]$NCakes == levels(ind$NCakes)[4]] <- 8 25 | intall[[i]]$avnumcakes[intall[[i]]$NCakes == levels(ind$NCakes)[5]] <- 0.1 26 | summary(intall[[i]]$avnumcakes[]) 27 | 28 | # Polishing up 29 | area.cat <- data.frame(cbind(m1, m2, m3)) 30 | names(ind_cat) <- cat_labs -------------------------------------------------------------------------------- /data/CakeMap/categorise.R: -------------------------------------------------------------------------------- 1 | # converts numeric variables into categorical variables 2 | # Create 0/1 counts from survey data 3 | 4 | # create new age/sex variable 5 | AS <- paste0(ind$Sex, ind$ageband4) 6 | unique(AS) 7 | 8 | # matrix for constraint 1 - age/sex 9 | m1 <- model.matrix(~AS-1) 10 | head(cons) 11 | head(m1) 12 | colnames(m1) <- names(cons)[1:12] 13 | head(m1) 14 | summary(rowSums(m1)) 15 | 16 | # matrix for con2 (car ownership) 17 | ind$Car <- as.character(ind$Car) 18 | m2 <- model.matrix(~ind$Car-1) 19 | head(m2) 20 | summary(m2) 21 | 22 | # matrix for con3 (nssec) 23 | ind$NSSEC8 <- as.character(ind$NSSEC8) 24 | m3 <- model.matrix(~ind$NSSEC8-1) 25 | head(m3) 26 | names(cons) 27 | 28 | # Polishing up 29 | ind_cat <- data.frame(cbind(m1, m2, m3)) 30 | rm(m1, m2, m3) 31 | names(ind_cat) <- cat_labs 32 | head(ind_cat) 33 | -------------------------------------------------------------------------------- /data/CakeMap/inc-est-2001.csv: -------------------------------------------------------------------------------- 1 | ,,OCODE,NAME,Avinc 2 | 00CX,Bradford,00CXFA,Baildon,620 3 | 00CX,Bradford,00CXFB,Bingley,600 4 | 00CX,Bradford,00CXFC,Bingley Rural,620 5 | 00CX,Bradford,00CXFD,Bolton,550 6 | 00CX,Bradford,00CXFE,Bowling,360 7 | 00CX,Bradford,00CXFF,Bradford Moor,340 8 | 00CX,Bradford,00CXFG,Clayton,500 9 | 00CX,Bradford,00CXFH,Craven,620 10 | 00CX,Bradford,00CXFJ,Eccleshill,460 11 | 00CX,Bradford,00CXFK,Great Horton,450 12 | 00CX,Bradford,00CXFL,Heaton,480 13 | 00CX,Bradford,00CXFM,Idle,580 14 | 00CX,Bradford,00CXFN,Ilkley,720 15 | 00CX,Bradford,00CXFP,Keighley North,520 16 | 00CX,Bradford,00CXFQ,Keighley South,400 17 | 00CX,Bradford,00CXFR,Keighley West,480 18 | 00CX,Bradford,00CXFS,Little Horton,320 19 | 00CX,Bradford,00CXFT,Odsal,460 20 | 00CX,Bradford,00CXFU,Queensbury,580 21 | 00CX,Bradford,00CXFW,Rombalds,720 22 | 00CX,Bradford,00CXFX,Shipley East,450 23 | 00CX,Bradford,00CXFY,Shipley West,580 24 | 00CX,Bradford,00CXFZ,Thornton,510 25 | 00CX,Bradford,00CXGA,Toller,390 26 | 00CX,Bradford,00CXGB,Tong,420 27 | 00CX,Bradford,00CXGC,Undercliffe,400 28 | 
00CX,Bradford,00CXGD,University,320 29 | 00CX,Bradford,00CXGE,Wibsey,500 30 | 00CX,Bradford,00CXGF,Worth Valley,630 31 | 00CX,Bradford,00CXGG,Wyke,510 32 | 00CY,Calderdale,00CYFA,Brighouse,540 33 | 00CY,Calderdale,00CYFB,Calder Valley,590 34 | 00CY,Calderdale,00CYFC,Elland,480 35 | 00CY,Calderdale,00CYFD,Greetland and Stainland,630 36 | 00CY,Calderdale,00CYFE,Hipperholme and Lightcliffe,610 37 | 00CY,Calderdale,00CYFF,Illingworth,530 38 | 00CY,Calderdale,00CYFG,Luddendenfoot,580 39 | 00CY,Calderdale,00CYFH,Mixenden,420 40 | 00CY,Calderdale,00CYFJ,Northowram and Shelf,610 41 | 00CY,Calderdale,00CYFK,Ovenden,430 42 | 00CY,Calderdale,00CYFL,Rastrick,600 43 | 00CY,Calderdale,00CYFM,Ryburn,630 44 | 00CY,Calderdale,00CYFN,St. John's,350 45 | 00CY,Calderdale,00CYFP,Skircoat,620 46 | 00CY,Calderdale,00CYFQ,Sowerby Bridge,510 47 | 00CY,Calderdale,00CYFR,Todmorden,480 48 | 00CY,Calderdale,00CYFS,Town,460 49 | 00CY,Calderdale,00CYFT,Warley,500 50 | 00CZ,Kirklees,00CZFA,Almondbury,490 51 | 00CZ,Kirklees,00CZFB,Batley East,480 52 | 00CZ,Kirklees,00CZFC,Batley West,470 53 | 00CZ,Kirklees,00CZFD,Birkby,470 54 | 00CZ,Kirklees,00CZFE,Birstall and Birkenshaw,560 55 | 00CZ,Kirklees,00CZFF,Cleckheaton,560 56 | 00CZ,Kirklees,00CZFG,Colne Valley West,540 57 | 00CZ,Kirklees,00CZFH,Crosland Moor,450 58 | 00CZ,Kirklees,00CZFJ,Dalton,480 59 | 00CZ,Kirklees,00CZFK,Deighton,410 60 | 00CZ,Kirklees,00CZFL,Denby Dale,670 61 | 00CZ,Kirklees,00CZFM,Dewsbury East,450 62 | 00CZ,Kirklees,00CZFN,Dewsbury West,450 63 | 00CZ,Kirklees,00CZFP,Golcar,530 64 | 00CZ,Kirklees,00CZFQ,Heckmondwike,560 65 | 00CZ,Kirklees,00CZFR,Holme Valley North,610 66 | 00CZ,Kirklees,00CZFS,Holme Valley South,670 67 | 00CZ,Kirklees,00CZFT,Kirkburton,620 68 | 00CZ,Kirklees,00CZFU,Lindley,580 69 | 00CZ,Kirklees,00CZFW,Mirfield,600 70 | 00CZ,Kirklees,00CZFX,Newsome,410 71 | 00CZ,Kirklees,00CZFY,Paddock,440 72 | 00CZ,Kirklees,00CZFZ,Spen,530 73 | 00CZ,Kirklees,00CZGA,Thornhill,460 74 | 00DA,Leeds,00DAFA,Aireborough,630 75 | 00DA,Leeds,00DAFB,Armley,450 76 | 00DA,Leeds,00DAFC,Barwick and Kippax,620 77 | 00DA,Leeds,00DAFD,Beeston,440 78 | 00DA,Leeds,00DAFE,Bramley,470 79 | 00DA,Leeds,00DAFF,Burmantofts,390 80 | 00DA,Leeds,00DAFG,Chapel Allerton,480 81 | 00DA,Leeds,00DAFH,City and Holbeck,370 82 | 00DA,Leeds,00DAFJ,Cookridge,620 83 | 00DA,Leeds,00DAFK,Garforth and Swillington,610 84 | 00DA,Leeds,00DAFL,Halton,620 85 | 00DA,Leeds,00DAFM,Harehills,380 86 | 00DA,Leeds,00DAFN,Headingley,390 87 | 00DA,Leeds,00DAFP,Horsforth,680 88 | 00DA,Leeds,00DAFQ,Hunslet,400 89 | 00DA,Leeds,00DAFR,Kirkstall,490 90 | 00DA,Leeds,00DAFS,Middleton,500 91 | 00DA,Leeds,00DAFT,Moortown,610 92 | 00DA,Leeds,00DAFU,Morley North,610 93 | 00DA,Leeds,00DAFW,Morley South,580 94 | 00DA,Leeds,00DAFX,North,650 95 | 00DA,Leeds,00DAFY,Otley and Wharfedale,650 96 | 00DA,Leeds,00DAFZ,Pudsey North,620 97 | 00DA,Leeds,00DAGA,Pudsey South,550 98 | 00DA,Leeds,00DAGB,Richmond Hill,400 99 | 00DA,Leeds,00DAGC,Rothwell,590 100 | 00DA,Leeds,00DAGD,Roundhay,710 101 | 00DA,Leeds,00DAGE,Seacroft,390 102 | 00DA,Leeds,00DAGF,University,350 103 | 00DA,Leeds,00DAGG,Weetwood,540 104 | 00DA,Leeds,00DAGH,Wetherby,680 105 | 00DA,Leeds,00DAGJ,Whinmoor,510 106 | 00DA,Leeds,00DAGK,Wortley,490 107 | 00DB,Wakefield,00DBFA,Castleford Ferry Fryston,430 108 | 00DB,Wakefield,00DBFB,Castleford Glasshoughton,470 109 | 00DB,Wakefield,00DBFC,Castleford Whitwood,440 110 | 00DB,Wakefield,00DBFD,Crofton and Ackworth,570 111 | 00DB,Wakefield,00DBFE,Featherstone,450 112 | 00DB,Wakefield,00DBFF,Hemsworth,430 113 | 
00DB,Wakefield,00DBFG,Horbury,550 114 | 00DB,Wakefield,00DBFH,Knottingley,440 115 | 00DB,Wakefield,00DBFJ,Normanton and Sharlston,480 116 | 00DB,Wakefield,00DBFK,Ossett,560 117 | 00DB,Wakefield,00DBFL,Pontefract North,490 118 | 00DB,Wakefield,00DBFM,Pontefract South,520 119 | 00DB,Wakefield,00DBFN,South Elmsall,460 120 | 00DB,Wakefield,00DBFP,South Kirkby,430 121 | 00DB,Wakefield,00DBFQ,Stanley and Altofts,580 122 | 00DB,Wakefield,00DBFR,Stanley and Wrenthorpe,610 123 | 00DB,Wakefield,00DBFS,Wakefield Central,430 124 | 00DB,Wakefield,00DBFT,Wakefield East,390 125 | 00DB,Wakefield,00DBFU,Wakefield North,450 126 | 00DB,Wakefield,00DBFW,Wakefield Rural,620 127 | 00DB,Wakefield,00DBFX,Wakefield South,610 128 | -------------------------------------------------------------------------------- /data/CakeMap/load-all.R: -------------------------------------------------------------------------------- 1 | # Loading the aggregate dataset, saving as all.msim 2 | getwd() # should be in the smsim-course folder 3 | con1 <- read.csv("data/cakeMap/con1.csv") # age/sex variable 4 | con2 <- read.csv("data/cakeMap/con2.csv") # no car / car 5 | con3 <- read.csv("data/cakeMap/con3.csv") # ns-sec 6 | names(con1) 7 | names(con2) 8 | names(con3) 9 | 10 | con2 <- data.frame(cbind(con2[,1] - con2[,2], con2[,2])) 11 | names(con2) <- c("Car", "NoCar") 12 | head(con2) 13 | 14 | sum(con1); sum(con2); sum(con3) 15 | c(sum(con1), sum(con2), sum(con3)) / sum(con1) # how much the values deviate from expected 16 | 17 | con.pop <- rowSums(con1) 18 | con1 <- round(con1 * con.pop / rowSums(con1)) 19 | con2 <- round(con2 * con.pop / rowSums(con2)) 20 | con3 <- round(con3 * con.pop / rowSums(con3)) 21 | 22 | sum(con1); sum(con2); sum(con3); # all the numbers should be equal - this is close enough! 23 | 24 | # bind all the data frames together 25 | all.msim <- cbind(con1 26 | ,con2 27 | ,con3 28 | ) 29 | 30 | which(all.msim == 0) 31 | range(all.msim) # range of values - there are no zeros 32 | mean(con.pop) # average number of individuals in each zone 33 | 34 | # in case there are zeros, set just above 1 to avoid subsequent problems 35 | con1[con1 == 0] <- con2[con2 == 0] <- con3[con3 == 0] <- 0.0001 36 | # previous step avoids zero values (aren't any in this case...) 
37 | 38 | head(all.msim) 39 | 40 | category.labels <- names(all.msim) # define the category variables we're working with 41 | 42 | write.csv(all.msim, "data/cakeMap/cons.csv", row.names=F) 43 | 44 | -------------------------------------------------------------------------------- /data/CakeMap/process-age.R: -------------------------------------------------------------------------------- 1 | # Converting the data into a suitable form 2 | # We need the age to be classified as follows: 3 | # 16 to 24, 25 to 34, 35 to 44, 45 to 54, 55 to 64, 65 to 74, 75 and over 4 | # We will also categorise by male and female 5 | 6 | # setwd("cakeMap/") # navigate into cakeMap directory 7 | # (try typing 'getwd() or Session > Set Working Directory if this does not work) 8 | 9 | ageNames <- c("m16_24", "m25_34", "m35_44", "m45_54", "m55_64", "m65_74", 10 | "f16_24", "f25_34", "f35_44", "f45_54", "f55_64", "f65_74") # the output we want 11 | 12 | age <- read.csv("age-sex-raw.csv") 13 | names(age) 14 | age[1:3,6] # note that the first 2 rows are not needed 15 | rawNames <- age[1,] 16 | age <- age[-c(1,2),] 17 | class(age[,6]) # due to mix of character and numeric data, it's loaded factors 18 | 19 | age <- read.csv("age-sex-raw.csv", skip=2) # reload data only selecting numbers 20 | head(age[1:7]) 21 | class(age[,6]) # now its integer 22 | head(age) 23 | plot(colSums(age[6:ncol(age)])) 24 | 25 | # first category: males 16 - 24 26 | sel <- seq(6, (24-16) * 2 + 6, by = 2) 27 | rawNames[sel] # double check we have the correct categories 28 | assign(x = ageNames[1], value = rowSums(age[,sel])) 29 | 30 | # second category: males 25 - 34 31 | selt <- seq(max(sel) + 2, (34 - 25) * 2 + max(sel) + 2, by = 2) 32 | rawNames[selt] # double check we have the correct categories 33 | 34 | con1 <- data.frame(matrix(nrow = nrow(age), ncol = length(ageNames))) 35 | names(con1) <- ageNames 36 | con1[1] <- rowSums(age[sel]) 37 | 38 | # automating the process 39 | for(i in 2:6){ 40 | sel <- seq(max(sel) + 2, 9 * 2 + max(sel) + 2, by = 2) 41 | print(rawNames[sel]) # test it works 42 | con1[i] <- rowSums(age[sel], na.rm=T) 43 | } 44 | 45 | # first category: females 16 - 24 46 | sel <- seq(7, (24-16) * 2 + 7, by = 2) 47 | rawNames[sel] # double check we have the correct categories 48 | con1[7] <- rowSums(age[sel]) 49 | names(con1) 50 | for(i in 2:6){ 51 | sel <- seq(max(sel) + 2, 9 * 2 + max(sel) + 2, by = 2) 52 | print(rawNames[sel]) # test it works 53 | con1[i+6] <- rowSums(age[sel], na.rm=T) 54 | } 55 | 56 | plot(colSums(con1)) 57 | write.csv(con1, "con1.csv", row.names = F) 58 | -------------------------------------------------------------------------------- /data/CakeMap/process-car.R: -------------------------------------------------------------------------------- 1 | # Script to process car ownership 2 | 3 | car <- read.csv("cakeMap/cars-raw.csv", skip = 2) 4 | head(car) 5 | 6 | write.csv(car[6:7], file="cakeMap/con2.csv", row.names=F) 7 | -------------------------------------------------------------------------------- /data/CakeMap/process-nssec.R: -------------------------------------------------------------------------------- 1 | nssecNames <- c("1.1", "1.2", 2:8, "NA") 2 | nssec <- read.csv("cakeMap/nssec-raw.csv", skip=1) 3 | head(nssec[1:6]) 4 | names(nssec)[1:10] 5 | names(nssec) <- gsub(pattern="Age...Age.16.to.74...NS.SeC..National.Statistics.Socio.economic.Classification....", replacement="", 6 | names(nssec)) 7 | names(nssec) 8 | Other <- rowSums(nssec[56:60]) 9 | plot(colSums(nssec[7:20])) 10 | (sel <- grep("^[0-9]", 
names(nssec))) 11 | nssec <- nssec[sel] 12 | 13 | # clean up column names 14 | library(stringr) 15 | names(nssec) <- str_split_fixed(names(nssec), "\\.[A-Z]", 2)[,1] 16 | names(nssec) <- gsub("\\.$", "", names(nssec)) 17 | head(nssec) 18 | 19 | # remove "1" category, add Other 20 | nssec[1] <- NULL 21 | nssec <- cbind(nssec, Other) 22 | head(nssec) 23 | write.csv(nssec, "cakeMap/con3.csv", row.names = F) 24 | 25 | -------------------------------------------------------------------------------- /data/CakeMap/wards.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/data/CakeMap/wards.RData -------------------------------------------------------------------------------- /data/SimpleWorld/age.csv: -------------------------------------------------------------------------------- 1 | "a0.49","a.50+" 2 | 8,4 3 | 2,8 4 | 7,4 5 | -------------------------------------------------------------------------------- /data/SimpleWorld/ind-full.csv: -------------------------------------------------------------------------------- 1 | "id","age","sex","income" 2 | 1,59,"m",2868 3 | 2,54,"m",2474 4 | 3,35,"m",2231 5 | 4,73,"f",3152 6 | 5,49,"f",2473 -------------------------------------------------------------------------------- /data/SimpleWorld/ind.csv: -------------------------------------------------------------------------------- 1 | "id","age","sex" 2 | 1,59,"m" 3 | 2,54,"m" 4 | 3,35,"m" 5 | 4,73,"f" 6 | 5,49,"f" 7 | -------------------------------------------------------------------------------- /data/SimpleWorld/sex.csv: -------------------------------------------------------------------------------- 1 | "m","f" 2 | 6,6 3 | 4,6 4 | 3,8 5 | -------------------------------------------------------------------------------- /deploy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -o errexit -o nounset 3 | BASE_REPO=$PWD 4 | 5 | update_website() { 6 | cd ..; mkdir gh-pages; cd gh-pages 7 | git init 8 | git config user.name "Robin Lovelace" 9 | git config user.email "rob00x@gmail.com" 10 | git config --global push.default simple 11 | git remote add upstream "https://$GH_TOKEN@github.com/Robinlovelace/spatial-microsim-book.git" 12 | git fetch upstream 2>err.txt 13 | git checkout gh-pages 14 | 15 | cp -fvr $BASE_REPO/_book/* . 16 | git add *.html; git add libs/; git add figures/ 17 | git add _main_files/*; git add *.json 18 | git commit -a -m "Updating book (${TRAVIS_BUILD_NUMBER})" 19 | git status 20 | git push 2>err.txt 21 | cd .. 
22 | } 23 | 24 | update_website -------------------------------------------------------------------------------- /elsevier-harvard.csl: -------------------------------------------------------------------------------- 1 | 2 | 240 | -------------------------------------------------------------------------------- /figures/Belgium/BadSize.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/Belgium/BadSize.png -------------------------------------------------------------------------------- /figures/Belgium/CM_ENF.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/Belgium/CM_ENF.png -------------------------------------------------------------------------------- /figures/Belgium/Couples.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/Belgium/Couples.png -------------------------------------------------------------------------------- /figures/Belgium/NonAssigne.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/Belgium/NonAssigne.png -------------------------------------------------------------------------------- /figures/Belgium/diplome.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/Belgium/diplome.png -------------------------------------------------------------------------------- /figures/Belgium/diplome_statut.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/Belgium/diplome_statut.png -------------------------------------------------------------------------------- /figures/Belgium/statut.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/Belgium/statut.png -------------------------------------------------------------------------------- /figures/CakeMap-lores.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/CakeMap-lores.png -------------------------------------------------------------------------------- /figures/Couple_SE.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/Couple_SE.png -------------------------------------------------------------------------------- /figures/HH-CO.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/HH-CO.png 
-------------------------------------------------------------------------------- /figures/HHCouplesBelgium.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/HHCouplesBelgium.png -------------------------------------------------------------------------------- /figures/HHCouplesNamur.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/HHCouplesNamur.jpg -------------------------------------------------------------------------------- /figures/IllustrationCouples.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/IllustrationCouples.png -------------------------------------------------------------------------------- /figures/Jojo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/Jojo.png -------------------------------------------------------------------------------- /figures/Jojo_JASS.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/Jojo_JASS.png -------------------------------------------------------------------------------- /figures/Jojo_JASS2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/Jojo_JASS2.png -------------------------------------------------------------------------------- /figures/RandomUnif100000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/RandomUnif100000.png -------------------------------------------------------------------------------- /figures/TAEOptim_GenSA_Mo.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/TAEOptim_GenSA_Mo.pdf -------------------------------------------------------------------------------- /figures/TAEOptim_GenSA_Mo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/TAEOptim_GenSA_Mo.png -------------------------------------------------------------------------------- /figures/TRESISModels.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/TRESISModels.png -------------------------------------------------------------------------------- /figures/TimeCakeMap.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/TimeCakeMap.png -------------------------------------------------------------------------------- /figures/TimeOptim_GenSA_Mo.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/TimeOptim_GenSA_Mo.pdf -------------------------------------------------------------------------------- /figures/TimeOptim_GenSA_Mo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/TimeOptim_GenSA_Mo.png -------------------------------------------------------------------------------- /figures/Trafic_Jojo2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/Trafic_Jojo2.png -------------------------------------------------------------------------------- /figures/Trafic_jojo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/Trafic_jojo.png -------------------------------------------------------------------------------- /figures/agri-example-hynes-2008.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/agri-example-hynes-2008.png -------------------------------------------------------------------------------- /figures/austerity.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/austerity.png -------------------------------------------------------------------------------- /figures/co-vs-ipf-schema.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/co-vs-ipf-schema.png -------------------------------------------------------------------------------- /figures/cover-image.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/cover-image.jpg -------------------------------------------------------------------------------- /figures/fit-obs-sim-simple-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/fit-obs-sim-simple-5.png -------------------------------------------------------------------------------- /figures/fsimple1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/fsimple1.png -------------------------------------------------------------------------------- /figures/history01.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/history01.png -------------------------------------------------------------------------------- /figures/incomeCake.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/incomeCake.png -------------------------------------------------------------------------------- /figures/integerisation-algorithms.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/integerisation-algorithms.png -------------------------------------------------------------------------------- /figures/jtg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/jtg.png -------------------------------------------------------------------------------- /figures/msim-flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/msim-flow.png -------------------------------------------------------------------------------- /figures/msim-schema.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/msim-schema.png -------------------------------------------------------------------------------- /figures/nl-chooser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/nl-chooser.png -------------------------------------------------------------------------------- /figures/nl-graphics-window.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/nl-graphics-window.png -------------------------------------------------------------------------------- /figures/nl-income-boxplots.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/nl-income-boxplots.png -------------------------------------------------------------------------------- /figures/nl-plots.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/nl-plots.png -------------------------------------------------------------------------------- /figures/nl-simpleworld-negotiating.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/nl-simpleworld-negotiating.png 
-------------------------------------------------------------------------------- /figures/nl-simpleworld-populated.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/nl-simpleworld-populated.png -------------------------------------------------------------------------------- /figures/nl-simpleworld.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/nl-simpleworld.png -------------------------------------------------------------------------------- /figures/nl-sliders.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/nl-sliders.png -------------------------------------------------------------------------------- /figures/nl-ticks.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/nl-ticks.png -------------------------------------------------------------------------------- /figures/nl-zones.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/nl-zones.png -------------------------------------------------------------------------------- /figures/optim-its.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/optim-its.png -------------------------------------------------------------------------------- /figures/optim-time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/optim-time.png -------------------------------------------------------------------------------- /figures/raw-data-screenshot.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/raw-data-screenshot.jpeg -------------------------------------------------------------------------------- /figures/rstudio-autocomplete.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/rstudio-autocomplete.png -------------------------------------------------------------------------------- /figures/rstudio-environment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/rstudio-environment.png -------------------------------------------------------------------------------- /figures/simPop-results-eg.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/simPop-results-eg.png -------------------------------------------------------------------------------- /figures/simpleworld-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/simpleworld-1.png -------------------------------------------------------------------------------- /figures/studio-basic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/studio-basic.png -------------------------------------------------------------------------------- /figures/vingtile.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/vingtile.png -------------------------------------------------------------------------------- /figures/why-msim-maup.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/figures/why-msim-maup.png -------------------------------------------------------------------------------- /fractional_weights/BA-MakeCakeSimFractional.R: -------------------------------------------------------------------------------- 1 | ############################################ 2 | #### From the spatial-microsim-book project 3 | #### https://github.com/Robinlovelace/spatial-microsim-book 4 | ############################################ 5 | 6 | # Additions from Ben Anderson (@dataknut) 7 | # clear out all old objects etc to avoid confusion 8 | rm(list = ls()) 9 | 10 | # Loading the data: Ensure R is in the right working directory 11 | ind <- read.csv("../data/CakeMap/ind.csv") 12 | cons <- read.csv("../data/CakeMap/cons.csv") 13 | 14 | # Take a quick look at the data 15 | head(ind) 16 | head(cons) 17 | 18 | # load constraints separately - normally this would be first stage 19 | con1 <- cons[1:12] # load the age/sex constraint 20 | con2 <- cons[13:14] # load the car/no car constraint 21 | con3 <- cons[15:24] # socio-economic class 22 | 23 | cat_labs <- names(cons) # category names, from correct from cons.R 24 | 25 | # set-up aggregate values - column for each category 26 | source("../data/CakeMap/categorise.R") # this script must be customised to input data 27 | 28 | # check constraint totals - should be true 29 | sum(ind_cat[,1:ncol(con1)]) == nrow(ind) # is the number in each category correct? 30 | sum(ind_cat[,ncol(con1)+1:ncol(con2)]) == nrow(ind) 31 | 32 | # create 2D weight matrix (individuals, areas) 33 | weights <- array(NA, dim=c(nrow(ind),nrow(cons))) 34 | 35 | # convert survey data into aggregates to compare with census (3D matix) 36 | ind_agg <- matrix(colSums(ind_cat), nrow(cons), ncol(cons), byrow = T) 37 | ind_agg[1:5,1:10] # look at what we've created - n. 
individuals replicated throughout 38 | 39 | ############## The IPF part ############# 40 | # make sure you have this package 41 | library(ipfp) 42 | cons <- apply(cons, 2, as.numeric) 43 | ind_catt <- t(ind_cat) 44 | # set up initial vector as a load of 1s 45 | x0 <- rep(1, nrow(ind)) 46 | # you can use x0 as a way to start from the original survey weights 47 | # as it just has to be a numeric initial vector (length ncol) 48 | # this might be useful if you have a small number of constraints but 49 | # if you have many the effect of the IPF will tend to drown them out 50 | 51 | # now loop over the zones and save ipfp results to weights 52 | for(i in 1:ncol(weights)){ 53 | weights[,i] <- ipfp(cons[i,], ind_catt, x0, maxit = 20) 54 | } 55 | 56 | ### Convert back to aggregates for testing 57 | for (i in 1:nrow(cons)){ # convert con1 weights back into aggregates 58 | ind_agg[i,] <- colSums(ind_cat * weights[,i]) 59 | } 60 | 61 | # test results for first row (not necessary for model) 62 | # you could iterate over this to test each zone 63 | ind_agg[1,1:15] - cons[1,1:15] # should be zero for final column - last constraint 64 | # which should remind us that IPF works to an order - so the last constraint is 65 | # fitted perfectly. This might matter if you think other constraints should be fitted perfectly... 66 | 67 | # Test correlations between original constraints and new aggregates 68 | cor(as.numeric(ind_agg), as.numeric(cons)) # fit between contraints and estimate 69 | # Might be worth then testing zone by zone 70 | # save the results into corr_by_zone_res 71 | corr_by_zone_res <- NULL 72 | for (i in 1:nrow(cons)){ 73 | corr_by_zone_res[i] <- cor(as.numeric(ind_agg[i,]), as.numeric(cons[i,])) 74 | } 75 | # look at range of zone by zone correlations 76 | range(corr_by_zone_res) 77 | 78 | # at this point RL wants to integerise to create a spatial microdataset of whole 'units' 79 | # But we don't have to - for many applications we may want to keep all the survey units (people or households) 80 | # with their fractional weights to avoid losing information. It also helps if we're interested in distributional 81 | # statistics for each area. 82 | 83 | # to do this simply reshape the weights so that each row is 1 individual per zone with weight 84 | 85 | # make weights a dataframe first 86 | weights_df <- as.data.frame(weights) 87 | # reshape it (needs 'stats' package) 88 | weights_l <- reshape(weights_df, direction = "long", varying = names(weights_df), sep = "") 89 | # fix the variable names after doing this 90 | names(weights_l)[names(weights_l) == "time"] <- "zone" # why can't this work directly on the name?! 91 | names(weights_l)[names(weights_l) == "V"] <- "weight" 92 | names(weights_l) 93 | 94 | # now do the internal join to match the indivudal level data to the long form file 95 | # create id variable in ind (assumes final order = same!) 
96 | ind$id <- 1:nrow(ind) 97 | # make sure you have this package 98 | library(dplyr) 99 | final_micro <- inner_join(weights_l,ind, by = "id") 100 | 101 | # check 102 | names(final_micro) 103 | nrow(final_micro) 104 | # notice how many fewer rows there are than in the original CakeMap.R version 105 | # - we have not needed to duplicate individuals as we are keeping the fractional weights 106 | 107 | # now let's add the geonames 108 | # get geo names 109 | geonames <- read.csv("../data/CakeMap/cars-raw.csv") 110 | geonames_df <- as.data.frame(geonames[3:126,2]) 111 | # create a zoneid 112 | geonames_df$zone <- 1:nrow(geonames_df) 113 | names(geonames_df)[1] <- "zone_name" 114 | 115 | final_micro_geo <- inner_join(geonames_df,final_micro, by = "zone") 116 | 117 | # so now we have our final long form synthetic fractional weights microdata table 118 | # with n * z rows where n = nrow(ind) and z = nrow(cons). 119 | # This is in contrast to the integerised version where we would have the 120 | # sum of npop(zi) where npop(zi) is the population for each zone 121 | # This would be a much larger file... 122 | 123 | # Test the results!! 124 | 125 | # change nssec8 to numeric 126 | final_micro_geo$NSSEC8n <- as.numeric(final_micro_geo$NSSEC8) 127 | summary(final_micro_geo$NSSEC8n) 128 | 129 | # careful, 97 = unset 130 | final_micro_geo$NSSEC8n[final_micro_geo$NSSEC8n > 10] <- NA 131 | summary(final_micro_geo$NSSEC8n) 132 | # use na.rm to ignore them 133 | # overall mean 134 | mean(final_micro_geo$NSSEC8n, na.rm = TRUE) 135 | # weighted mean - to show the difference 136 | weighted.mean(final_micro_geo$NSSEC8n, w = final_micro_geo$weight, na.rm = TRUE) 137 | 138 | # mean by zone - this fails claiming x and w are different lengths, why? 139 | aggregate(final_micro_geo$NSSEC8n, by= list(final_micro_geo$zone), FUN = weighted.mean, w = final_micro_geo$weight, na.rm = TRUE) 140 | 141 | # so for now, let's save the file out and do the stats in STATA!! 142 | write.csv(final_micro_geo, file = "final_micro_fractional_cakes_geo.csv", na = ".") 143 | 144 | # now read the summary of cakes by zone (created in STATA) back in 145 | cakes_by_zone <- read.csv("cakes_geo.csv") 146 | 147 | # and do the R mapping thing... 148 | # to do -------------------------------------------------------------------------------- /fractional_weights/BA-process-final_micro_fractional_cakes_geo.do: -------------------------------------------------------------------------------- 1 | * script to read in results of R spatial microsim using ipf and run weighted stats 2 | * in theory this should also be done in R when I work out how! 3 | 4 | * change this to your path! 5 | local where "/Users/ben/Documents/Work/Papers and Conferences/spatial-microsim-r-course" 6 | local path "`where'/spatial-microsim-book-git/fractional_weights" 7 | 8 | insheet using "`path'/final_micro_fractional_cakes_geo.csv", clear 9 | 10 | * create 'dummy' variables ready to collapse to weighted frequencies 11 | gen NCakes_rare = 0 12 | replace NCakes_rare = 1 if ncakes == "rarely" 13 | 14 | gen NCakes_l1 = 0 15 | replace NCakes_l1 = 1 if ncakes == "<1" 16 | 17 | gen NCakes_1_2 = 0 18 | replace NCakes_1_2 = 1 if ncakes == "1-2" 19 | 20 | gen NCakes_3_5 = 0 21 | replace NCakes_3_5 = 1 if ncakes == "3-5" 22 | 23 | gen NCakes_6m = 0 24 | replace NCakes_6m = 1 if ncakes == "6+" 25 | 26 | * keep the data in memory 27 | * immediately obvious = stata's inability to hold 2 or more datasets at a time! 
28 | preserve 29 | * collapse cakes by zone 30 | * this is what the weighted agregate in R should do 31 | collapse (sum) NC* [iw=weight], by(zone_name) 32 | 33 | * list first 5 lines as a check 34 | li in 1/5 35 | 36 | * save the results so we can add to a map 37 | outsheet using "`path'/cakes_geo.csv", comma replace 38 | 39 | * put the data back so we can do other stuff 40 | restore 41 | -------------------------------------------------------------------------------- /fractional_weights/README.md: -------------------------------------------------------------------------------- 1 | spatial-microsim-book 2 | ===================== 3 | 4 | This is a variant on the CakeMap.R code which does not integerise the weights in order to select whole units but keeps the fractional weights and creates a long form data table with these weights attached to the individual cases. 5 | 6 | As I have not (yet) worked out how to do a weighted 'aggregate' in R the script then outputs the table as a .csv file & I use the included STATA script to calculate the number of people in each cake category in each zone. This in turn outputs this result as a .csv file to be read back into R for mapping etc (to do!) 7 | 8 | Comments welcome: dataknut@icloud.com 9 | 10 | -------------------------------------------------------------------------------- /fractional_weights/cakes_geo.csv: -------------------------------------------------------------------------------- 1 | zone_name,NCakes_rare,NCakes_l1,NCakes_1_2,NCakes_3_5,NCakes_6m 2 | "E05001341",788.7496,1083.087,3243.491,3429.721,2799.951 3 | "E05001342",1027.168,1281.698,3895.244,4122.652,3094.239 4 | "E05001343",926.5318,1259.879,3818.923,3970.354,3155.313 5 | "E05001344",1015.429,1121.801,3473.085,3503.431,2353.254 6 | "E05001345",1507.246,1394.03,4242.255,4123.901,2343.568 7 | "E05001346",1380.517,1343.121,4190.762,4384.934,2214.666 8 | "E05001347",2030.488,1468.18,4808.129,7922.088,1681.115 9 | "E05001348",1040.052,1174.773,3562.537,3456.14,2339.497 10 | "E05001349",816.2757,1131.446,3404.589,3583.401,3008.289 11 | "E05001350",1218.7,1283.018,3732.672,3711.147,2595.464 12 | "E05001351",1149.97,1176.715,3748.55,3618.266,2198.5 13 | "E05001352",934.4416,1072.336,3559.614,3634.233,2053.375 14 | "E05001353",918.9893,1186.807,3493.203,3713.727,2820.274 15 | "E05001354",677.4254,946.1523,2857.008,3029.763,2408.652 16 | "E05001355",1196.71,1153.158,3749.179,3666.576,2222.376 17 | "E05001356",995.5418,1169.456,3581.889,3590.43,2678.683 18 | "E05001357",1153.808,1217.382,3498.346,3426.038,2480.425 19 | "E05001358",1552.208,1416.026,4306.451,4253.147,2297.168 20 | "E05001359",1453.772,1287.276,4104.586,4190.749,2142.617 21 | "E05001360",933.625,1183.684,3518.908,3702.542,2699.241 22 | "E05001361",1152.941,1250.887,3623.927,3576.16,2530.086 23 | "E05001362",1009.628,1127.131,3388.394,3515.785,2425.061 24 | "E05001363",1074.221,1231.575,3692.031,3627.587,2620.585 25 | "E05001364",1154.604,1224.759,4046.874,4454.391,2258.372 26 | "E05001365",1541.816,1471.868,4326.89,4129.095,2733.33 27 | "E05001366",520.4231,795.0496,2368.377,2542.555,2079.596 28 | "E05001367",996.3626,1060.91,3113.027,3039.665,2172.035 29 | "E05001368",1095.407,1205.545,3487.233,3506.009,2526.805 30 | "E05001369",772.9461,1044.935,3116.72,3253.079,2658.319 31 | "E05001370",957.0417,1096.865,3265.335,3303.233,2495.525 32 | "E05001371",727.1841,822.8037,2390.928,2451.593,1899.492 33 | "E05001372",726.9758,899.3874,2644.114,2576.089,2060.434 34 | "E05001373",754.8888,855.8646,2503.203,2535.555,1924.489 35 | 
"E05001374",609.1226,823.1774,2459.433,2564.737,2039.53 36 | "E05001375",596.7382,796.1556,2374.542,2504.5,2023.064 37 | "E05001376",877.9653,949.258,2757.127,2693.096,1988.554 38 | "E05001377",599.7622,766.4562,2313.686,2342.798,1861.298 39 | "E05001378",613.9335,826.2873,2452.079,2595.25,2111.45 40 | "E05001379",935.9994,931.5789,2672.054,2592.041,1793.327 41 | "E05001380",1067.006,1008.174,3221.914,3195.288,1721.617 42 | "E05001381",698.3071,829.3501,2410.238,2472.328,1946.777 43 | "E05001382",614.3989,818.204,2450.78,2530.658,2008.959 44 | "E05001383",697.1795,873.6677,2687.298,2830.359,2036.496 45 | "E05001384",796.624,878.3806,2552.94,2635.454,1963.601 46 | "E05001385",793.0138,904.8457,2651.938,2598.89,1993.312 47 | "E05001386",951.2349,914.8583,2627.314,2648.848,1917.744 48 | "E05001387",773.8481,866.4239,2580.685,2541.224,1875.819 49 | "E05001389",1394.413,1398.818,4296.884,4111.389,2713.496 50 | "E05001390",1247.139,1275.695,4178.287,4087.808,2647.071 51 | "E05001391",1193.686,1314.861,4133.524,4100.956,2884.973 52 | "E05001392",960.0002,1168.988,3475.47,3581.839,2781.703 53 | "E05001393",1002.853,1210.083,3552.11,3700.761,2908.193 54 | "E05001396",1272.262,1291.213,3902.906,3857.708,2608.912 55 | "E05001397",813.4417,1146.423,3412.729,3634.575,3083.831 56 | "E05001398",1295.28,1344.619,4062.289,4091.768,2941.043 57 | "E05001399",1171.867,1242.268,4124.636,4039.748,2584.48 58 | "E05001400",1317.53,1278.765,4118.64,4073.498,2508.567 59 | "E05001401",1149.252,1331.316,3937.425,4073.791,2977.216 60 | "E05001402",1489.53,1350.304,4531.379,4793.342,2444.445 61 | "E05001403",1028.351,1187.217,3686.912,3721.417,2682.103 62 | "E05001405",933.0265,1335.367,4017.342,4198.878,3454.386 63 | "E05001407",1118.539,1365.281,4105.374,4207.638,3137.168 64 | "E05001408",1157.592,1412.459,4103.268,4253.207,3314.474 65 | "E05001409",1096.099,1379.314,4121.874,4285.684,3448.028 66 | "E05001410",1685.685,1369.134,4729.579,5844.794,1991.809 67 | "E05001411",944.7602,1309.646,4059.667,4248.011,3393.915 68 | "E05001412",1182,1535.306,4872.22,5053.163,3637.31 69 | "E05001413",1189.969,1611.405,4798.66,5072.652,3861.313 70 | "E05001414",2081.399,1912.993,5635.011,5782.487,3739.109 71 | "E05001415",1847.153,1664.445,4807.342,4662.09,3016.97 72 | "E05001416",1484.9,1613.027,4606.061,4681.025,3258.986 73 | "E05001417",2361.826,1946.151,5593.217,5256.458,2920.347 74 | "E05001418",1200.609,1592.462,4818.543,5161.541,3903.844 75 | "E05001419",1846.276,1750.126,5303.719,5703.543,3050.336 76 | "E05001420",2877.968,2201.887,7107.304,13815.4,2597.437 77 | "E05001421",1427.939,1629.767,4631.379,4708.701,3530.215 78 | "E05001422",1642.543,1801.259,5171.37,5302.86,3802.968 79 | "E05001423",1076.644,1419.961,4119.665,4357.1,3486.629 80 | "E05001424",2237.867,2005.265,5627.167,5365.581,2919.119 81 | "E05001425",1105.931,1564.565,4645.131,5024.538,3931.835 82 | "E05001426",765.8055,1202.659,3669.349,3952.576,3378.611 83 | "E05001427",1097.612,648.4695,2751.216,14550.66,445.0432 84 | "E05001428",1089.845,1467.317,4583.197,5055.402,3525.24 85 | "E05001429",2558.201,1153.403,4643.45,13661.53,983.421 86 | "E05001430",1848.058,1804.51,5024.786,4767.17,3201.475 87 | "E05001431",1162.698,1501.475,4350.934,4631.97,3712.923 88 | "E05001432",1645.859,1536.871,4944.128,7054.947,2258.196 89 | "E05001433",1999.059,1985.569,5578.113,5434.126,3653.133 90 | "E05001434",1244.736,1578.177,4960.665,5260.906,3586.516 91 | "E05001435",1290.863,1641.593,4818.303,5115.008,4023.233 92 | "E05001436",1407.143,1639.443,4671.05,4937.873,3666.492 93 | 
"E05001437",1250.437,1586.9,4588.601,4764.63,3830.432 94 | "E05001438",1380.138,1643.632,4693.117,4864.599,3696.514 95 | "E05001439",1162.646,1468.913,4239.851,4441.493,3474.097 96 | "E05001440",1194.214,1508.015,4809.567,5029.8,3416.404 97 | "E05001441",1398.875,1552.783,4577.783,4594.58,3381.979 98 | "E05001442",1325.444,1443.412,4939.498,7159.809,2469.836 99 | "E05001443",1013.459,1343.515,4158.765,4491.534,3670.727 100 | "E05001444",926.3325,1122.747,3437.887,3512.31,2894.723 101 | "E05001445",1073.639,1079.739,3128.049,3009.876,2318.697 102 | "E05001446",1061.056,1186.51,3474.851,3606.515,2801.068 103 | "E05001447",1077.493,1174.029,3325.919,3456.62,2698.938 104 | "E05001448",885.837,1089.883,3243.205,3334.574,2701.501 105 | "E05001449",1099.474,1174.684,3417.173,3358.599,2694.071 106 | "E05001450",1090.877,1112.836,3352.628,3250.854,2510.805 107 | "E05001451",880.2195,1086.064,3183.007,3290.655,2662.054 108 | "E05001452",982.6305,1025.696,2903.444,2954.476,2290.754 109 | "E05001453",1119.731,1202.496,3518.786,3523.439,2685.548 110 | "E05001454",958.7019,1173.183,3381.786,3510.419,2840.91 111 | "E05001455",1049.057,1195.917,3364.1,3460.112,2708.814 112 | "E05001456",929.9057,1077.278,3123.703,3133.324,2536.788 113 | "E05001457",1268.862,1241.734,3795.465,3684.272,2858.667 114 | "E05001458",884.1049,1119.493,3333.133,3506.355,2760.913 115 | "E05001459",1227.668,1137.13,3502.168,3360.99,2299.044 116 | "E05001460",1259.151,1160.479,3542.685,3503.871,2548.814 117 | "E05001461",890.6943,1240.08,3709.46,3858.709,3218.056 118 | "E05001462",843.2431,943.4784,2903.69,2935.147,2221.441 119 | "E05001463",1116.767,1131.625,3318.236,3164.9,2293.471 120 | "E05001464",828.2547,1055.243,3101.471,3193.602,2590.429 121 | "E05008558",1114.753,1319.397,3994.055,4080.792,2901.003 122 | "E05008559",1013.427,1240.989,3726.994,3875.042,3076.547 123 | "E05008560",1315.003,1291.036,4159.125,3941.103,2544.733 124 | "E05008561",894.7399,1174.829,3489.584,3636.375,2909.472 125 | "E05008562",745.7589,1148.107,3708.319,4179.33,2579.484 126 | -------------------------------------------------------------------------------- /frontmatter/pream.tex: -------------------------------------------------------------------------------- 1 | \documentclass[krantz1,ChapterTOCs]{krantz} 2 | 3 | % settings from RStudio 4 | \usepackage[T1]{fontenc} 5 | \usepackage{lmodern} 6 | \usepackage{amssymb,amsmath} 7 | \usepackage{ifxetex,ifluatex} 8 | \usepackage{fixltx2e} % provides \textsubscript 9 | % use upquote if available, for straight quotes in verbatim environments 10 | \IfFileExists{upquote.sty}{\usepackage{upquote}}{} 11 | \ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex 12 | \usepackage[utf8]{inputenc} 13 | \else % if luatex or xelatex 14 | \ifxetex 15 | \usepackage{mathspec} 16 | \usepackage{xltxtra,xunicode} 17 | \else 18 | \usepackage{fontspec} 19 | \fi 20 | \defaultfontfeatures{Mapping=tex-text,Scale=MatchLowercase} 21 | \newcommand{\euro}{€} 22 | \fi 23 | % use microtype if available 24 | \IfFileExists{microtype.sty}{\usepackage{microtype}}{} 25 | \usepackage{color} 26 | \usepackage{fancyvrb} 27 | \newcommand{\VerbBar}{|} 28 | \newcommand{\VERB}{\Verb[commandchars=\\\{\}]} 29 | \DefineVerbatimEnvironment{Highlighting}{Verbatim}{commandchars=\\\{\}} 30 | % Add ',fontsize=\small' for more characters per line 31 | \usepackage{framed} 32 | \definecolor{shadecolor}{RGB}{248,248,248} 33 | \newenvironment{Shaded}{\begin{snugshade}}{\end{snugshade}} 34 | \newcommand{\KeywordTok}[1]{\textcolor[rgb]{0.13,0.29,0.53}{\textbf{{#1}}}} 35 
| \newcommand{\DataTypeTok}[1]{\textcolor[rgb]{0.13,0.29,0.53}{{#1}}} 36 | \newcommand{\DecValTok}[1]{\textcolor[rgb]{0.00,0.00,0.81}{{#1}}} 37 | \newcommand{\BaseNTok}[1]{\textcolor[rgb]{0.00,0.00,0.81}{{#1}}} 38 | \newcommand{\FloatTok}[1]{\textcolor[rgb]{0.00,0.00,0.81}{{#1}}} 39 | \newcommand{\CharTok}[1]{\textcolor[rgb]{0.31,0.60,0.02}{{#1}}} 40 | \newcommand{\StringTok}[1]{\textcolor[rgb]{0.31,0.60,0.02}{{#1}}} 41 | \newcommand{\CommentTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textit{{#1}}}} 42 | \newcommand{\OtherTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{{#1}}} 43 | \newcommand{\AlertTok}[1]{\textcolor[rgb]{0.94,0.16,0.16}{{#1}}} 44 | \newcommand{\FunctionTok}[1]{\textcolor[rgb]{0.00,0.00,0.00}{{#1}}} 45 | \newcommand{\RegionMarkerTok}[1]{{#1}} 46 | \newcommand{\ErrorTok}[1]{\textbf{{#1}}} 47 | \newcommand{\NormalTok}[1]{{#1}} 48 | \usepackage{longtable,booktabs} 49 | \usepackage{graphicx} 50 | % Redefine \includegraphics so that, unless explicit options are 51 | % given, the image width will not exceed the width of the page. 52 | % Images get their normal width if they fit onto the page, but 53 | % are scaled down if they would overflow the margins. 54 | \makeatletter 55 | \def\ScaleIfNeeded{% 56 | \ifdim\Gin@nat@width>\linewidth 57 | \linewidth 58 | \else 59 | \Gin@nat@width 60 | \fi 61 | } 62 | \makeatother 63 | \let\Oldincludegraphics\includegraphics 64 | {% 65 | \catcode`\@=11\relax% 66 | \gdef\includegraphics{\@ifnextchar[{\Oldincludegraphics}{\Oldincludegraphics[width=\ScaleIfNeeded]}}% 67 | }% 68 | 69 | \setlength{\parindent}{0pt} 70 | \setlength{\parskip}{6pt plus 2pt minus 1pt} 71 | \setlength{\emergencystretch}{3em} % prevent overfull lines 72 | 73 | % % Hadley's hacks 74 | \usepackage[hyphens]{url} 75 | \usepackage[setpagesize=false, % page size defined by xetex 76 | unicode=false, % unicode breaks when used with xetex 77 | % xetex, 78 | hidelinks]{hyperref} 79 | % Place links as footnotes 80 | \renewcommand{\href}[2]{#2 (\url{#1})} 81 | % Use ref for internal links 82 | \renewcommand{\hyperref}[2][???]{\autoref{#1}} 83 | \def\chapterautorefname{Chapter} 84 | \def\sectionautorefname{Section} 85 | \def\subsectionautorefname{Section} 86 | \def\subsubsectionautorefname{Section} 87 | 88 | % Krantz example 89 | 90 | \usepackage{fixltx2e,fix-cm} 91 | \usepackage{amssymb} 92 | \usepackage{amsmath} 93 | \usepackage{graphicx} 94 | \usepackage{subfigure} 95 | \usepackage{makeidx} 96 | \usepackage{multicol} 97 | \usepackage{cleveref} 98 | 99 | \frenchspacing 100 | \tolerance=5000 101 | 102 | \makeindex 103 | 104 | \include{frontmatter/preamble} %place custom commands and macros here 105 | 106 | \begin{document} 107 | 108 | \frontmatter 109 | 110 | \title{Spatial Microsimulation with R} %This is a placeholder titlepage, 111 | \author{Robin Lovelace and Morgane Dumont} 112 | \maketitle 113 | 114 | % \include{frontmatter/dedication} 115 | \cleardoublepage 116 | \setcounter{page}{7} %previous pages will be reserved for frontmatter to be added in later. 
117 | \tableofcontents 118 | % \include{frontmatter/foreword} 119 | \include{frontmatter/preface} 120 | \listoffigures 121 | \listoftables 122 | % \include{frontmatter/contributor} 123 | % \include{frontmatter/symbollist} 124 | 125 | \mainmatter 126 | -------------------------------------------------------------------------------- /index.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: 'Spatial Microsimulation with R' 3 | author: 'Robin Lovelace and Morgane Dumont' 4 | date: '`r Sys.Date()`' 5 | site: "bookdown::bookdown_site" 6 | # rmd_files: ["index.Rmd", "01-introduction.Rmd"] 7 | output: 8 | bookdown::gitbook: default 9 | documentclass: book 10 | link-citations: yes 11 | biblio-style: apalike 12 | github-repo: Robinlovelace/spatial-microsim-book 13 | url: 'https\://spatial-microsim-book.robinlovelace.net' 14 | twitter-handle: robinlovelace 15 | cover-image: figures/cover-image.jpg 16 | description: "Learn how to model systems from the individual to the areal level and discover how to do spatial microsimulation in a reproducible manner using high-performance, open source software." 17 | bibliography: bibliography.bib 18 | --- 19 | 20 | # Welcome {-} 21 | 22 | Welcome to the online home of *Spatial Microsimulation with R*. 23 | 24 | This is a book by [Robin Lovelace](http://robinlovelace.net/) and [Morgane Dumont](https://directory.unamur.be/staff/modumont) (with chapter [10](http://spatial-microsim-book.robinlovelace.net/ha.html) contributed by [Johan Barthélemy](https://smart.uow.edu.au/people/UOW192467.html), chapter [11](http://spatial-microsim-book.robinlovelace.net/tresis.html) contributed by [Richard Ellison](http://sydney.edu.au/business/staff/richard.ellison) and [David Hensher](http://sydney.edu.au/business/staff/david.hensher), and chapter [12](http://spatial-microsim-book.robinlovelace.net/abm) contributed by [Maja Založnik](https://www.oxfordmartin.ox.ac.uk/people/565)). 25 | 26 | [![](https://images.tandf.co.uk/common/jackets/amazon/978149871/9781498711548.jpg)](https://www.crcpress.com/Spatial-Microsimulation-with-R/Lovelace-Dumont/p/book/9781498711548) 27 | 28 | It is published by CRC Press. See their [online store](https://www.crcpress.com/Spatial-Microsimulation-with-R/Lovelace-Dumont/p/book/9781498711548) if you'd like to buy a copy. 29 | If you'd like to crack on and use the content, feel free to browse the chapters via the drop-down menu on the left. 30 | Depending on your interests and level of experience we particularly recommend: 31 | 32 | - Chapter [1](http://spatial-microsim-book.robinlovelace.net/intro.html): what is spatial microsimulation and what is it good for? 33 | - Chapter [2](http://spatial-microsim-book.robinlovelace.net/simpleworld): if you'd like to see a very simple worked example of the process in action. 34 | - Chapter [11](http://spatial-microsim-book.robinlovelace.net/tresis.html): if you're interested in more advanced applications in transport modelling. 35 | - Chapter [12](http://spatial-microsim-book.robinlovelace.net/abm): for the links between spatial microsimulation and agent-based modelling. 36 | 37 | Chapters [3](http://spatial-microsim-book.robinlovelace.net/what-is.html) to [9](http://spatial-microsim-book.robinlovelace.net/nomicrodata.html) explain, with reference to reproducible code 'chunks' embedded in the text, how to generate spatial microdata, with or without a sample population.
38 | Chapter [10](http://spatial-microsim-book.robinlovelace.net/ha.html) demonstrates how to add household-level variables. 39 | 40 | Furthermore, there are add-on chapters for beginners to R or the discipline. 41 | If you're completely new to R and programming in general, check out the [appendix](http://spatial-microsim-book.robinlovelace.net/apr), which will get you up-to-speed quickly. 42 | There's also a [glossary](http://spatial-microsim-book.robinlovelace.net/glossary.html) that explains some of the jargon used in this field of research. 43 | 44 | We've put *Spatial Microsimulation with R* on-line because we want to reduce barriers to learning. 45 | We've made it open source via a [GitHub repository](https://github.com/Robinlovelace/spatial-microsim-book) because we believe in reproducibility and collaboration. 46 | Comments and suggestions are most welcome [there](https://github.com/Robinlovelace/spatial-microsim-book/issues). 47 | If the content of the book helps your research, please cite it ([Lovelace and Dumont, 2016](https://github.com/Robinlovelace/spatial-microsim-book/blob/master/sms-book-citation.bib)). 48 | 49 | ## Reference {-} 50 | 51 | Lovelace, R., Dumont, M., 2016. Spatial microsimulation with R. CRC Press. 52 | -------------------------------------------------------------------------------- /notes/BA-notes.md: -------------------------------------------------------------------------------- 1 | spatial-microsim-book/course 2 | ===================== 3 | 4 | Notes from: 5 | * An Introduction to Spatial Microsimulation using R 6 | * Dr Robin Lovelace and colleagues 7 | * Date: 18/09/2014 - 19/09/2014 8 | * Venue: Room S1,S044-01-0034, First Floor, Alison Richard Building, Sidgwick Site, University of Cambridge, Cambridge 9 | * http://www.ncrm.ac.uk/training/show.php?article=5088 10 | * Recommended pre-reading: http://eprints.ncrm.ac.uk/3348/ 11 | * See also https://github.com/Robinlovelace/spatial-microsim-book 12 | 13 | General course notes 14 | * Recent experience of speeding up R code a lot using a new package - ipfp - http://cran.r-project.org/web/packages/ipfp/index.html 15 | * this package is very fast; you can specify iterations & starting weights (if you wish) but not, seemingly, a stopping rule based on convergence (although investigate the tol = parameter, as this seems to be the sum of squares of the difference between the original constraint vector and the current fitted constraint vector at iteration i (so essentially TAE), so it could be used to control iterations) 16 | * Suggests looking at the Flexible Modelling Framework (Harland) - alternative methods written in Java 17 | * GitHub vs BitBucket - the latter can have private repositories 18 | * use .gitignore to stop uploading particular files e.g.
data/ or .dta 19 | * Big shout for "Spatial Microsimulation: A Reference Guide for Users" http://www.springer.com/social+sciences/population+studies/book/978-94-007-4622-0 20 | * suggests looking at Applied Spatial Data Analysis with R (Bivand et al.) http://www.springer.com/statistics/life+sciences,+medicine+%26+health/book/978-1-4614-7617-7 21 | * Parallel processing 22 | * not worth using if datasets are very small and you have a small number of cores, as the management overhead is high 23 | * Assumptions 24 | * individual data is representative 25 | * target vars of interest -> some function of constraints (might not be) 26 | * correlation between constraints & target vars is constant over space 27 | * relationship between constraint distributions/tables is the same at local (constraints) & national (individual) levels 28 | 29 | Other notes & conversations 30 | * Spatial MSM is one option for small area estimation, see: "Evaluations and improvements in small area estimation methodologies" http://eprints.ncrm.ac.uk/3210/ 31 | * Belgian models - contact = Gijs Dekkers (https://www.linkedin.com/profile/view?id=164500929), chief editor of the International Journal of Microsimulation (http://www.microsimulation.org/IJM/IJM_editorial_board.htm). 32 | * open-source toolbox LIAM2 (http://liam2.plan.be), designed for the development of dynamic microsimulation models. 33 | * Sweden - whole popn model http://www.researchgate.net/publication/253561368_The_SVERIGE_Spatial_Microsimulation_Model 34 | * Papers on IPF: 35 | * Simpson, L., & Tranmer, M. (2005). Combining sample and census data in small area estimates: Iterative Proportional Fitting with standard software. The Professional Geographer, 57(2), 222–234. 36 | * Wong, D. (1992). The Reliability of Using the Iterative Proportional Fitting Procedure. The Professional Geographer. Retrieved from http://www.tandfonline.com/doi/abs/10.1111/j.0033-0124.1992.00340.x 37 | * Norman, P. (1999). Putting iterative proportional fitting on the researcher’s desk. Retrieved from http://eprints.whiterose.ac.uk/5029 38 | -------------------------------------------------------------------------------- /notes/L1.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Spatial Microsimulation with R: Lecture 1" 3 | author: "Robin Lovelace" 4 | date: "09/17/2014" 5 | output: ioslides_presentation 6 | --- 7 | 8 | ## Spatial Microsimulation with R 9 | 10 | Aims: 11 | 12 | 1. To provide a solid understanding of the method and applications 13 | 2. To teach its implementation in R in general terms 14 | 3. To provide guidance on next steps 15 | 16 | ## Introduction 17 | 18 | - Housekeeping 19 | - About the course and its teachers 20 | - Lectures and practicals 21 | - Getting help 22 | 23 | ## This morning's agenda 24 | 25 | **9:30 - 11:00** 26 | 27 | - Lecture: what is spatial microsimulation?
28 | - Getting used to working with RStudio (and GitHub) 29 | - Demonstration of what we'll be working on 30 | - Loading the input data (Chapter 3) 31 | 32 | *Refreshments: 11 - 11:15* 33 | 34 | **11:15 - 1:00** 35 | 36 | - Working through Chapter 3 and 4 37 | - Performance 38 | - (Parallel processing in R) 39 | 40 | ## This afternoon 41 | 42 | **1:30 - 2:45** 43 | 44 | - Finishing up and questions about SimpleWorld 45 | - Lecture: Introduction to spatial microsimulation in the wild 46 | - Cleaning messy input data for spatial microsimulation (Chapter 5) 47 | 48 | **3 - 4:30** 49 | 50 | - Performing IPF on CakeMap Data (5.2) 51 | - Description and demonstration of integerisation (5.3) 52 | - Re-cap and questions on key concepts 53 | 54 | ## Tomorrow 55 | 56 | **9:30 - 11** 57 | 58 | - Demonstration analysis of CakeMap data 59 | - Model checking and validation 60 | 61 | **11:15 - 1:30** 62 | 63 | - Visualisations 64 | - Lecture: next steps 65 | - Applying the methods to your data 66 | 67 | 68 | ## The course materials 69 | 70 | - Major update of course materials from May 71 | - New improved code is much faster 72 | - And easier to write 73 | - Booklet -> Book 74 | 75 | ## What is spatial microsimulation? 76 | 77 | 1. A method 78 | 2. An approach 79 | 80 | ## Applications 81 | 82 | - Wide variety of potential applications 83 | - So far main applications have been in health, poverty mapping and transport 84 | - What do you want to use spatial microsimulation for? 85 | - Tomintz et al. (2008). The geography of smoking in Leeds: estimating individual smoking rates and the implications for the location of stop smoking services. Area, 40(3), 341–353. 86 | - Gleeson (2014) 87 | - My research 88 | 89 | 90 | ## R 91 | 92 | - Powerful *command-line interface* 93 | - Fast - if you know how 94 | - Steep learning curve but lots of help available 95 | 96 | ## A demonstration of R and RStudio 97 | 98 | - Creating, modifying and subsetting datasets 99 | - Functions 100 | - Features of RStudio 101 | 102 | # Demonstration of GitHub 103 | 104 | # Working through Chapter 3 105 | 106 | ## Day 2 107 | 108 | - Parallel processing in R for fast/Big applications 109 | - Exploring the results of spatial microsimulation 110 | 111 | Refreshments: 11:00 112 | 113 | - Discussion of limitations and underlying assumptions of spatial microsimulation 114 | - Applying the methods to your own data 115 | 116 | # Parallel processing in R for fast/big microsimulation 117 | 118 | # Exploring the results of spatial microsimulation 119 | 120 | 121 | -------------------------------------------------------------------------------- /notes/mipfp-notes.R: -------------------------------------------------------------------------------- 1 | # mipfp to do spatial microsimulation without input data 2 | 3 | global = read.delim("data/Belgium/BelgiqueConting.txt") 4 | in_age = read.delim("data/Belgium/ContrainteAge.txt") 5 | in_dip = read.delim("data/Belgium/ContrainteDipl.txt") 6 | in_sta = read.delim("data/Belgium/ContrainteStatut.txt") 7 | in_sex = read.delim("data/Belgium/ContrainteGenre.txt") 8 | 9 | # for one zone 10 | global_cons = xtabs(Freq ~ gener + dipl + statut + sex, data = global) 11 | 12 | i = 1 # zone number 13 | uz = unique(in_age$com) 14 | z = uz[i] 15 | z = "92094" 16 | # data preparation 17 | age = in_age$COUNT[in_age$com == z] 18 | edu = in_dip$COUNT[in_dip$com == z] 19 | ocu = in_sta$COUNT[in_sta$com == z] 20 | sex = in_sex$COUNT[in_sex$com == z] 21 | 22 | target = list(age, edu, ocu, sex) 23 | descript = list(1, 2, 3, 4) 
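# Note on the Ipfp() call below: mipfp::Ipfp() expects the seed array first
# (here the national cross-table global_cons), then a list giving the
# dimension(s) of the seed that each target margin constrains (descript:
# age -> 1, diploma -> 2, status -> 3, sex -> 4), then the target margins
# themselves (target). The fitted array is returned in res$x.hat, with
# margins matching this zone's constraints.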
24 | 25 | res = mipfp::Ipfp(global_cons, descript, target) 26 | identical(dimnames(res$x.hat), dimnames(global_cons)) 27 | expa = as.data.frame.table(res$x.hat) 28 | 29 | # Integerisation, see here for code: 30 | # https://github.com/Robinlovelace/spatial-microsim-book/blob/master/R/functions.R 31 | source("code/functions.R") # loads functions into memory 32 | expa$int = int_trs(expa$Freq) 33 | exp_indices = int_expand_vector(expa$int) 34 | synth = expa[exp_indices,] 35 | 36 | # for many zones 37 | list_output = vector(mode = "list", length = length(uz)) 38 | for(i in 1:length(uz)) { 39 | z = uz[i] 40 | # data preparation 41 | age = in_age$COUNT[in_age$com == z] 42 | edu = in_dip$COUNT[in_dip$com == z] 43 | ocu = in_sta$COUNT[in_sta$com == z] 44 | sex = in_sex$COUNT[in_sex$com == z] 45 | target = list(age, edu, ocu, sex) 46 | res = mipfp::Ipfp(global_cons, descript, target) 47 | expa = as.data.frame.table(res$x.hat) 48 | expa$int = rakeR::integerise(expa$Freq)[,1] 49 | exp_indices = int_expand_vector(expa$int) 50 | list_output[[i]] = expa[exp_indices,] 51 | } 52 | 53 | synth_namur = dplyr::bind_rows(list_output, .id = "id") 54 | library(dplyr) 55 | pmale = group_by(synth_namur, id) %>% 56 | summarise(pmale = sum(sex == "Hommes") / 57 | n()) 58 | -------------------------------------------------------------------------------- /notes/seville-notes.R: -------------------------------------------------------------------------------- 1 | # Welcome to the course's R notes 2 | # All course material found/linked to: 3 | # https://github.com/Robinlovelace/spatial-microsim-book 4 | # examples will go here 5 | 6 | # First challenge: get set up on the RStudio server 7 | # https://rstudio.jrc.es/ 8 | 9 | # test if your RStudio account works: 10 | # example of interactive plotting 11 | library(tmap) 12 | tmap_mode("view") 13 | example(qtm) 14 | 15 | # downloading and unzipping data 16 | url_msim = "https://github.com/Robinlovelace/spatial-microsim-book/archive/master.zip" 17 | download.file(url_msim, destfile = "master.zip") 18 | unzip("master.zip") 19 | 20 | # Notes on project management: 21 | # https://csgillespie.github.io/efficientR/ 22 | 23 | # for spatial data 24 | u = "https://github.com/Robinlovelace/vspd-base-shiny-data/archive/master.zip" 25 | download.file(u, destfile = "master.zip") 26 | unzip("master.zip") 27 | dir.create("data") 28 | f = list.files(path = "vspd-base-shiny-data-master/", 29 | full.names = T) 30 | file.copy(from = f, to = "data") # copy the downloaded files into data/ 31 | 32 | # plot x and y 33 | x = 1:99 34 | y = x^3 35 | plot(x, y) 36 | system.time({x = 1:99}) 37 | 38 | # example of tab autocompletion: 39 | # use tab inside function calls to find arguments 40 | system2(command = "ls", args = "-hal") 41 | 42 | # loading in data 43 | ind = read.csv("data/SimpleWorld/ind-full.csv") 44 | nrow(ind) 45 | head(ind) 46 | # look at the environment pane to see it 47 | # click on it or enter View(ind) to see it 48 | View(ind) 49 | 50 | # classes 51 | class(ind) 52 | class(ind$age) 53 | class(ind$sex) 54 | 55 | # subsetting data 56 | ind[5,] # select row 57 | ind[,3] 58 | ind[3] 59 | ind["sex"] 60 | ind$sex 61 | 62 | # Alternative way of data handling 63 | # dplyr rule: always returns a data frame 64 | # concept: type stability 65 | library(dplyr) 66 | slice(ind, 5) 67 | select(ind, sex) 68 | 69 | # class coercion 70 | ind_mat = as.matrix(ind) 71 | class(ind_mat[1,]) 72 | 73 | #################################################### 74 | # spatial data with R - CakeMap for
all zones 75 | 76 | ind <- read.csv("data/CakeMap/ind.csv") 77 | cons <- read.csv("data/CakeMap/cons.csv") 78 | # Load constraints separately - normally this would be first stage 79 | con1 <- cons[1:12] # load the age/sex constraint 80 | con2 <- cons[13:14] # load the car/no car constraint 81 | con3 <- cons[15:24] # socio-economic class 82 | 83 | # Rename the categories in "ind" to correspond to the ones in cons 84 | ind$Car <- sapply(ind$Car, FUN = switch, "Car", "NoCar") 85 | ind$Sex <- sapply(ind$Sex, FUN = switch, "m", "f") 86 | ind$NSSEC8 <- as.factor(ind$NSSEC8) 87 | levels(ind$NSSEC8) <- colnames(con3) 88 | ind$ageband4 <- 89 | gsub(pattern = "-", replacement = "_", x = ind$ageband4) 90 | 91 | # Initialise weights 92 | weight_init_1zone <- table(ind) 93 | init_cells <- rep(weight_init_1zone, each = nrow(cons)) 94 | 95 | # Define the names 96 | names <- c(list(rownames(cons)), 97 | as.list(dimnames(weight_init_1zone))) 98 | 99 | # Structure the data 100 | weight_all <- array(init_cells, dim = 101 | c(nrow(cons), dim(weight_init_1zone)), 102 | dimnames = names) 103 | 104 | # Transform con1 into a 3D array: con1_convert 105 | names <- c(list(rownames(cons)),dimnames(weight_all)[c(4,6)]) 106 | con1_convert <- array(NA, dim=c(nrow(cons),2,6), dimnames = names) 107 | 108 | for(zone in rownames(cons)){ 109 | for (sex in dimnames(con1_convert)$Sex){ 110 | for (age in dimnames(con1_convert)$ageband4){ 111 | con1_convert[zone,sex,age] <- con1[zone,paste(sex,age,sep="")] 112 | } 113 | } 114 | } 115 | 116 | # Rescale con3 since it has some inconsistent constraints 117 | con3_prop <- con3*rowSums(con2)/rowSums(con3) 118 | 119 | # Load mipfp package 120 | library(mipfp) 121 | 122 | # Loop over the zones, running mipfp for each one 123 | # To run in parallel: use foreach package 124 | con1m = con1_convert 125 | con2m = as.matrix(con2) 126 | con3m = as.matrix(con3_prop) 127 | descript <- list(c(3,5),2,4) 128 | 129 | for (i in 1:nrow(cons)){ 130 | target <- list(con1m[i,,], con2m[i,], con3m[i,]) 131 | res <- Ipfp(weight_init_1zone, descript,target) 132 | weight_all[i,,,,,] <- res$x.hat 133 | } 134 | 135 | # Results for zone 1 136 | weight_init_1zone <- weight_all[1,,,,,] 137 | 138 | # Validation 139 | aggr <- apply(weight_all,c(1,6,4),sum) 140 | aggr <- aggr[,,c(2,1)] # order of sex to fit cons 141 | aggr1 = as.data.frame(aggr) 142 | con2 = apply(weight_all,c(1,3),sum) 143 | con3 = apply(weight_all,c(1,5),sum) 144 | ind_agg <- cbind(aggr1,con2,con3) 145 | 146 | plot(as.matrix(ind_agg[1,]), as.matrix(cons[1,]), xlab = 'Simulated', ylab='Theoretical', main = 'Validation for zone 1') 147 | 148 | cor(as.vector(as.matrix(ind_agg)),as.vector(as.matrix(cons))) 149 | 150 | 151 | CorVec <- rep (0, nrow(cons)) 152 | 153 | for (i in 1:nrow(cons)){ 154 | CorVec[i] = cor(as.numeric(ind_agg[i,]),as.numeric(cons[i,])) 155 | } 156 | 157 | which(CorVec< 0.99) 158 | 159 | # integerisation 160 | expa = as.data.frame.table(weight_init_1zone, responseName = 'COUNT') 161 | 162 | truncated = expa 163 | truncated$COUNT = floor(expa$COUNT) 164 | p = expa$COUNT - truncated$COUNT 165 | n_missing = sum(p) 166 | index = sample(1:nrow(truncated), size = n_missing, prob = p,replace=FALSE) 167 | truncated$COUNT[index] = truncated$COUNT[index] + 1 168 | 169 | # see simPop-notes.R for notes on simPop 170 | 171 | 172 | # spatial data - using this repo 173 | # https://github.com/Robinlovelace/Creating-maps-in-R 174 | 175 | url_maps = 176 | unzip() 177 | library(raster) 178 | system.time( 179 | lnd <- shapefile("data/london_sport.shp")
180 | ) 181 | class(lnd) 182 | plot(lnd) 183 | library(sf) 184 | system.time( 185 | lnd_sf <- st_read("data/london_sport.shp") 186 | ) 187 | plot(lnd_sf) 188 | 189 | r = raster(lnd) 190 | values(r) = 1:100 191 | plot(r) 192 | plot(lnd, add = T) 193 | proj4string(lnd) 194 | lnd_geo = spTransform(lnd, CRS("+proj=longlat +datum=WGS84")) 195 | proj4string(lnd_geo) 196 | spDists(lnd_geo[1:3,]) 197 | spDists(lnd[1:3,]) 198 | raster::res(r) 199 | res(r) 200 | detach("package:raster") 201 | raster::res(r) 202 | res(r) 203 | library(raster) 204 | r_highes = r 205 | raster::res(r_highes) <- 1000 206 | values(r_highes) = 1:ncell(r_highes) 207 | plot(r_highes) 208 | 209 | # further resources: http://geostat-course.org/node 210 | 211 | # Generate spatial microdata 212 | source("notes/mipfp-notes.R") 213 | 214 | # Getting spatial data for Belgium 215 | u_bel = "http://biogeo.ucdavis.edu/data/gadm2.8/rds/BEL_adm4.rds" 216 | download.file(u_bel, "BEL_adm4.rds") 217 | bel = readRDS("BEL_adm4.rds") 218 | plot(bel) 219 | d = bel@data 220 | nam = bel[bel$NAME_2 == "Namur",] 221 | nam = nam[sample(length(nam), length(uz)),] 222 | plot(nam) 223 | d = nam@data 224 | # str(nam) # show structure 225 | uz = unique(synth_namur$id) 226 | nam$id = uz[sample(length(uz), length(uz))] 227 | # check the ids match 228 | summary(nam$id %in% pmale$id) 229 | nam@data = inner_join(nam@data, pmale) 230 | head(nam@data) 231 | tmap::qtm(nam, "pmale") 232 | 233 | library(tmap) 234 | tmap_mode("view") 235 | qtm(nam, "pmale", n = 3) 236 | tm_shape(nam) + 237 | tm_fill(col = "pmale", 238 | breaks = c(0, 0.5, 1)) 239 | 240 | # Challenges: 241 | # 1: Write a for loop to create a spatial microdataset 242 | # for all zones in namur (don't just copy my code!) 243 | # 2: Create a map of a different variable (not % male) 244 | # 3: Implement the methods on your own data 245 | -------------------------------------------------------------------------------- /notes/simPop-notes.R: -------------------------------------------------------------------------------- 1 | ## Notes on simPop 2 | # install.packages("simPop") 3 | library(simPop) 4 | data(eusilcS) 5 | nrow(eusilcS) / 6 | length(unique(eusilcS$db030)) 7 | inp = specifyInput(data = eusilcS, 8 | hhid = "db030", 9 | hhsize = "hsize", 10 | strata = "db040", 11 | weight = "rb050") 12 | data("totalsRG") 13 | tt = xtabs(Freq ~ ., totalsRG) 14 | # tableWt() 15 | class(tt) = "table" 16 | oldweights = inp@data$rb050 17 | addWeights(inp) = calibSample(inp, totals = tt) 18 | newweights = inp@data$rb050 19 | plot(oldweights, newweights) 20 | synthP = simStructure(dataS = inp, 21 | method = "direct", 22 | basicHHvars = c("age", "rb090", "db040")) 23 | s = synthP@pop@data 24 | 25 | # with SimpleWorld 26 | ind = read.csv("data/SimpleWorld/ind-full.csv") 27 | ind$hhid = sample(x = 1:3, size = nrow(ind), replace = T) 28 | ind$strata = sample(x = 1:3, size = nrow(ind), replace = T) 29 | ind$weight = 1 30 | i = specifyInput(ind, hhid = "hhid", pid = "id", strata = "strata", weight = "weight") 31 | con1 = read.csv("data/SimpleWorld/sex.csv") 32 | tots = data.frame(sex = c("m", "f"), 33 | Freq =colSums(con1)) 34 | ti = xtabs(Freq ~ ., tots) 35 | class(ti) = "table" 36 | addWeights(i) = calibSample(i, ti) 37 | s = simStructure(i, "direct", c("age", "sex", "hhid")) 38 | s_data = s@pop@data 39 | head(s_data) 40 | -------------------------------------------------------------------------------- /output/.gitignore: -------------------------------------------------------------------------------- 1 | synhhlddata.RData 
2 | -------------------------------------------------------------------------------- /output/ints_df.csv: -------------------------------------------------------------------------------- 1 | "","id","zone","age","sex","income" 2 | "1",1,1,59,"m",2868 3 | "2",2,1,54,"m",2474 4 | "3",3,1,35,"m",2231 5 | "4",3,1,35,"m",2231 6 | "5",3,1,35,"m",2231 7 | "6",4,1,73,"f",3152 8 | "7",5,1,49,"f",2473 9 | "8",5,1,49,"f",2473 10 | "9",5,1,49,"f",2473 11 | "10",5,1,49,"f",2473 12 | "11",3,1,35,"m",2231 13 | "12",4,1,73,"f",3152 14 | "13",1,2,59,"m",2868 15 | "14",2,2,54,"m",2474 16 | "15",4,2,73,"f",3152 17 | "16",4,2,73,"f",3152 18 | "17",4,2,73,"f",3152 19 | "18",4,2,73,"f",3152 20 | "19",5,2,49,"f",2473 21 | "20",2,2,54,"m",2474 22 | "21",4,2,73,"f",3152 23 | "22",1,2,59,"m",2868 24 | "23",3,3,35,"m",2231 25 | "24",4,3,73,"f",3152 26 | "25",4,3,73,"f",3152 27 | "26",5,3,49,"f",2473 28 | "27",5,3,49,"f",2473 29 | "28",5,3,49,"f",2473 30 | "29",5,3,49,"f",2473 31 | "30",5,3,49,"f",2473 32 | "31",4,3,73,"f",3152 33 | "32",1,3,59,"m",2868 34 | "33",3,3,35,"m",2231 35 | -------------------------------------------------------------------------------- /slides/SM-for-ABM.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Spatial micro-data for agent-based models" 3 | author: "Morgane Dumont" 4 | date: '`r Sys.Date()`' 5 | output: 6 | beamer_presentation: default 7 | ioslides_presentation: default 8 | slidy_presentation: default 9 | bibliography: ../bibliography.bib 10 | --- 11 | 12 | ```{r setup, include=FALSE} 13 | knitr::opts_chunk$set(echo = FALSE) 14 | ``` 15 | 16 | ## What is an Agent-based model? 17 | 18 | It consists of: 19 | 20 | > - Agents with characteristics (agents are autonomous decision-making units with diverse, heterogeneous characteristics); 21 | > - their environment; 22 | > - relations between agents; 23 | > - AND possibly relations between the agents and their environment. 24 | 25 | 26 | ## 27 | 28 | "Agent-based modelling can find new, better solutions to many 29 | problems important to our environment, health, and economy" (Grimm and Railsback 2011) 30 | 31 | 32 | ## 33 | 34 | An ABM can evolve through time and space, but needs as input: 35 | 36 | > - the complete population, 37 | > - with the pertinent characteristics for each person, 38 | > - located in their environment, 39 | > - the definition of the different relations. 40 | 41 | ## 42 | 43 | The individual-level data needed can be generated by spatial microsimulation. 44 | 45 | ## Examples of developed agent-based models 46 | 47 | > - NetLogo [@thiele_facilitating_2014] 48 | > - VirtualBelgium [@barthelemy_stochastic_2015] 49 | > - TransMob [@TransMob] 50 | 51 | ## 52 | 53 | NetLogo for SimpleWorld: at each *time tick* the inhabitants will: 54 | 55 | > 1. move to a random location within their zone. 56 | > 2. "look across the fence": check their field of vision for inhabitants from a neighbouring zone and select the closest one in view. 57 | > 3. try to "convince" them to come over to the other side: the inhabitant with more money (`income`) will *bribe* the other with 10% of their money to come over to their zone. 58 | 59 | The model will have the following adjustable parameters: 60 | 61 | > 1. The field of vision has two parameters: the viewing angle and the distance 62 | > 2. Average level of *bribeability* of inhabitants: if their level is less than 100%, a random number generator will be used to determine whether the agent accepts the bribe or not.
The distribution of bribeability is approximately normal with a mean and a standard deviation. 63 | 64 | ## 65 | 66 | 67 | \includegraphics[width=0.9\textwidth]{../figures/nl-plots.png} 68 | 69 | ## 70 | VirtualBelgium 71 | ![](../figures/Jojo_JASS.png) 72 | 73 | 74 | ## 75 | 76 | ![](../figures/Jojo_JASS2.png) 77 | 78 | ## 79 | Need for spatial microsimulation: 80 | 81 | - To create the initial individuals; 82 | - To have the household features; 83 | - To assign an activity to each person. 84 | 85 | ## 86 | If you want to create your own code for an ABM, think of: 87 | 88 | - Object-oriented programming (such as C++) 89 | - Repast (https://repast.github.io/index.html) 90 | 91 | ## 92 | If your aim is to build an ABM and you do not have enough input data, you can generate it with spatial microsimulation. 93 | 94 | But first you need to be sure of what will be needed. 95 | 96 | [A good introduction to agent-based modelling](http://link.springer.com/article/10.1057/jos.2010.3) 97 | 98 | An example of a combination of spatial microsimulation and agent-based modelling: Virtual Belgium In Health 99 | 100 | ## References 101 | 102 | 103 | -------------------------------------------------------------------------------- /slides/SM-without-microdata.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Spatial Microsimulation without microdata" 3 | author: "Morgane Dumont" 4 | date: '`r Sys.Date()`' 5 | output: 6 | beamer_presentation: default 7 | ioslides_presentation: default 8 | slidy_presentation: default 9 | bibliography: ../bibliography.bib 10 | --- 11 | 12 | ```{r setup, include=FALSE} 13 | knitr::opts_chunk$set(echo = FALSE) 14 | ``` 15 | 16 | ## Without microdata? 17 | 18 | If you have no microdata but would like to generate a microsimulation, you can use a wide range of methods, depending on the data you have. 19 | 20 | For example: 21 | 22 | > - Global cross-tables and local marginal distributions 23 | 24 | > - Two-level aggregated data 25 | 26 | > - Only a cross-table, but also the mean, standard deviation, ... of the characteristics you would like to add 27 | 28 | ## Global cross-tables and local marginal distributions 29 | 30 | The global cross-table can be the initial weight matrix. 31 | 32 | Or, thanks to *mipfp*, it can be the initial matrix AND a constraint. 33 | 34 | ## Two-level aggregated data 35 | 36 | For example (Barthélemy and Toint - 2013): 37 | 38 | At municipality level: 39 | 40 | > - the cross-table gender x age 41 | > - and the marginals of diploma level and activity status; 42 | 43 | 44 | 45 | At district level: 46 | 47 | > - gender x activity status, 48 | > - gender x diploma level, 49 | > - age x activity status 50 | > - and age x diploma level. 51 | 52 | ## Several steps 53 | 54 | > 1. Create at district level gender x activity status x age 55 | > 2. Create at district level gender x diploma level x age 56 | > 3. Create at district level gender x activity status x age x diploma level 57 | > 4. Use this data as the seed for IPF and the two municipality-level databases as constraints 58 | 59 | 60 | ## Only a cross-table, but also mean, standard deviation,... 61 | 62 | First, create the constraints by knowing: 63 | 64 | - the distribution of your variable 65 | - the total number of individuals you need at the end. 66 | 67 | 68 | 69 | ## Addition of the household level 70 | 71 | Depending on the data you have, there are several possibilities. One method is: 72 | 73 | > - Run an IPF to create an individual pool.
74 | > - Run an IPF to create a household pool. 75 | > - Try to complete the households with individuals (depending on the variables of both) 76 | 77 | ## 78 | 79 | If you have more precise data, such as age differences in couples, the type of household of each individual, their civil status, ... you can use combinatorial optimization to constrain the households. 80 | 81 | In the case of my current research, the data are: 82 | 83 | > - for each individual, a zone, age, sex, some characteristics AND the size and type of household; 84 | > - the distribution of age differences within couples and between mother and child; 85 | 86 | ## 87 | 88 | ![](../figures/HH-CO.png) 89 | 90 | ## 91 | 92 | ![](../figures/IllustrationCouples.png) 93 | 94 | ## 95 | 96 | [@lenormand_generating_2012] 97 | https://arxiv.org/pdf/1208.6403v2.pdf 98 | 99 | IPU (Guo and Bhat, 2007) 100 | 101 | ## Choice of data and methods 102 | 103 | > - Major tip: first 'make a plan' before beginning to code. 104 | > - Check the source of the data and the way it was collected. 105 | > - Does it represent your target data? 106 | > - The choice of method is important. What are the hypotheses of the method? The underlying assumptions? What are the strengths and weaknesses of the method? 107 | > - Spatial microsimulation is an approximation, so you need to be aware of the bias that the method could have. 108 | 109 | ## Coding 110 | 111 | Imagine that the aim is to create a population for a Belgian province (Namur) characterised by: 112 | 113 | > - A municipality (code INS), 114 | > - a diploma level, 115 | > - a professional status level, 116 | > - an age category (0.5 meaning from 0 to 5 years old), 117 | > - a gender. 118 | 119 | ## 120 | The data you have are stored in the *Belgium* folder. You have: 121 | 122 | > - ContrainteStatut.txt: per municipality and professional status, you have a count; 123 | > - ContrainteGenre.txt: per municipality and gender, you have a count; 124 | > - ContrainteDipl.txt: per municipality and diploma level, you have a count; 125 | > - ContrainteAge.txt: per municipality and age class, you have a count; 126 | > - BelgiqueConting.txt: per age class, gender, diploma and status, you have a count. 127 | 128 | 129 | ## References 130 | -------------------------------------------------------------------------------- /slides/introduction.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Introduction to Spatial Microsimulation with R" 3 | author: "Robin Lovelace" 4 | date: '`r Sys.Date()`' 5 | output: beamer_presentation 6 | bibliography: ../bibliography.bib 7 | --- 8 | 9 | ```{r setup, include=FALSE} 10 | knitr::opts_chunk$set(echo = TRUE) 11 | 12 | ``` 13 | 14 | ## Introduction 15 | 16 | - Housekeeping 17 | - About the course and its teachers 18 | - Lectures and practicals 19 | - Getting help 20 | 21 | ## Housekeeping 22 | 23 | - Thanks to the hosts, the JRC 24 | 25 | ## Spatial Microsimulation with R 26 | 27 | Aims: 28 | 29 | 1. To provide a solid understanding of the method and applications 30 | 2. To teach its implementation in R in general terms 31 | 3.
To provide guidance on next steps 32 | 33 | ## Objectives: 34 | 35 | - Become proficient with R and RStudio for handling data 36 | - Understand some applications where spatial microsimulation is useful 37 | - Realise the limitations of the method 38 | - Know about a range of packages for doing spatial microsimulation with R 39 | - Understand code for generating spatial microdata with **mipfp** 40 | - Have ideas for trying the methods on your own datasets 41 | 42 | # About the course and its teachers 43 | 44 | ## The request to teach at the EU 45 | 46 | - Links with much of the research taking place at the JRC 47 | - Big Data 48 | - Modelling 49 | - Social impact assessment 50 | - Scenarios of the future 51 | 52 | ## The course materials 53 | 54 | - Based on our book, [@lovelace_spatial_2016]. Digital versions available on-line 55 | - Slides available on-line 56 | - We'll be making small 'code chunks' and scripts available during the course 57 | - Any feedback welcome 58 | 59 | ## A bit about us 60 | 61 | Robin Lovelace 62 | 63 | > - Environmental Geographer turned Computational and Transport Geographer 64 | > - Now on a 5-year University Academic Fellowship (UAF) in Transport and Big Data at the Leeds Institute for Transport Studies (ITS) 65 | > - Creator of many online teaching materials - see [github.com/robinlovelace](https://github.com/Robinlovelace) 66 | > - Creator of the [**stplanr**](https://github.com/ropensci/stplanr) package 67 | > - Lead developer of the [Propensity to Cycle Tool](http://www.pct.bike/) (PCT) 68 | 69 | ## A bit about us 70 | 71 | Morgane Dumont 72 | 73 | > - Applied Mathematician with a coding, algorithmic and statistics background 74 | > - Now on a project of the Wallonia Region developing an evolutionary spatial microsimulation to forecast the health needs of the elderly in Belgium in 2030 75 | > - Teaches statistics in R to master's students in applied mathematics at the University of Namur 76 | 77 | 78 | ## A bit about you 79 | 80 | - Go-around - who you are, interests in the course 81 | - With your neighbour: 82 | 83 | > - Experience with R 84 | > - Geographical data 85 | 86 | - What you hope to get out of the course 87 | 88 | # What is spatial microsimulation and its applications 89 | 90 | ## What is spatial microsimulation? 91 | 92 | 1. A method 93 | 2. An approach 94 | 95 | ## Applications 96 | 97 | - Wide variety of potential applications 98 | - So far the main applications have been in health, poverty mapping and transport 99 | - What do you want to use spatial microsimulation for? 100 | - @tomintz_geography_2008 The geography of smoking in Leeds: estimating individual smoking rates and the implications for the location of stop smoking services.
101 | - Exploration of the energy costs of transport [@lovelace_oil_2014] 102 | 103 | ## Agriculture 104 | 105 | @hynes_modelling_2008 is a classic example 106 | 107 | Had the following datasets: 108 | 109 | - Individual-level data on farmers participating in an agri-environment scheme 110 | - Farm-level data with many attributes about the farms 111 | - Geographical data on farms at the Enumeration District (ED) level 112 | 113 | For confidentiality reasons, the individual-level datasets could not be linked 114 | 115 | Spatial microsimulation was used to create a synthetic dataset 116 | 117 | ## Agriculture II 118 | 119 | Results show the probability of participation across Ireland: 120 | 121 | ![](../figures/agri-example-hynes-2008.png) 122 | 123 | ## Tax policy 124 | 125 | Commonly used to evaluate the distributional impacts of tax policies [@agostini_were_2014] 126 | 127 | ![](../figures/austerity.png) 128 | 129 | ## Transport 130 | 131 | A simulation of car traffic for Namur [@barthelemy_parallelized_2014] 132 | 133 | \includegraphics[width=0.5\textwidth]{../figures/Trafic_jojo.png} 134 | \includegraphics[width=0.5\textwidth]{../figures/Trafic_Jojo2.png} 135 | 136 | 137 | Tools used: spatial microsimulation, agent-based modelling, activity chains, ... 138 | 139 | ## What's next: 140 | 141 | - The RStudio Graphical User Interface (GUI) 142 | - Using R 143 | - Project management 144 | - GitHub 145 | 146 | ## References -------------------------------------------------------------------------------- /slides/r-rstudio-practical.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Using R and RStudio for spatial microsimulation" 3 | author: "Robin Lovelace" 4 | date: "`r Sys.Date()`" 5 | output: ioslides_presentation 6 | bibliography: ../bibliography.bib 7 | --- 8 | 9 | ```{r setup, include=FALSE} 10 | knitr::opts_chunk$set(echo = TRUE) 11 | knitr::opts_knit$set(root.dir = "..") 12 | ``` 13 | 14 | ## Introduction 15 | 16 | - Primarily a practical session 17 | - Based on the [old](https://en.wikipedia.org/wiki/Docendo_discimus) (~2000 years old!) saying *docendo discimus*: 18 | 19 | > **by teaching we learn**. 20 | 21 | We'll use up-to-date materials: 22 | - A section from [Efficient R Programming](https://csgillespie.github.io/efficientR/set-up.html#rstudio) 23 | - And a live demo 24 | 25 | ## Practical demonstration using R and RStudio 26 | 27 | - Using the SimpleWorld data 28 | 29 | ```{r} 30 | source("code/SimpleWorld.R") 31 | ``` 32 | 33 | ## Loading data 34 | 35 | ```{r} 36 | con_age <- read.csv("data/SimpleWorld/age.csv") 37 | con_sex <- read.csv("data/SimpleWorld/sex.csv") 38 | ``` 39 | 40 | - R can read data from all major file formats 41 | - See the **rio** package for more information 42 | 43 | ```{r, eval=FALSE} 44 | install.packages("rio") 45 | ``` 46 | 47 | ## Subsetting data 48 | 49 | ```{r} 50 | con_age 51 | ``` 52 | 53 | 54 | ```{r, eval=FALSE} 55 | View(con_age) 56 | ``` 57 | 58 | ## Exploring RStudio 59 | 60 | ![](../figures/rstudio-environment.png) 61 | 62 | ## Autocompletion 63 | 64 | ![](../figures/rstudio-autocomplete.png) 65 | 66 | ## Exercises 67 | 68 | Work through [Section 2.5 of Efficient R Programming](https://csgillespie.github.io/efficientR/set-up.html#rstudio) 69 | 70 | - Answer the practicals.
71 | - If you finish early, work on the Spatial Microsimulation book's [appendix](https://github.com/Robinlovelace/spatial-microsim-book/blob/master/13-appendix.Rmd) -------------------------------------------------------------------------------- /slides/simpop-intro.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "simPop" 3 | author: "Robin Lovelace" 4 | date: '`r Sys.Date()`' 5 | output: 6 | ioslides_presentation: default 7 | beamer_presentation: default 8 | --- 9 | 10 | ## SimPop 11 | 12 | - simPop is a package for creating, analysing and modelling synthetic microdata 13 | - It is powerful and integrates a number of beneficial features: 14 | - Parallel processing 15 | - Real (EU SILC and other) test datasets 16 | - Wide range of functionality 17 | - Funded by respected organisations and implemented by skilled programmers 18 | 19 | ## Basic use 20 | 21 | ```{r, echo=TRUE} 22 | library(simPop) # loads lots of packages 23 | ``` 24 | 25 | ## simPop data 26 | 27 | ```{r, echo=TRUE} 28 | data(eusilcS) 29 | dplyr::glimpse(eusilcS[1:5]) 30 | ``` 31 | 32 | ## Create dataObj 33 | 34 | ```{r, echo=TRUE} 35 | inp = specifyInput(data = eusilcS, 36 | hhid = "db030", 37 | hhsize = "hsize", 38 | strata = "db040", 39 | weight = "rb050") 40 | class(inp) 41 | inp 42 | ``` 43 | 44 | ## Input data 45 | 46 | ```{r, echo=TRUE} 47 | head(eusilcS$db030) 48 | head(eusilcS$hsize) 49 | # number of people per household 50 | nrow(eusilcS) / 51 | length(unique(eusilcS$db030)) 52 | ``` 53 | 54 | ## Constraining the input data by cross-tabbed marginals 55 | 56 | ```{r, echo=TRUE} 57 | data(totalsRGtab) 58 | totalsRGtab 59 | rcons = colSums(totalsRGtab) / sum(totalsRGtab) 60 | rsurv = summary(eusilcS$db040) / nrow(eusilcS) 61 | ``` 62 | 63 | ## Differences in regional totals 64 | 65 | ```{r, echo=TRUE} 66 | plot(rcons) 67 | points(rsurv, pch = 3) 68 | text(1:length(rsurv), y = pmin(rsurv, rcons), labels = names(rcons)) 69 | ``` 70 | 71 | ## Add weights 72 | 73 | ```{r, echo=TRUE} 74 | addWeights(inp) = 75 | calibSample(inp = inp, totals = totalsRGtab) 76 | synthP = simStructure(dataS = inp, 77 | method = "direct", 78 | basicHHvars = c("age", "rb090", "db040")) 79 | ``` 80 | 81 | ## A look at the outputs 82 | 83 | ```{r, echo=TRUE} 84 | slotNames(synthP) 85 | nrow(synthP@pop@data) 86 | head(synthP@pop@data) 87 | rsynth = summary(synthP@pop@data$db040) / 88 | nrow(synthP@pop@data) 89 | ``` 90 | 91 | ## Comparison with marginals 92 | 93 | ```{r, echo=TRUE} 94 | plot(rcons) 95 | points(rsynth, pch = 3) 96 | text(1:length(rsurv), y = pmin(rsurv, rcons), labels = names(rcons)) 97 | ``` 98 | 99 | ## Visualisation | source: [publik.tuwien.ac.at](http://publik.tuwien.ac.at/files/PubDat_238106.pdf) 100 | 101 | ![](../figures/simPop-results-eg.png) 102 | 103 | ## Tasks 104 | 105 | - Practical (30 minutes) 106 | - Basic: read-up on **simPop** 107 | - Intermediate: build on the examples using `data("eusilcP")` and `data("eusilcS")` to explore the functionality of **simPop** 108 | - Advanced: take a look at the package's source code 109 | 110 | - Challenge (20 minutes) 111 | - Beginner: try to create a synthetic microdataset of [SimpleWorld](https://github.com/Robinlovelace/spatial-microsim-book/blob/master/02-SimpleWorld.Rmd) using **simPop** 112 | - Explore how to use `simContinuous()` to estimate mean income in the regions of Austria 113 | 114 | - Discussion: how could these methods be useful in your work? 
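A possible starting point for the 'Intermediate' task above (a minimal sketch: it only loads and inspects `eusilcP`, the full synthetic population shipped with **simPop**, before you build on the earlier examples):

```{r, eval=FALSE}
# hedged starter: look at the full synthetic population data set first,
# then extend the specifyInput()/calibSample()/simStructure() examples above
data("eusilcP")
dim(eusilcP)   # number of individuals and variables
str(eusilcP)   # variable names and types to build on
```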
115 | 116 | ```{r} 117 | args(simContinuous) 118 | ``` 119 | 120 | ## simPop resources: 121 | 122 | - Slides by Matthias Templ: http://publik.tuwien.ac.at/files/PubDat_238106.pdf 123 | - A youtube video on the topic: https://www.youtube.com/watch?v=fjZhAUq3JZ0 124 | - The package's documentation 125 | 126 | -------------------------------------------------------------------------------- /sms-book-citation.bib: -------------------------------------------------------------------------------- 1 | 2 | @book{lovelace_spatial_2016, 3 | title = {Spatial Microsimulation with {{R}}}, 4 | url = {http://robinlovelace.net/spatial-microsim-book/}, 5 | publisher = {{CRC Press}}, 6 | date = {2016}, 7 | author = {Lovelace, Robin and Dumont, Morgane} 8 | } 9 | 10 | 11 | -------------------------------------------------------------------------------- /spatial-microsim-book.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | BuildType: Website 16 | -------------------------------------------------------------------------------- /www/.gitignore: -------------------------------------------------------------------------------- 1 | bootstrap-2.3.2/ 2 | highlight/ 3 | jquery-1.11.0/ -------------------------------------------------------------------------------- /www/glyphicons-halflings-white.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/www/glyphicons-halflings-white.png -------------------------------------------------------------------------------- /www/glyphicons-halflings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robinlovelace/spatial-microsim-book/cdba7ae78ed8fab7caf98c54f3f953055cc0e19d/www/glyphicons-halflings.png -------------------------------------------------------------------------------- /www/highlight.css: -------------------------------------------------------------------------------- 1 | /* Affix navigation to top-left */ 2 | #nav.affix { 3 | position: static; 4 | } 5 | @media screen and (min-width: 720px) { 6 | #nav.affix, 7 | #nav.affix { 8 | position: fixed; 9 | top: 0px; 10 | } 11 | #nav.affix-bottom { 12 | position: absolute; 13 | } 14 | } 15 | 16 | /* Tweak code styling */ 17 | 18 | code { 19 | padding: 1px; 20 | } 21 | 22 | /* Tweak navigation list styling */ 23 | 24 | ul.toc { 25 | padding-left: 0px; 26 | } 27 | 28 | ul.toc .dropdown-header { 29 | padding: 5px 0 0 0; 30 | } 31 | 32 | ul .dropdown-header:first-child { 33 | margin-top: 2px; 34 | } 35 | 36 | .dropdown-header { 37 | font-weight: bold; 38 | color: #333; 39 | margin-top: 8px; 40 | } 41 | 42 | .dropdown-menu .dropdown-header { 43 | border-bottom: 1px solid #eee; 44 | } 45 | 46 | 47 | /* Syntax highlighting */ 48 | 49 | pre, code { 50 | font-family: 'Inconsolata', sans-serif; 51 | font-size: 1em; 52 | background-color: #fafafa; 53 | } 54 | pre { 55 | border-color: #ddd; 56 | } 57 | code { 58 | color: #333; 59 | white-space: normal; 60 | } 61 | 62 | table.sourceCode, tr.sourceCode, td.lineNumbers, td.sourceCode { 63 | margin: 0; padding: 0; vertical-align: baseline; border: none; } 64 | table.sourceCode { 
width: 100%; line-height: 100%; } 65 | td.lineNumbers { text-align: right; padding-right: 4px; padding-left: 4px; color: #aaaaaa; border-right: 1px solid #aaaaaa; } 66 | td.sourceCode { padding-left: 5px; } 67 | 68 | /* Class described in https://benjeffrey.com/posts/pandoc-syntax-highlighting-css 69 | Colours from https://gist.github.com/robsimmons/1172277 */ 70 | 71 | code > span.kw { color: #555; font-weight: bold; } /* Keyword */ 72 | code > span.dt { color: #902000; } /* DataType */ 73 | code > span.dv { color: #40a070; } /* DecVal (decimal values) */ 74 | code > span.bn { color: #d14; } /* BaseN */ 75 | code > span.fl { color: #d14; } /* Float */ 76 | code > span.ch { color: #d14; } /* Char */ 77 | code > span.st { color: #d14; } /* String */ 78 | code > span.co { color: #888888; font-style: italic; } /* Comment */ 79 | code > span.ot { color: #007020; } /* OtherToken */ 80 | code > span.al { color: #ff0000; font-weight: bold; } /* AlertToken */ 81 | code > span.fu { color: #900; font-weight: bold; } /* Function calls */ 82 | code > span.er { color: #a61717; background-color: #e3d2d2; } /* ErrorTok */ 83 | 84 | /* Tables */ 85 | 86 | table { 87 | width: 100%; 88 | margin-bottom: 20px; 89 | } 90 | 91 | table thead > tr > th, 92 | table tbody > tr > th, 93 | table tfoot > tr > th, 94 | table thead > tr > td, 95 | table tbody > tr > td, 96 | table tfoot > tr > td { 97 | padding: 8px; 98 | line-height: 1.428571429; 99 | vertical-align: top; 100 | border-top: 1px solid #dddddd; 101 | } 102 | 103 | table thead > tr > th { 104 | vertical-align: bottom; 105 | border-bottom: 2px solid #dddddd; 106 | } 107 | 108 | table tr.odd { 109 | background-color: #fafafa; 110 | } 111 | -------------------------------------------------------------------------------- /www/toc.js: -------------------------------------------------------------------------------- 1 | toc = $("ul#toc"); 2 | $("#content").find("h2").each(function() { 3 | h = $(this); 4 | toc.append("
  • " + h.text() + "
  • "); 5 | }); 6 | 7 | 8 | $('#nav').affix({ 9 | offset: $('#nav').position() 10 | }); --------------------------------------------------------------------------------