├── requirements.r
├── img
    └── shield_image.png
├── datasets
    ├── planets.RData
    ├── shining_list.RData
    ├── all_wars_matrix.RData
    ├── chapter6.R
    ├── chapter5.R
    └── chapter3.R
├── README.md
├── .gitignore
├── course.yml
├── chapter1.Rmd
├── chapter6.Rmd
├── chapter4.Rmd
├── chapter3.Rmd
├── chapter5.Rmd
└── chapter2.Rmd


/requirements.r:
--------------------------------------------------------------------------------
1 | # no additional packages needed
2 | 


--------------------------------------------------------------------------------
/img/shield_image.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datacamp/courses-introduction-to-r/master/img/shield_image.png


--------------------------------------------------------------------------------
/datasets/planets.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datacamp/courses-introduction-to-r/master/datasets/planets.RData


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## Introduction to R
2 | 
3 | This repository contains the source files for the introduction to R course (id 58).
4 | 


--------------------------------------------------------------------------------
/datasets/shining_list.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datacamp/courses-introduction-to-r/master/datasets/shining_list.RData


--------------------------------------------------------------------------------
/datasets/all_wars_matrix.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datacamp/courses-introduction-to-r/master/datasets/all_wars_matrix.RData


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *
 2 | !*.Rmd
 3 | !*.yml
 4 | !README.md
 5 | !.gitignore
 6 | .Rproj.user
 7 | !datasets/
 8 | !datasets/*
 9 | !requirements.r
10 | 


--------------------------------------------------------------------------------
/datasets/chapter6.R:
--------------------------------------------------------------------------------
1 | # Cast of the movie; stored as the `actors` component of shining_list
2 | actors <- c("Jack Nicholson", "Shelley Duvall", "Danny Lloyd", "Scatman Crothers", "Barry Nelson")
3 | scores <- c(4.5, 4, 5)
4 | sources <- c("IMDb1", "IMDb2", "IMDb3")
5 | comments <- c("Best Horror Film I Have Ever Seen", "A truly brilliant and scary film from Stanley Kubrick", "A masterpiece of psychological horror")
6 | # data.frame() takes its column names from the names of the vectors passed in
7 | reviews <- data.frame(scores, sources, comments)
8 | shining_list <- list(moviename = "The Shining", actors = actors, reviews = reviews)
9 | save(shining_list, file = "datasets/shining_list.RData")


--------------------------------------------------------------------------------
/datasets/chapter5.R:
--------------------------------------------------------------------------------
1 | # Planet names; every vector below has one entry per planet, in this order
2 | name <- c("Mercury", "Venus", "Earth", "Mars", "Jupiter", "Saturn", "Uranus", "Neptune")
3 | # The first four entries are terrestrial planets without rings, the last four are gas giants with rings
4 | type <- rep(c("Terrestrial planet", "Gas giant"), each = 4)
5 | rings <- rep(c(FALSE, TRUE), each = 4)
6 | diameter <- c(0.382, 0.949, 1, 0.532, 11.209, 9.449, 4.007, 3.883)
7 | rotation <- c(58.64, -243.02, 1, 1.03, 0.41, 0.43, -0.72, 0.67)
8 | planets_df <- data.frame(name, type, diameter, rotation, rings)
9 | save(planets_df, file = "datasets/planets.RData")


--------------------------------------------------------------------------------
/datasets/chapter3.R:
--------------------------------------------------------------------------------
 1 | # Column labels shared by both matrices
 2 | col_titles <- c("US", "non-US")
 3 | 
 4 | # First matrix: one row per movie, values filled in row by row
 5 | movie_names <- c("A New Hope", "The Empire Strikes Back", "Return of the Jedi")
 6 | box_office_all <- c(461, 314.4, 290.5, 247.9, 309.3, 165.8)
 7 | star_wars_matrix <- matrix(
 8 |   box_office_all,
 9 |   nrow = 3,
10 |   byrow = TRUE,
11 |   dimnames = list(movie_names, col_titles)
12 | )
13 | 
14 | # Second matrix, built the same way
15 | movie_names2 <- c("The Phantom Menace", "Attack of the Clones", "Revenge of the Sith")
16 | box_office_all2 <- c(474.5, 552.5, 310.7, 338.7, 380.3, 468.5)
17 | star_wars_matrix2 <- matrix(
18 |   box_office_all2,
19 |   nrow = 3,
20 |   byrow = TRUE,
21 |   dimnames = list(movie_names2, col_titles)
22 | )
23 | 
24 | # Stack the rows of both matrices into a single matrix
25 | all_wars_matrix <- rbind(star_wars_matrix, star_wars_matrix2)
26 | 
27 | # Remove the helper objects created above, keeping all_wars_matrix
28 | rm(
29 |   box_office_all, movie_names, col_titles, star_wars_matrix,
30 |   box_office_all2, movie_names2, star_wars_matrix2
31 | )
32 | 
33 | save(all_wars_matrix, file = "datasets/all_wars_matrix.RData")
34 | 


--------------------------------------------------------------------------------
/course.yml:
--------------------------------------------------------------------------------
 1 | id: 58
 2 | title: Introduction to R
 3 | programming_language: r
 4 | description: >-
 5 |   In this introduction to R course, you'll master the basics of this widely used
 6 |   open source language&mdash;including vectors, factors, lists, and data frames.
 7 |   With the coding skills you'll gain in this course, you'll be ready to
 8 |   undertake your own data analysis in R. There are millions of R users
 9 |   worldwide, cementing it as a leading programming language in statistics and
10 |   data science. Begin your coding journey in one of DataCamp's most popular
11 |   courses today!
12 | 
13 | 
14 |   The videos contain live transcripts you can reveal by clicking "Show
15 |   transcript" at the bottom left of the videos.
16 | 
17 |   The course glossary can be found on the right in the resources section.
18 | 
19 |   To obtain CPE credits you need to complete the course and reach a score of 70%
20 |   on the qualified assessment. You can navigate to the assessment by clicking on
21 |   the CPE credits callout on the right.
22 | from: 'r-base-prod:v2.0.1'
23 | runtime_config: minimal
24 | practice_pool_id: 106
25 | datasets:
26 |   Introduction to R.pdf: Course Glossary
27 | 


--------------------------------------------------------------------------------
/chapter1.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title_meta: Chapter 1
  3 | title: Intro to basics
  4 | description: >-
  5 |   Take your first steps with R. In this chapter, you will learn how to use the
  6 |   console as a calculator and how to assign variables. You will also get to know
  7 |   the basic data types in R. Let's get started.
  8 | free_preview: true
  9 | ---
 10 | 
 11 | ## How it works
 12 | 
 13 | ```yaml
 14 | type: NormalExercise
 15 | key: 15d729634a
 16 | xp: 100
 17 | skills:
 18 |   - 1
 19 | ```
 20 | 
 21 | In the editor, you should type R code to solve the exercises. When you submit your code, every line of code is interpreted and executed by R and you get a message whether or not your code was correct. The output of your R code is shown in the console.
 22 | 
 23 | R makes use of the `#` sign to add comments, so that you and others can understand what the R code is about. Just like Twitter! Comments are not run as R code, so they will not influence your result. For example, <span translate="no">_Calculate 3 + 4_</span> in the editor is a comment.
 24 | 
 25 | You can also execute R commands straight in the console. This is a good way to experiment with R code, as your submission is not checked for correctness.
 26 | 
 27 | `@instructions`
 28 | - In the editor there is already some sample code. Can you see which lines are actual R code and which are comments?
 29 | - Add a line of code that calculates the sum of 6 and 12, and submit your code.
 30 | 
 31 | `@hint`
 32 | Just add a line of R code that calculates the sum of 6 and 12, just like the example in the sample code!
 33 | 
 34 | `@pre_exercise_code`
 35 | ```{r}
 36 | # no pec
 37 | 
 38 | ```
 39 | 
 40 | `@sample_code`
 41 | ```{r}
 42 | # Calculate 3 + 4
 43 | 3 + 4
 44 | 
 45 | # Calculate 6 + 12
 46 | 
 47 | ```
 48 | 
 49 | `@solution`
 50 | ```{r}
 51 | # Calculate 3 + 4
 52 | 3 + 4
 53 | 
 54 | # Calculate 6 + 12
 55 | 6 + 12
 56 | ```
 57 | 
 58 | `@sct`
 59 | ```{r}
 60 | ex() %>% check_output_expr("18", missing_msg = "Make sure to add `6 + 12` on a new line. Do not start the line with a `#`, otherwise your R code is not executed!")
 61 | 
 62 | success_msg("Awesome! See how the console shows the result of the R code you submitted? Now that you're familiar with the interface, let's get down to R business!")
 63 | ```
 64 | 
 65 | ---
 66 | 
 67 | ## Arithmetic with R
 68 | 
 69 | ```yaml
 70 | type: NormalExercise
 71 | key: 720745eda5
 72 | xp: 100
 73 | skills:
 74 |   - 1
 75 | ```
 76 | 
 77 | In its most basic form, R can be used as a simple calculator. Consider the following arithmetic operators:
 78 | 
 79 | - Addition: `+`
 80 | - Subtraction: `-`
 81 | - Multiplication: `*`
 82 | - Division: `/`
 83 | - Exponentiation: `^`
 84 | - Modulo: `%%`
 85 | 
 86 | The last two might need some explaining:
 87 | 
 88 | - The `^` operator raises the number to its left to the power of the number to its right: for example `3^2` is 9.
 89 | - The modulo operator `%%` returns the remainder of dividing the number to its left by the number to its right: for example, 5 modulo 3, or `5 %% 3`, is 2.
 90 | 
 91 | With this knowledge, follow the instructions to complete the exercise.
 92 | 
 93 | `@instructions`
 94 | - Type `2^5` in the editor to calculate 2 to the power 5.
 95 | - Type `28 %% 6` to calculate 28 modulo 6.
 96 | - Submit the answer and have a look at the R output in the console.
 97 | - Note how the `#` symbol is used to add comments on the R code.
 98 | 
 99 | `@hint`
100 | Another example of the modulo operator: `9 %% 2` equals `1`.
101 | 
102 | `@pre_exercise_code`
103 | ```{r}
104 | # no pec
105 | ```
106 | 
107 | `@sample_code`
108 | ```{r}
109 | # An addition
110 | 5 + 5 
111 | 
112 | # A subtraction
113 | 5 - 5 
114 | 
115 | # A multiplication
116 | 3 * 5
117 | 
118 |  # A division
119 | (5 + 5) / 2 
120 | 
121 | # Exponentiation
122 | 
123 | 
124 | # Modulo
125 | 
126 | ```
127 | 
128 | `@solution`
129 | ```{r}
130 | # An addition
131 | 5 + 5
132 | 
133 | # A subtraction
134 | 5 - 5 
135 | 
136 | # A multiplication
137 | 3 * 5
138 | 
139 |  # A division
140 | (5 + 5) / 2 
141 | 
142 | # Exponentiation
143 | 2 ^ 5
144 | 
145 | # Modulo
146 | 28 %% 6
147 | ```
148 | 
149 | `@sct`
150 | ```{r}
151 | # Each check passes the expression to look for and the message to show when it is missing
152 | ex() %>% check_output_expr("2^5", missing_msg = "The exponentiation example is not correct. Write `2 ^ 5` on a new line.")
153 | 
154 | ex() %>% check_output_expr("28 %% 6", missing_msg = "There seems to be an issue with the modulo example. Write `28 %% 6` on a new line.")
155 | success_msg("Great! Head over to the next exercise.")
156 | ```
157 | 
158 | ---
159 | 
160 | ## Variable assignment
161 | 
162 | ```yaml
163 | type: NormalExercise
164 | key: 5f200ffd43
165 | xp: 100
166 | skills:
167 |   - 1
168 | ```
169 | 
170 | A basic concept in (statistical) programming is called a **variable**. 
171 | 
172 | A variable allows you to store a value (e.g. 4) or an object (e.g. a function description) in R. You can then later use this variable's name to easily access the value or the object that is stored within this variable. 
173 | 
174 | You can assign a value 4 to a variable `my_var` with the command
175 | 
176 | ```
177 | my_var <- 4
178 | ```
179 | 
180 | `@instructions`
181 | Over to you: complete the code in the editor so that it assigns the value 42 to the variable `x`. Submit the answer. Notice that when you ask R to print `x`, the value 42 appears.
182 | 
183 | `@hint`
184 | Look at how the value 4 was assigned to `my_var` in the exercise's assignment. Do the exact same thing in the editor, but now assign 42 to the variable `x`.
185 | 
186 | `@pre_exercise_code`
187 | ```{r}
188 | # no pec
189 | ```
190 | 
191 | `@sample_code`
192 | ```{r}
193 | # Assign the value 42 to x
194 | x <- 
195 | 
196 | # Print out the value of the variable x
197 | x
198 | ```
199 | 
200 | `@solution`
201 | ```{r}
202 | # Assign the value 42 to x
203 | x <- 42
204 | 
205 | # Print out the value of the variable x
206 | x
207 | ```
208 | 
209 | `@sct`
210 | ```{r}
211 | ex() %>% check_object("x", undefined_msg = "Make sure to define a variable `x`.")  %>% check_equal(incorrect_msg = "Make sure that you assign the correct value to `x`.")
212 | 
213 | success_msg("Good job! Have you noticed that R does not print the value of a variable to the console when you did the assignment? `x <- 42` did not generate any output, because R assumes that you will be needing this variable in the future. Otherwise you wouldn't have stored the value in a variable in the first place, right? Proceed to the next exercise!")
214 | ```
215 | 
216 | ---
217 | 
218 | ## Variable assignment (2)
219 | 
220 | ```yaml
221 | type: NormalExercise
222 | key: c5944b90eb
223 | xp: 100
224 | skills:
225 |   - 1
226 | ```
227 | 
228 | Suppose you have a fruit basket with five apples. As a data analyst in training, you want to store the number of apples in a variable with the name `my_apples`.
229 | 
230 | `@instructions`
231 | - Type the following code in the editor: `my_apples <- 5`. This will assign the value 5 to `my_apples`.
232 | - Type: `my_apples` below the second comment. This will print out the value of `my_apples`.
233 | - Submit your answer, and look at the output: you see that the number 5 is printed. So R now links the variable `my_apples` to the value 5.
234 | 
235 | `@hint`
236 | Remember that if you want to assign a number or an object to a variable in R, you can make use of the assignment operator `<-`. Alternatively, you can use `=`, but `<-` is widely preferred in the R community.
237 | 
238 | `@pre_exercise_code`
239 | ```{r}
240 | # no pec
241 | ```
242 | 
243 | `@sample_code`
244 | ```{r}
245 | # Assign the value 5 to the variable my_apples
246 | 
247 | 
248 | # Print out the value of the variable my_apples
249 | 
250 | ```
251 | 
252 | `@solution`
253 | ```{r}
254 | # Assign the value 5 to the variable my_apples
255 | my_apples <- 5
256 | 
257 | # Print out the value of the variable my_apples
258 | my_apples
259 | ```
260 | 
261 | `@sct`
262 | ```{r}
263 | ex() %>% check_object("my_apples", undefined_msg = "Please make sure to define a variable `my_apples`.") %>% check_equal(incorrect_msg = "Make sure that you assign the correct value to `my_apples`.")
264 | 
265 | ex() %>% check_output_expr("my_apples", missing_msg = "Have you explicitly told R to print out the `my_apples` variable to the console?")
266 | 
267 | success_msg("Great! Continue to the next exercise!")
268 | ```
269 | 
270 | ---
271 | 
272 | ## Variable assignment (3)
273 | 
274 | ```yaml
275 | type: NormalExercise
276 | key: 1c1bd25045
277 | xp: 100
278 | skills:
279 |   - 1
280 | ```
281 | 
282 | Every tasty fruit basket needs oranges, so you decide to add six oranges. As a data analyst, your reflex is to immediately create the variable `my_oranges` and assign the value 6 to it. Next, you want to calculate how many pieces of fruit you have in total. Since you have given meaningful names to these values, you can now code this in a clear way: 
283 | 
284 | ```
285 | my_apples + my_oranges
286 | ```
287 | 
288 | `@instructions`
289 | - Assign to `my_oranges` the value 6.
290 | - Add the variables `my_apples` and `my_oranges` and have R simply print the result.
291 | - Assign the result of adding `my_apples` and `my_oranges` to a new variable `my_fruit`.
292 | 
293 | `@hint`
294 | `my_fruit` is just the sum of `my_apples` and `my_oranges`. You can use the `+` operator to sum the two and `<-` to assign that value to the variable `my_fruit`.
295 | 
296 | `@pre_exercise_code`
297 | ```{r}
298 | # no pec
299 | ```
300 | 
301 | `@sample_code`
302 | ```{r}
303 | # Assign a value to the variables my_apples and my_oranges
304 | my_apples <- 5
305 | 
306 | 
307 | # Add these two variables together
308 | 
309 | 
310 | # Create the variable my_fruit
311 | 
312 | ```
313 | 
314 | `@solution`
315 | ```{r}
316 | # Assign a value to the variables my_apples and my_oranges
317 | my_apples  <- 5
318 | my_oranges <- 6
319 | 
320 | # Add these two variables together
321 | my_apples + my_oranges
322 | 
323 | # Create the variable my_fruit
324 | my_fruit <- my_apples + my_oranges
325 | ```
326 | 
327 | `@sct`
328 | ```{r}
329 | 
330 | msg <- "Have you used `my_fruit <- my_apples + my_oranges` to create the `my_fruit` variable?"
331 | ex() %>% check_object("my_apples") %>% check_equal(incorrect_msg = "Keep the line that assigns 5 to `my_apples`.")
332 | 
333 | ex() %>% check_object("my_oranges") %>% check_equal(incorrect_msg = "Keep the line that assigns 6 to `my_oranges`.")
334 | 
335 | ex() %>% check_output_expr("my_apples + my_oranges",missing_msg = "Make sure to print out the result of adding `my_apples` and `my_oranges`. The code example in the description already gives away the answer to this instruction!")
336 | 
337 | ex() %>% check_object("my_fruit", undefined_msg = msg)  %>% check_equal(incorrect_msg = msg)
338 | success_msg("Nice one! The great advantage of doing calculations with variables is reusability. If you just change `my_apples` to equal 12 instead of 5 and rerun the script, `my_fruit` will automatically update as well. Continue to the next exercise.")
339 | ```
340 | 
341 | ---
342 | 
343 | ## Apples and oranges
344 | 
345 | ```yaml
346 | type: NormalExercise
347 | key: 915fcc7c99
348 | xp: 100
349 | skills:
350 |   - 1
351 | ```
352 | 
353 | Common knowledge tells you not to add apples and oranges. But hey, that is what you just did, no :-)? The `my_apples` and `my_oranges` variables both contained a number in the previous exercise. The `+` operator works with numeric variables in R. If you really tried to add <span translate="no">"apples"</span> and <span translate="no">"oranges"</span>, and assigned a text value to the variable `my_oranges` (see the editor), you would be trying to assign the addition of a numeric and a character variable to the variable `my_fruit`. This is not possible.
354 | 
355 | `@instructions`
356 | - Submit the answer and read the error message. Make sure to understand why this did not work.
357 | - Adjust the code so that R knows you have 6 oranges and thus a fruit basket with 11 pieces of fruit.
358 | 
359 | `@hint`
360 | You have to assign the numeric value `6` to the `my_oranges` variable instead of the character value `"six"`. Note how the quotation marks are used to indicate that `"six"` is a character.
361 | 
362 | `@pre_exercise_code`
363 | ```{r}
364 | # no pec
365 | ```
366 | 
367 | `@sample_code`
368 | ```{r}
369 | # Assign a value to the variable my_apples
370 | my_apples <- 5 
371 | 
372 | # Fix the assignment of my_oranges
373 | my_oranges <- "six" 
374 | 
375 | # Create the variable my_fruit and print it out
376 | my_fruit <- my_apples + my_oranges 
377 | my_fruit
378 | ```
379 | 
380 | `@solution`
381 | ```{r}
382 | # Assign a value to the variable my_apples
383 | my_apples <- 5  
384 | 
385 | # Fix the assignment of my_oranges
386 | my_oranges <- 6
387 | 
388 | # Create the variable my_fruit and print it out
389 | my_fruit <- my_apples + my_oranges 
390 | my_fruit
391 | ```
392 | 
393 | `@sct`
394 | ```{r}
395 | ex() %>% check_error(incorrect_msg = "You can do this by setting the `my_oranges` variable to a numeric value, not a string!")
396 | 
397 | ex() %>% check_object("my_apples") %>% check_equal(incorrect_msg = "Make sure that `my_apples` still contains `5`.")
398 | 
399 | ex() %>% check_object("my_oranges") %>% check_equal(incorrect_msg = "Make sure that `my_oranges` is equal to `6`.")
400 | 
401 | ex() %>% check_object("my_fruit") %>% check_equal(incorrect_msg = "The value of `my_fruit` is not correct. It should be 11, the sum of `my_apples` and `my_oranges`.")
402 | 
403 | # Accept the sum written in either operand order; FALSE is spelled out because F can be reassigned
404 | ex() %>% check_object("my_fruit") %>% check_or(
405 |   check_code(., "my_fruit\\s*<-\\s*my_apples\\s*\\+\\s*my_oranges", missing_msg = "Did you create `my_fruit` as the sum of variables `my_apples` and `my_oranges`?", append = FALSE),
406 |   check_code(., "my_fruit\\s*<-\\s*my_oranges\\s*\\+\\s*my_apples", missing_msg = "Did you create `my_fruit` as the sum of variables `my_apples` and `my_oranges`?", append = FALSE)
407 | )
408 | 
409 | ex() %>% check_output_expr("my_fruit", missing_msg = "Don't remove the line that prints out `my_fruit`.")
410 | 
411 | success_msg("Awesome, keep up the good work! Continue to the next exercise.")
412 | ```
413 | 
414 | ---
415 | 
416 | ## Basic data types in R
417 | 
418 | ```yaml
419 | type: NormalExercise
420 | key: 0f23107394
421 | xp: 100
422 | skills:
423 |   - 1
424 | ```
425 | 
426 | R works with numerous data types. Some of the most basic types to get started are:
427 | 
428 | - Decimal values like `4.5` are called **numerics**.
429 | - Whole numbers like `4` are called **integers**. Integers are also numerics.
430 | - Boolean values (`TRUE` or `FALSE`) are called **logical**.
431 | - Text (or string) values are called **characters**.
432 | 
433 | Note how the quotation marks in the editor indicate that `"some text"` is a string.
434 | 
435 | `@instructions`
436 | Change the value of the:
437 | 
438 | - `my_numeric` variable to `42`.
439 | - `my_character` variable to `"universe"`. Note that the quotation marks indicate that `"universe"` is a character.
440 | - `my_logical` variable to `FALSE`.
441 | 
442 | Note that R is case sensitive!
443 | 
444 | `@hint`
445 | Replace the values in the editor with the values that are provided in the exercise. For example: 
446 | `my_numeric <- 42` assigns the value 42 to the variable `my_numeric`.
447 | 
448 | `@pre_exercise_code`
449 | ```{r}
450 | # no pec
451 | ```
452 | 
453 | `@sample_code`
454 | ```{r}
455 | # Change my_numeric to be 42
456 | my_numeric <- 42.5
457 | 
458 | # Change my_character to be "universe"
459 | my_character <- "some text"
460 | 
461 | # Change my_logical to be FALSE
462 | my_logical <- TRUE
463 | ```
464 | 
465 | `@solution`
466 | ```{r}
467 | # Change my_numeric to be 42
468 | my_numeric <- 42
469 | 
470 | # Change my_character to be "universe"
471 | my_character <- "universe"
472 | 
473 | # Change my_logical to be FALSE
474 | my_logical <- FALSE
475 | ```
476 | 
477 | `@sct`
478 | ```{r}
479 | ex() %>% check_object("my_numeric") %>% check_equal(incorrect_msg = "Have you correctly changed the declaration of `my_numeric` so it contains the value 42?")
480 | 
481 | ex() %>% check_object("my_character") %>% check_equal(incorrect_msg = "Have you correctly changed `my_character` to `\"universe\"`? Don't forget the quotes!")
482 | 
483 | ex() %>% check_object("my_logical") %>% check_equal(incorrect_msg = "Have you correctly changed `my_logical` to `FALSE`? All letters of `FALSE` should be capitalized!")
484 | 
485 | success_msg("Great work! Continue to the next exercise.")
486 | ```
487 | 
488 | ---
489 | 
490 | ## What's that data type?
491 | 
492 | ```yaml
493 | type: NormalExercise
494 | key: 99b549229d
495 | xp: 100
496 | skills:
497 |   - 1
498 | ```
499 | 
500 | Do you remember that when you added `5 + "six"`, you got an error due to a mismatch in data types? You can avoid such embarrassing situations by checking the data type of a variable beforehand. You can do this with the `class()` function, as the code in the editor shows.
501 | 
502 | `@instructions`
503 | Complete the code in the editor and also print out the classes of `my_character` and `my_logical`.
504 | 
505 | `@hint`
506 | The code that prints the data type of `my_numeric` is already included; do a similar thing for `my_character` and `my_logical`.
507 | 
508 | `@pre_exercise_code`
509 | ```{r}
510 | # no pec
511 | ```
512 | 
513 | `@sample_code`
514 | ```{r}
515 | # Declare variables of different types
516 | my_numeric <- 42
517 | my_character <- "universe"
518 | my_logical <- FALSE 
519 | 
520 | # Check class of my_numeric
521 | class(my_numeric)
522 | 
523 | # Check class of my_character
524 | 
525 | 
526 | # Check class of my_logical
527 | 
528 | ```
529 | 
530 | `@solution`
531 | ```{r}
532 | # Declare variables of different types:
533 | my_numeric <- 42
534 | my_character <- "universe"
535 | my_logical <- FALSE
536 | 
537 | # Check class of my_numeric
538 | class(my_numeric)
539 | 
540 | # Check class of my_character
541 | class(my_character)
542 | 
543 | # Check class of my_logical
544 | class(my_logical)
545 | ```
546 | 
547 | `@sct`
548 | ```{r}
549 | msg <- "Do not change the declaration of the variables!"
550 | ex()  %>% check_object("my_numeric", undefined_msg = msg)  %>%  check_equal(incorrect_msg = msg)
551 | 
552 | ex()  %>% check_object("my_character", undefined_msg = msg)  %>%  check_equal(incorrect_msg = msg)
553 | 
554 | ex()  %>% check_object("my_logical", undefined_msg = msg)  %>%  check_equal(incorrect_msg = msg)
555 | 
556 | patt <- "Have you included `class(%1$s)` to print out the data type of `%1$s`?"
557 | ex() %>% check_output_expr("class(my_numeric)",missing_msg = "Do not remove the code that prints out the type of `my_numeric`.")
558 | 
559 | ex() %>% check_output_expr("class(my_character)",missing_msg = sprintf(patt, "my_character"))
560 | 
561 | ex() %>% check_output_expr("class(my_logical)",missing_msg = sprintf(patt, "my_logical"))
562 | success_msg("Congratulations! This was the last exercise for this chapter. Head over to the next chapter to get immersed in the world of vectors!")
563 | ```
564 | 


--------------------------------------------------------------------------------
/chapter6.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title_meta: Chapter 6
  3 | title: Lists
  4 | description: >-
  5 |   As opposed to vectors, lists can hold components of different types, just as
  6 |   your to-do lists can contain different categories of tasks. This chapter will
  7 |   teach you how to create, name, and subset these lists.
  8 | ---
  9 | 
 10 | ## Lists, why would you need them?
 11 | 
 12 | ```yaml
 13 | type: NormalExercise
 14 | key: 2afcdb6a76ec91bf266de9b2ac295d844d7bb004
 15 | xp: 100
 16 | skills:
 17 |   - 1
 18 | ```
 19 | 
 20 | Congratulations! At this point in the course you are already familiar with:
 21 | 
 22 | - **Vectors** (one-dimensional array): can hold numeric, character or logical values. The elements in a vector all have the same data type.
 23 | - **Matrices** (two-dimensional array): can hold numeric, character or logical values. The elements in a matrix all have the same data type.
 24 | - **Data frames** (two-dimensional objects): can hold numeric, character or logical values. Within a column all elements have the same data type, but different columns can be of different data type.
 25 | 
 26 | Pretty sweet for an R newbie, right? ;-)
 27 | 
 28 | `@instructions`
 29 | Submit the answer to start learning everything about lists!
 30 | 
 31 | `@hint`
 32 | Just submit the answer!
 33 | 
 34 | `@pre_exercise_code`
 35 | ```{r}
 36 | # no pec
 37 | ```
 38 | 
 39 | `@sample_code`
 40 | ```{r}
 41 | # Just submit the answer
 42 | ```
 43 | 
 44 | `@solution`
 45 | ```{r}
 46 | # Just submit the answer.
 47 | ```
 48 | 
 49 | `@sct`
 50 | ```{r}
 51 | success_msg("Ready, set, go! Continue to the next exercise.")
 52 | ```
 53 | 
 54 | ---
 55 | 
 56 | ## Lists, why would you need them? (2)
 57 | 
 58 | ```yaml
 59 | type: NormalExercise
 60 | key: 68f93c5c504616bd18876da52cd123277d56fc8b
 61 | xp: 100
 62 | skills:
 63 |   - 1
 64 | ```
 65 | 
 66 | A **list** in R is similar to your to-do list at work or school: the different items on that list most likely differ in length, characteristic, and type of activity that has to be done. 
 67 | 
 68 | A list in R allows you to gather a variety of objects under one name (that is, the name of the list) in an ordered way. These objects can be matrices, vectors, data frames, even other lists, etc. It is not even required that these objects are related to each other in any way.
 69 | 
 70 | You could say that a list is some kind of super data type: you can store practically any piece of information in it!
 71 | 
 72 | `@instructions`
 73 | Just submit the answer to start the first exercise on lists.
 74 | 
 75 | `@hint`
 76 | Submit the answer to start the first exercise on lists.
 77 | 
 78 | `@pre_exercise_code`
 79 | ```{r}
 80 | # no pec
 81 | ```
 82 | 
 83 | `@sample_code`
 84 | ```{r}
 85 | # Just submit the answer to start the first exercise on lists.
 86 | ```
 87 | 
 88 | `@solution`
 89 | ```{r}
 90 | # Just submit the answer to start the first exercise on lists.
 91 | ```
 92 | 
 93 | `@sct`
 94 | ```{r}
 95 | success_msg("Cool. Let's get our hands dirty!")
 96 | ```
 97 | 
 98 | ---
 99 | 
100 | ## Creating a list
101 | 
102 | ```yaml
103 | type: NormalExercise
104 | key: 4beee9cb532c889903218b49b83ab5ef133eac83
105 | xp: 100
106 | skills:
107 |   - 1
108 | ```
109 | 
110 | Let us create our first list! To construct a list you use the function `list()`: 
111 | 
112 | ```
113 | my_list <- list(comp1, comp2 ...)
114 | ```
115 | 
116 | The arguments to the list function are the list components. Remember, these components can be matrices, vectors, other lists, ...
117 | 
118 | `@instructions`
119 | Construct a list, named `my_list`, that contains the variables `my_vector`, `my_matrix` and `my_df` as list components.
120 | 
121 | `@hint`
122 | Use the `list()` function with `my_vector`, `my_matrix` and `my_df` as arguments separated by a comma.
123 | 
124 | `@pre_exercise_code`
125 | ```{r}
126 | # no pec
127 | ```
128 | 
129 | `@sample_code`
130 | ```{r}
131 | # Vector with numerics from 1 up to 10
132 | my_vector <- 1:10 
133 | 
134 | # Matrix with numerics from 1 up to 9
135 | my_matrix <- matrix(1:9, ncol = 3)
136 | 
137 | # First 10 elements of the built-in data frame mtcars
138 | my_df <- mtcars[1:10,]
139 | 
140 | # Construct list with these different elements:
141 | my_list <-
142 | ```
143 | 
144 | `@solution`
145 | ```{r}
146 | # Vector with numerics from 1 up to 10
147 | my_vector <- 1:10 
148 | 
149 | # Matrix with numerics from 1 up to 9
150 | my_matrix <- matrix(1:9, ncol = 3)
151 | 
152 | # First 10 elements of the built-in data frame mtcars
153 | my_df <- mtcars[1:10,]
154 | 
155 | # Construct list with these different elements:
156 | my_list <- list(my_vector, my_matrix, my_df)
157 | ```
158 | 
159 | `@sct`
160 | ```{r}
161 | msg = "Do not remove or change the definition of the variables `my_vector`, `my_matrix` or `my_df`!"
162 | ex() %>% check_object("my_vector", undefined_msg = msg) %>% check_equal(incorrect_msg = msg)
163 | 
164 | ex() %>% check_object("my_matrix", undefined_msg = msg) %>% check_equal(incorrect_msg = msg)
165 | 
166 | ex() %>% check_object("my_df", undefined_msg = msg) %>% check_equal(incorrect_msg = msg)
167 | 
168 | ex() %>% check_object("my_list") %>% check_equal(incorrect_msg = "It looks like `my_list` does not contain the correct elements. Make sure to pass the variables `my_vector`, `my_matrix` and `my_df` to the `list()` function, separated by commas, in this order.")
169 | 
170 | success_msg("Wonderful! Head over to the next exercise.")
171 | ```
172 | 
173 | ---
174 | 
175 | ## Creating a named list
176 | 
177 | ```yaml
178 | type: NormalExercise
179 | key: 89dd0126568b1ff5a84033c571907a8a282245e4
180 | xp: 100
181 | skills:
182 |   - 1
183 | ```
184 | 
185 | Well done, you're on a roll!
186 | 
187 | Just like on your to-do list, you want to avoid not knowing or remembering what the components of your list stand for. That is why you should give names to them: 
188 | 
189 | ```
190 | my_list <- list(name1 = your_comp1, 
191 |                 name2 = your_comp2)
192 | ``` 
193 | 
194 | This creates a list with components that are named `name1`, `name2`, and so on. If you want to name your lists after you've created them, you can use the `names()` function as you did with vectors. The following commands are fully equivalent to the assignment above:
195 | 
196 | ```
197 | my_list <- list(your_comp1, your_comp2)
198 | names(my_list) <- c("name1", "name2")
199 | ```
200 | 
201 | `@instructions`
202 | - Change the code of the previous exercise (see editor) by adding names to the components. Use for `my_vector` the name `vec`, for `my_matrix` the name `mat` and for `my_df` the name `df`. 
203 | - Print out `my_list` so you can inspect the output.
204 | 
205 | `@hint`
206 | The first method of assigning names to your list components is the easiest. It starts like this: 
207 | ```
208 | my_list <- list(vec = my_vector)
209 | ```
210 | Add the other two components in a similar fashion.
211 | 
212 | `@pre_exercise_code`
213 | ```{r}
214 | # no pec
215 | ```
216 | 
217 | `@sample_code`
218 | ```{r}
219 | # Vector with numerics from 1 up to 10
220 | my_vector <- 1:10 
221 | 
222 | # Matrix with numerics from 1 up to 9
223 | my_matrix <- matrix(1:9, ncol = 3)
224 | 
225 | # First 10 rows of the built-in data frame mtcars
226 | my_df <- mtcars[1:10,]
227 | 
228 | # Adapt list() call to give the components names
229 | my_list <- list(my_vector, my_matrix, my_df)
230 | 
231 | # Print out my_list
232 | ```
233 | 
234 | `@solution`
235 | ```{r}
236 | # Vector with numerics from 1 up to 10
237 | my_vector <- 1:10 
238 | 
239 | # Matrix with numerics from 1 up to 9
240 | my_matrix <- matrix(1:9, ncol = 3)
241 | 
242 | # First 10 rows of the built-in data frame mtcars
243 | my_df <- mtcars[1:10,]
244 | 
245 | # Adapt list() call to give the components names
246 | my_list <- list(vec = my_vector, mat = my_matrix, df = my_df)
247 | 
248 | # Print out my_list
249 | my_list
250 | ```
251 | 
252 | `@sct`
253 | ```{r}
254 | msg <- "Do not remove or change the definition of the variables `my_vector`, `my_matrix` or `my_df`!"
255 | ex() %>% check_object("my_vector", undefined_msg = msg) %>% check_equal(incorrect_msg = msg)
256 | 
257 | ex() %>% check_object("my_matrix", undefined_msg = msg) %>% check_equal(incorrect_msg = msg)
258 | 
259 | ex() %>% check_object("my_df", undefined_msg = msg) %>% check_equal(incorrect_msg = msg)
260 | 
261 | ex() %>% check_object("my_list") %>% check_equal(incorrect_msg = "It looks like `my_list` does not contain the correct elements.")
262 | 
263 | ex() %>% check_object("my_list") %>% check_equal(eq_condition = "equal",incorrect_msg = "It looks like `my_list` does not contain the correct naming for the components. Make sure you use the names `vec`, `mat` and `df`, respectively. Check out the hint if you're struggling.")
264 | 
265 | ex() %>% check_output_expr("my_list", missing_msg = "Don't forget to print `my_list` to the console! Simply add `my_list` on a new line in the editor.")
266 | 
267 | success_msg("Great! Not only do you know how to construct lists now, you can also name them; a skill that will prove most useful in practice. Continue to the next exercise.")
268 | ```
269 | 
270 | ---
271 | 
272 | ## Creating a named list (2)
273 | 
274 | ```yaml
275 | type: NormalExercise
276 | key: 19fd17cc792ef870c1558b3a9bae08c1d1e3acae
277 | xp: 100
278 | skills:
279 |   - 1
280 | ```
281 | 
282 | Being a huge movie fan (remember your job at LucasFilms), you decide to start storing information on good movies with the help of lists. 
283 | 
284 | Start by creating a list for the movie "The Shining". We have already created the variables `mov`, `act` and `rev` in your R workspace. Feel free to check them out in the console.
285 | 
286 | `@instructions`
287 | Complete the code in the editor to create `shining_list`; it contains three elements:
288 | 
289 | - `moviename`: a character string with the movie title (stored in `mov`)
290 | - `actors`: a vector with the main actors' names (stored in `act`)
291 | - `reviews`: a data frame that contains some reviews (stored in `rev`)
292 | 
293 | Do not forget to name the list components accordingly (names are `moviename`, `actors` and `reviews`).
294 | 
295 | `@hint`
296 | `shining_list <- list(moviename = mov)` is only part of the solution; it's your job to also add `act` and `rev` to the list, with the appropriate names.
297 | 
298 | `@pre_exercise_code`
299 | ```{r}
300 | mov <- "The Shining"
301 | act      <- c("Jack Nicholson","Shelley Duvall","Danny Lloyd","Scatman Crothers","Barry Nelson")
302 | sources     <- c("IMDb1","IMDb2","IMDb3")
303 | comments    <- c("Best Horror Film I Have Ever Seen","A truly brilliant and scary film from Stanley Kubrick","A masterpiece of psychological horror")
304 | scores      <- c(4.5,4,5)
305 | rev     <- data.frame(scores, sources, comments)
306 | rm(scores, sources, comments)
307 | ```
308 | 
309 | `@sample_code`
310 | ```{r}
311 | # The variables mov, act and rev are available
312 | 
313 | # Finish the code to build shining_list
314 | shining_list <- list(moviename = mov)
315 | ```
316 | 
317 | `@solution`
318 | ```{r}
319 | # The variables mov, act and rev are available
320 | 
321 | # Finish the code to build shining_list
322 | shining_list <- list(moviename = mov, actors = act, reviews = rev)
323 | ```
324 | 
325 | `@sct`
326 | ```{r}
327 | msg <- "Do not remove or change the definition of the pre-set variables!"
328 | ex()  %>% check_object("mov", undefined_msg = msg)  %>% check_equal(incorrect_msg = msg)
329 | 
330 | ex()  %>% check_object("rev", undefined_msg = msg)  %>% check_equal(incorrect_msg = msg)
331 | 
332 | ex()  %>% check_object("act", undefined_msg = msg)  %>% check_equal(incorrect_msg = msg)
333 | 
334 | ex() %>% check_object("shining_list") %>% check_equal(incorrect_msg = "It looks like `shining_list` does not contain the correct elements: the first element should be `mov`, the second element `act`, and the third `rev`.")
335 | 
336 | ex() %>% check_object("shining_list") %>% check_equal(eq_condition = "equal",incorrect_msg = "It looks like `shining_list` does not contain the correct naming for the components: name the first element `moviename`, the second element `actors`, and the third element `reviews`.")
337 | 
338 | success_msg("Wonderful! You now know how to construct and name lists. As in the previous chapters, let's look at how to select elements for lists. Head over to the next exercise.")
339 | ```
340 | 
341 | ---
342 | 
343 | ## Selecting elements from a list
344 | 
345 | ```yaml
346 | type: NormalExercise
347 | key: 1ef3278944562caef64b9927dd2ddb6ee52334cd
348 | xp: 100
349 | skills:
350 |   - 1
351 | ```
352 | 
353 | Your list will often be built out of numerous elements and components. Therefore, getting a single element, multiple elements, or a component out of it is not always straightforward. 
354 | 
355 | One way to select a component is using the numbered position of that component. For example, to "grab" the first component of `shining_list` you type 
356 | 
357 | ```
358 | shining_list[[1]]
359 | ```
360 | 
361 | A quick way to check this out is typing it in the console. Important to remember: to select a component from a list, you use double square brackets: `[[ ]]`; to select elements from vectors, you use single square brackets: `[ ]`. Don't mix them up!
362 | 
363 | You can also refer to the names of the components, with `[[ ]]` or with the `$` sign. Both will select the data frame representing the reviews: 
364 | 
365 | ```
366 | shining_list[["reviews"]]
367 | shining_list$reviews
368 | ``` 
369 | 
370 | Besides selecting components, you often need to select specific elements out of these components. For example, with `shining_list[[2]][1]` you select from the second component, `actors` (`shining_list[[2]]`), the first element (`[1]`). When you type this in the console, you will see the answer is Jack Nicholson.
371 | 
372 | `@instructions`
373 | - Select from `shining_list` the vector representing the actors. Simply print out this vector.
374 | - Select from `shining_list` the second element in the vector representing the actors. Do a printout like before.
375 | 
376 | `@hint`
377 | - To select the vector representing the actors, you can use `$actors`.
378 | - To select the third element in the vector representing the actors, you use `shining_list$actors[3]`. What needs to change to select the second element?
379 | 
380 | `@pre_exercise_code`
381 | ```{r}
382 | load(url("https://assets.datacamp.com/course/intro_to_r/shining_list.RData"))
383 | ```
384 | 
385 | `@sample_code`
386 | ```{r}
387 | # shining_list is already pre-loaded in the workspace
388 | 
389 | # Print out the vector representing the actors
390 | 
391 | 
392 | # Print the second element of the vector representing the actors
393 | ```
394 | 
395 | `@solution`
396 | ```{r}
397 | # shining_list is already pre-loaded in the workspace
398 | 
399 | # Print out the vector representing the actors
400 | shining_list$actors
401 | 
402 | # Print the second element of the vector representing the actors
403 | shining_list$actors[2]
404 | ```
405 | 
406 | `@sct`
407 | ```{r}
408 | msg <- "Do not remove or change the definition of `shining_list`, which is pre-loaded in the workspace!"
409 | ex() %>% check_object("shining_list", undefined_msg = msg) %>% check_equal(incorrect_msg = msg)
410 | 
411 | ex() %>% check_output_expr("shining_list$actors", missing_msg = "Have you correctly selected and printed out the vector representing actors? You can use `shining_list$actors`, for example.")
412 | 
413 | ex() %>% check_output_expr("shining_list$actors[2]", missing_msg = "To select the second actor from the vector representing actors, you should chain your selections: `shining_list$actors` represents the actors, so you can add a `[2]` to select the second element.")
414 | 
415 | success_msg("Great! Selecting elements from lists is rather easy isn't it? Continue to the next exercise.")
416 | ```
417 | 
418 | ---
419 | 
420 | ## Creating a new list for another movie
421 | 
422 | ```yaml
423 | type: NormalExercise
424 | key: ce10c83e5b
425 | xp: 100
426 | ```
427 | 
428 | You found reviews of another, more recent, Jack Nicholson movie: The Departed!
429 | 
430 | | Scores | Comments               |
431 | |--------|------------------------|
432 | | 4.6    | <span translate="no">I would watch it again</span> |
433 | | 5      | <span translate="no">Amazing!</span>               |
434 | | 4.8    | <span translate="no">I liked it</span>             |
435 | | 5      | <span translate="no">One of the best movies</span> |
436 | | 4.2    | <span translate="no">Fascinating plot</span>       |
437 | 
438 | It would be useful to collect together all the pieces of information about the movie, like the title, actors, and reviews, into a single variable. Since these pieces of data have different shapes, it is natural to combine them in a list variable.
439 | 
440 | `movie_title`, containing the title of the movie, and `movie_actors`, containing the names of some of the actors in the movie, are available in your workspace.
441 | 
442 | `@instructions`
443 | - Create two vectors, called `scores` and `comments`, that contain the information from the reviews shown in the table.
444 | - Find the average of the `scores` vector and save it as `avg_review`. 
445 | - Combine the `scores` and `comments` vectors into a data frame called `reviews_df`. 
446 | - Create a list, called `departed_list`, that contains the `movie_title`, `movie_actors`, reviews data frame as `reviews_df`, and the average review score as `avg_review`, and print it out.
447 | 
448 | `@hint`
449 | - You'll use the `c()` function to create the vectors. 
450 | - To find the average of a vector, pass the name of the vector to the `mean()` function. 
451 | - Use the `data.frame()` function to create the `reviews_df` data frame.
452 | 
453 | `@pre_exercise_code`
454 | ```{r}
455 | movie_title <- "The Departed"
456 | movie_actors <- c("Leonardo DiCaprio", "Matt Damon", "Jack Nicholson","Mark Wahlberg","Vera Farmiga", "Martin Sheen")
457 | ```
458 | 
459 | `@sample_code`
460 | ```{r}
461 | # Use the table from the exercise to define the comments and scores vectors
462 | scores <- c(4.6, 5, 4.8, 5, ___
463 | comments <- c("I would watch it again", "Amazing!", "I liked it", "One of the best movies", ___ 
464 | 
465 | # Save the average of the scores vector as avg_review
466 | 
467 | 
468 | # Combine scores and comments into the reviews_df data frame
469 | 
470 | 
471 | # Create and print out a list, called departed_list
472 | 
473 | 
474 | ```
475 | 
476 | `@solution`
477 | ```{r}
478 | # Use the table from the exercise to define the comments and scores vectors
479 | scores <- c(4.6, 5, 4.8, 5, 4.2)
480 | comments <- c("I would watch it again", "Amazing!", "I liked it", "One of the best movies", "Fascinating plot")
481 | 
482 | # Save the average of the scores vector as avg_review
483 | avg_review <- mean(scores)
484 | 
485 | # Combine scores and comments into the reviews_df data frame
486 | reviews_df <- data.frame(scores, comments)
487 | 
488 | # Create and print out a list, called departed_list
489 | departed_list <- list(movie_title, movie_actors, reviews_df, avg_review)
490 | departed_list
491 | ```
492 | 
493 | `@sct`
494 | ```{r}
495 | msg <- "Do not change or remove the preloaded objects."
496 | 
497 | ex() %>% check_object("movie_title") %>% check_equal(eq_condition = "equal", incorrect_msg = msg)
498 | ex() %>% check_object("movie_actors")  %>% check_equal(eq_condition = "equal", incorrect_msg = msg)
499 | ex() %>% check_object("scores") %>% check_equal(eq_condition = "equal", incorrect_msg = "Did you create the `scores` vector correctly?")
500 | ex() %>% check_object("comments") %>% check_equal(eq_condition = "equal", incorrect_msg = "Did you create the `comments` vector correctly?")
501 | ex() %>% check_correct(
502 |   check_object(.,"avg_review") %>% check_equal(eq_condition = "equal", incorrect_msg = "Did you create the `avg_review` object correctly?"),
503 |   check_function(.,"mean") %>% check_arg("x") %>% check_equal()
504 | )
505 | ex() %>% check_correct(
506 |   check_object(.,"reviews_df") %>% check_equal(eq_condition = "equal", incorrect_msg = "Did you create `reviews_df` correctly?"),
507 |   check_function(.,"data.frame") %>% {
508 |     check_arg(.,"..1") %>% check_equal()
509 |     check_arg(.,"..2") %>% check_equal()
510 |   }
511 | )
512 | ex() %>% check_correct(
513 |   check_object(.,"departed_list") %>% check_equal(eq_condition = "equal", incorrect_msg = "Did you create `departed_list` correctly?"),
514 |   check_function(.,"list") %>% {
515 |     check_arg(.,"..1") %>% check_equal()
516 |     check_arg(.,"..2") %>% check_equal()
517 |     check_arg(.,"..3") %>% check_equal()
518 |     check_arg(.,"..4") %>% check_equal()
519 |   }
520 | )
521 | ex() %>% check_output_expr("departed_list", missing_msg = "Did you print the `departed_list`?")
522 | 
523 | success_msg("Good work! You successfully created another list of movie information, and combined different components into a single list. Congratulations on finishing the course!")
524 | ```
525 | 


--------------------------------------------------------------------------------
/chapter4.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title_meta: Chapter 4
  3 | title: Factors
  4 | description: >-
  5 |   Data often falls into a limited number of categories. For example, human hair
  6 |   color can be categorized as black, brown, blond, red, grey, or white—and
  7 |   perhaps a few more options for people who color their hair. In R, categorical
  8 |   data is stored in factors. Factors are very important in data analysis, so
  9 |   start learning how to create, subset, and compare them now.
 10 | ---
 11 | 
 12 | ## What's a factor and why would you use it?
 13 | 
 14 | ```yaml
 15 | type: NormalExercise
 16 | key: 05273321916d99bb9c0deadf75c6834d25a47244
 17 | xp: 100
 18 | skills:
 19 |   - 1
 20 | ```
 21 | 
 22 | In this chapter you dive into the wonderful world of **factors**.
 23 | 
 24 | The term factor refers to a statistical data type used to store categorical variables. The difference between a categorical variable and a continuous variable is that a categorical variable can belong to a **limited number of categories**. A continuous variable, on the other hand, can correspond to an infinite number of values.
 25 | 
 26 | It is important that R knows whether it is dealing with a continuous or a categorical variable, as the statistical models you will develop in the future treat both types differently. (You will see later why this is the case.)
 27 | 
 28 | A good example of a categorical variable is sex. In many circumstances you can limit the sex categories to "Male" or "Female". (Sometimes you may need different categories. For example, you may need to consider chromosomal variation, hermaphroditic animals, or different cultural norms, but you will always have a finite number of categories.)
 29 | 
 30 | `@instructions`
 31 | Assign to variable `theory` the value `"factors"`.
 32 | 
 33 | `@hint`
 34 | Simply assign a variable (`<-`); make sure to capitalize correctly.
 35 | 
 36 | `@pre_exercise_code`
 37 | ```{r}
 38 | # no pec
 39 | ```
 40 | 
 41 | `@sample_code`
 42 | ```{r}
 43 | # Assign to the variable theory what this chapter is about!
 44 | ```
 45 | 
 46 | `@solution`
 47 | ```{r}
 48 | # Assign to the variable theory what this chapter is about!
 49 | theory <- "factors"
 50 | ```
 51 | 
 52 | `@sct`
 53 | ```{r}
 54 | msg_undef <- "It looks like you haven't defined the variable `theory`."
 55 | msg_incor <- "The value of `theory` looks incorrect. Make sure to assign it the character string `\"factors\"`. Remember that R is case sensitive."
 56 | msg_err <- "Make sure that you defined `theory` correctly, using `<-` for assignment."
 57 | 
 58 | # If get error and theory is undefined, point out the error
 59 | ex()  %>% check_or(check_error(.,msg_err), check_object(.,"theory")  %>% check_equal(eval = FALSE))
 60 | # Then check that theory is defined and holds the expected value; start from ex() because `.` only exists inside a pipe
 61 | ex() %>% check_object("theory", undefined_msg = msg_undef) %>% check_equal(incorrect_msg = msg_incor)
 62 | success_msg("Good job! Ready to start? Continue to the next exercise!")
 63 | ```
 64 | 
 65 | ---
 66 | 
 67 | ## What's a factor and why would you use it? (2)
 68 | 
 69 | ```yaml
 70 | type: NormalExercise
 71 | key: 6cc21c842b075347926bb1b244782213df32e370
 72 | xp: 100
 73 | skills:
 74 |   - 1
 75 | ```
 76 | 
 77 | To create factors in R, you make use of the function `factor()`. First thing that you have to do is create a vector that contains all the observations that belong to a limited number of categories. For example, `sex_vector` contains the sex of 5 different individuals:
 78 | 
 79 | ```
 80 | sex_vector <- c("Male","Female","Female","Male","Male")
 81 | ```
 82 | 
 83 | It is clear that there are two categories, or in R-terms **'factor levels'**, at work here: <span translate="no">"Male"</span> and <span translate="no">"Female"</span>.
 84 | 
 85 | The function `factor()` will encode the vector as a factor:
 86 | 
 87 | ```
 88 | factor_sex_vector <- factor(sex_vector)
 89 | ```
 90 | 
 91 | `@instructions`
 92 | - Convert the character vector `sex_vector` to a factor with `factor()` and assign the result to `factor_sex_vector`
 93 | - Print out `factor_sex_vector` and assert that R prints out the factor levels below the actual values.
 94 | 
 95 | `@hint`
 96 | Simply use the function `factor()` on `sex_vector`. Have a look at the assignment, the answer is already there somewhere...
 97 | 
 98 | `@pre_exercise_code`
 99 | ```{r}
100 | # no pec
101 | ```
102 | 
103 | `@sample_code`
104 | ```{r}
105 | # Sex vector
106 | sex_vector <- c("Male", "Female", "Female", "Male", "Male")
107 | 
108 | # Convert sex_vector to a factor
109 | factor_sex_vector <-
110 | 
111 | # Print out factor_sex_vector
112 | 
113 | ```
114 | 
115 | `@solution`
116 | ```{r}
117 | # Sex vector
118 | sex_vector <- c("Male", "Female", "Female", "Male", "Male")
119 | 
120 | # Convert sex_vector to a factor
121 | factor_sex_vector <- factor(sex_vector)
122 | 
123 | # Print out factor_sex_vector
124 | factor_sex_vector
125 | ```
126 | 
127 | `@sct`
128 | ```{r}
129 | ex()  %>%  check_object("factor_sex_vector")  %>% check_equal(incorrect_msg = "Did you assign the factor of `sex_vector` to `factor_sex_vector`?")
130 | ex()  %>%  check_output_expr("factor_sex_vector", missing_msg = "Don't forget to print out `factor_sex_vector`!")
131 | 
132 | success_msg("Great! If you want to find out more about the `factor()` function, do not hesitate to type `?factor` in the console. This will open up a help page. Continue to the next exercise.")
133 | ```
134 | 
135 | ---
136 | 
137 | ## What's a factor and why would you use it? (3)
138 | 
139 | ```yaml
140 | type: NormalExercise
141 | key: 5bd4f50afc2c2dbc881e16b8ca94ca56960dff42
142 | xp: 100
143 | skills:
144 |   - 1
145 | ```
146 | 
147 | There are two types of categorical variables: a **nominal categorical variable** and an **ordinal categorical variable**.
148 | 
149 | A nominal variable is a categorical variable without an implied order. This means that it is impossible to say that 'one is worth more than the other'. For example, think of the categorical variable `animals_vector` with the categories `"Elephant"`, `"Giraffe"`, `"Donkey"` and `"Horse"`. Here, it is impossible to say that one stands above or below the other. (Note that some of you might disagree ;-) ).
150 | 
151 | In contrast, ordinal variables do have a natural ordering. Consider for example the categorical variable `temperature_vector` with the categories: `"Low"`, `"Medium"` and `"High"`. Here it is obvious that `"Medium"` stands above `"Low"`, and `"High"` stands above `"Medium"`.
152 | 
153 | `@instructions`
154 | Submit the answer to check how R constructs and prints nominal and ordinal variables. Do not worry if you do not understand all the code just yet, we will get to that.
155 | 
156 | `@hint`
157 | Just submit the answer and look at the console. Notice how R indicates the ordering of the factor levels for ordinal categorical variables.
158 | 
159 | `@pre_exercise_code`
160 | ```{r}
161 | # no pec
162 | ```
163 | 
164 | `@sample_code`
165 | ```{r}
166 | # Animals
167 | animals_vector <- c("Elephant", "Giraffe", "Donkey", "Horse")
168 | factor_animals_vector <- factor(animals_vector)
169 | factor_animals_vector
170 | 
171 | # Temperature
172 | temperature_vector <- c("High", "Low", "High","Low", "Medium")
173 | factor_temperature_vector <- factor(temperature_vector, order = TRUE, levels = c("Low", "Medium", "High"))
174 | factor_temperature_vector
175 | ```
176 | 
177 | `@solution`
178 | ```{r}
179 | # Animals
180 | animals_vector <- c("Elephant", "Giraffe", "Donkey", "Horse")
181 | factor_animals_vector <- factor(animals_vector)
182 | factor_animals_vector
183 | 
184 | # Temperature
185 | temperature_vector <- c("High", "Low", "High","Low", "Medium")
186 | factor_temperature_vector <- factor(temperature_vector, order = TRUE, levels = c("Low", "Medium", "High"))
187 | factor_temperature_vector
188 | ```
189 | 
190 | `@sct`
191 | ```{r}
192 | msg <- "Do not change anything about the sample code. Simply submit the answer and inspect the solution!"
193 | 
194 | ex() %>% check_object("animals_vector", undefined_msg = msg) %>% check_equal(incorrect_msg = msg)
195 | 
196 | ex() %>% check_object("temperature_vector", undefined_msg = msg) %>% check_equal(incorrect_msg = msg)
197 | 
198 | ex() %>% check_object("factor_animals_vector", undefined_msg = msg) %>% check_equal(incorrect_msg = msg)
199 | 
200 | ex() %>% check_output_expr("factor_animals_vector", missing_msg = msg)
201 | 
202 | ex() %>% check_object("factor_temperature_vector", undefined_msg = msg) %>% check_equal(incorrect_msg = msg)
203 | 
204 | ex() %>% check_output_expr("factor_temperature_vector", missing_msg = msg)
205 | 
206 | 
207 | success_msg("Can you already tell what's happening in this exercise? Awesome! Continue to the next exercise and get into the details of factor levels.")
208 | ```
209 | 
210 | ---
211 | 
212 | ## Factor levels
213 | 
214 | ```yaml
215 | type: NormalExercise
216 | key: 1aa698978d32d1a0befa4700d7da85a648e1d69e
217 | xp: 100
218 | skills:
219 |   - 1
220 | ```
221 | 
222 | When you first get a dataset, you will often notice that it contains factors with specific factor levels. However, sometimes you will want to change the names of these levels for clarity or other reasons. R allows you to do this with the function `levels()`:
223 | 
224 | ```
225 | levels(factor_vector) <- c("name1", "name2",...)
226 | ```
227 | 
228 | A good illustration is the raw data that is provided to you by a survey. A common question for every questionnaire is the sex of the respondent. Here, for simplicity, just two categories were recorded, `"M"` and `"F"`. (You usually need more categories for survey data; either way, you use a factor to store the categorical data.) 
229 | 
230 | ```
231 | survey_vector <- c("M", "F", "F", "M", "M")
232 | ```
233 | 
234 | Recording the sex with the abbreviations `"M"` and `"F"` can be convenient if you are collecting data with pen and paper, but it can introduce confusion when analyzing the data. At that point, you will often want to change the factor levels to `"Male"` and `"Female"` instead of `"M"` and `"F"` for clarity.
235 | 
236 | **Watch out:** the order with which you assign the levels is important. If you type `levels(factor_survey_vector)`, you'll see that it outputs `[1] "F" "M"`. If you don't specify the levels of the factor when creating the vector, `R` will automatically assign them alphabetically. To correctly map `"F"` to `"Female"` and `"M"` to `"Male"`, the levels should be set to `c("Female", "Male")`, in this order.
237 | 
238 | `@instructions`
239 | - Check out the code that builds a factor vector from `survey_vector`. You should use `factor_survey_vector` in the next instruction.
240 | - Change the factor levels of `factor_survey_vector` to `c("Female", "Male")`. Mind the order of the vector elements here.
241 | 
242 | `@hint`
243 | Mind the order in which you have to type in the factor levels. Hint: look at the order in which the levels are printed when typing `levels(factor_survey_vector)`.
244 | 
245 | `@pre_exercise_code`
246 | ```{r}
247 | # Make survey_vector and factor_survey_vector available in the workspace
248 | survey_vector <- c("M", "F", "F", "M", "M")
249 | factor_survey_vector <- factor(survey_vector)
250 | ```
251 | 
252 | `@sample_code`
253 | ```{r}
254 | # Code to build factor_survey_vector
255 | survey_vector <- c("M", "F", "F", "M", "M")
256 | factor_survey_vector <- factor(survey_vector)
257 | 
258 | # Specify the levels of factor_survey_vector
259 | levels(factor_survey_vector) <-
260 | 
261 | factor_survey_vector
262 | ```
263 | 
264 | `@solution`
265 | ```{r}
266 | # Code to build factor_survey_vector
267 | survey_vector <- c("M", "F", "F", "M", "M")
268 | factor_survey_vector <- factor(survey_vector)
269 | 
270 | # Specify the levels of factor_survey_vector
271 | levels(factor_survey_vector) <- c("Female", "Male")
272 | 
273 | factor_survey_vector
274 | ```
275 | 
276 | `@sct`
277 | ```{r}
278 | msg <- "Do not change the definition of `survey_vector`!"
279 | ex() %>% check_object("survey_vector", undefined_msg = msg) %>% check_equal(incorrect_msg = msg)
280 | msg <- "Do not change or remove the code to create the factor vector."
281 | ex() %>% check_function("factor", not_called_msg = msg) %>% check_arg("x") %>% check_equal(incorrect_msg = msg)
282 | 
283 | # MC-note: ideally would want to test assign operator `<-`, and have it highlight whole line.
284 | 
285 | # MC-note: or negate this test_student_typed, to highlight where they type this incorrect phrase
286 | # test_student_typed('c("Male", "Female")')
287 | 
288 | ex() %>% check_object("factor_survey_vector") %>% check_equal(eq_condition = "equal", incorrect_msg = "Did you assign the correct factor levels to `factor_survey_vector`? Use `levels(factor_survey_vector) <- c(\"Female\", \"Male\")`. Remember that R is case sensitive!")
289 | 
290 | 
291 | success_msg("Wonderful! Proceed to the next exercise.")
292 | ```
293 | 
294 | ---
295 | 
296 | ## Summarizing a factor
297 | 
298 | ```yaml
299 | type: NormalExercise
300 | key: a549f13c0644ccc89cd39a10aa48706754637ed0
301 | xp: 100
302 | skills:
303 |   - 1
304 | ```
305 | 
306 | After finishing this course, one of your favorite functions in R will be `summary()`. This will give you a quick overview of the contents of a variable:
307 | 
308 | ```
309 | summary(my_var)
310 | ```
311 | 
312 | Going back to our survey, you would like to know how many `"Male"` responses you have in your study, and how many `"Female"` responses. The `summary` function gives you the answer to this question.
313 | 
314 | `@instructions`
315 | Ask for a `summary()` of both `survey_vector` and `factor_survey_vector`. Interpret the results for both vectors. Are they equally useful in this case?
316 | 
317 | `@hint`
318 | Call the `summary()` function on both `survey_vector` and `factor_survey_vector`, it's as simple as that!
319 | 
320 | `@pre_exercise_code`
321 | ```{r}
322 | # no pec
323 | ```
324 | 
325 | `@sample_code`
326 | ```{r}
327 | # Build factor_survey_vector with clean levels
328 | survey_vector <- c("M", "F", "F", "M", "M")
329 | factor_survey_vector <- factor(survey_vector)
330 | levels(factor_survey_vector) <- c("Female", "Male")
331 | factor_survey_vector
332 | 
333 | # Generate summary for survey_vector
334 | 
335 | 
336 | # Generate summary for factor_survey_vector
337 | 
338 | ```
339 | 
340 | `@solution`
341 | ```{r}
342 | # Build factor_survey_vector with clean levels
343 | survey_vector <- c("M", "F", "F", "M", "M")
344 | factor_survey_vector <- factor(survey_vector)
345 | levels(factor_survey_vector) <- c("Female", "Male")
346 | factor_survey_vector
347 | 
348 | # Generate summary for survey_vector
349 | summary(survey_vector)
350 | 
351 | # Generate summary for factor_survey_vector
352 | summary(factor_survey_vector)
353 | ```
354 | 
355 | `@sct`
356 | ```{r}
357 | msg <- "Do not change anything about the first few lines that define `survey_vector` and `factor_survey_vector`."
358 | 
359 | ex() %>% check_object("survey_vector", undefined_msg = msg) %>% check_equal(incorrect_msg = msg)
360 | 
361 | ex() %>% check_object("factor_survey_vector",  undefined_msg = msg) %>% check_equal(eq_condition = "equal",incorrect_msg = msg)
362 | 
363 | msg <- "Have you correctly used `summary()` to generate a summary for `%s`?"
364 | 
365 | ex() %>% check_output_expr("summary(survey_vector)", missing_msg = sprintf(msg, "survey_vector"))
366 | 
367 | ex() %>% check_output_expr("summary(factor_survey_vector)", missing_msg = sprintf(msg, "factor_survey_vector"))
368 | 
369 | success_msg("Nice! Have a look at the output. The fact that you identified `\"Male\"` and `\"Female\"` as factor levels in `factor_survey_vector` enables R to show the number of elements for each category.")
370 | ```
371 | 
372 | ---
373 | 
374 | ## Battle of the sexes
375 | 
376 | ```yaml
377 | type: NormalExercise
378 | key: 90ecc160d1ebf2f75bf53f9c3843fc1632bdd0a5
379 | xp: 100
380 | skills:
381 |   - 1
382 | ```
383 | 
384 | You might wonder what happens when you try to compare elements of a factor.  In `factor_survey_vector` you have a factor with two levels: `"Male"` and `"Female"`. But how does R value these relative to each other?
385 | 
386 | `@instructions`
387 | Read the code in the editor and submit the answer to test if `male` is greater than (`>`) `female`.
388 | 
389 | `@hint`
390 | Just submit the answer and have a look at the output that gets printed to the console.
391 | 
392 | `@pre_exercise_code`
393 | ```{r}
394 | # no pec
395 | ```
396 | 
397 | `@sample_code`
398 | ```{r}
399 | # Build factor_survey_vector with clean levels
400 | survey_vector <- c("M", "F", "F", "M", "M")
401 | factor_survey_vector <- factor(survey_vector)
402 | levels(factor_survey_vector) <- c("Female", "Male")
403 | 
404 | # Male
405 | male <- factor_survey_vector[1]
406 | 
407 | # Female
408 | female <- factor_survey_vector[2]
409 | 
410 | # Battle of the sexes: Male 'larger' than female?
411 | male > female
412 | ```
413 | 
414 | `@solution`
415 | ```{r}
416 | # Build factor_survey_vector with clean levels
417 | survey_vector <- c("M", "F", "F", "M", "M")
418 | factor_survey_vector <- factor(survey_vector)
419 | levels(factor_survey_vector) <- c("Female", "Male")
420 | 
421 | # Male
422 | male <- factor_survey_vector[1]
423 | 
424 | # Female
425 | female <- factor_survey_vector[2]
426 | 
427 | # Battle of the sexes: Male 'larger' than female?
428 | male > female
429 | ```
430 | 
431 | `@sct`
432 | ```{r}
433 | msg = "Do not change anything about the code; simply submit the answer and look at the result."
434 | 
435 | ex() %>% check_object("survey_vector", undefined_msg = msg) %>% check_equal(incorrect_msg = msg)
436 | 
437 | ex() %>% check_object("factor_survey_vector", undefined_msg = msg) %>% check_equal(eq_condition = "equal", incorrect_msg = msg)
438 | 
439 | ex() %>% check_object("male", undefined_msg = msg) %>% check_equal(incorrect_msg = msg)
440 | 
441 | ex() %>% check_object("female", undefined_msg = msg) %>% check_equal(incorrect_msg = msg)
442 | 
443 | ex() %>% check_output_expr("male > female", missing_msg = msg)
444 | 
445 | 
446 | success_msg("How interesting! By default, R returns `NA` when you try to compare values in a factor, since the idea doesn't make sense. Next you'll learn about ordered factors, where more meaningful comparisons are possible.")
447 | ```
448 | 
449 | ---
450 | 
451 | ## Ordered factors
452 | 
453 | ```yaml
454 | type: NormalExercise
455 | key: 9ab0928916bf84ab225713a9a1ce40d9e322c6a0
456 | xp: 100
457 | skills:
458 |   - 1
459 | ```
460 | 
461 | Since `"Male"` and `"Female"` are unordered (or nominal) factor levels, R returns a warning message, telling you that the greater than operator is not meaningful. As seen before, R attaches an equal value to the levels for such factors.
462 | 
463 | But this is not always the case! Sometimes you will also deal with factors that do have a natural ordering between their categories. If this is the case, we have to make sure that we pass this information to R...
464 | 
465 | Let us say that you are leading a research team of five data analysts and that you want to evaluate their performance. To do this, you track their speed, evaluate each analyst as `"slow"`, `"medium"` or `"fast"`, and save the results in `speed_vector`.
466 | 
467 | `@instructions`
468 | As a first step, assign `speed_vector` a vector with 5 entries, one for each analyst. Each entry should be either `"slow"`, `"medium"`, or `"fast"`. Use the list below:
469 | 
470 | - Analyst 1 is medium,
471 | - Analyst 2 is slow,
472 | - Analyst 3 is slow,
473 | - Analyst 4 is medium and
474 | - Analyst 5 is fast.
475 | 
476 | No need to specify these are factors yet.
477 | 
478 | `@hint`
479 | Assign to `speed_vector` a vector containing the character strings `"slow"`, `"medium"`, or `"fast"`.
480 | 
481 | `@pre_exercise_code`
482 | ```{r}
483 | # no pec
484 | ```
485 | 
486 | `@sample_code`
487 | ```{r}
488 | # Create speed_vector
489 | speed_vector <-
490 | ```
491 | 
492 | `@solution`
493 | ```{r}
494 | # Create speed_vector
495 | speed_vector <- c("medium", "slow", "slow", "medium", "fast")
496 | ```
497 | 
498 | `@sct`
499 | ```{r}
500 | ex() %>% check_object("speed_vector") %>% check_equal(incorrect_msg = "`speed_vector` should be a vector with 5 entries, one for each analyst's speed rating. Don't use capital letters; R is case sensitive!")
501 | 
502 | success_msg("A job well done! Continue to the next exercise.")
503 | ```
504 | 
505 | ---
506 | 
507 | ## Ordered factors (2)
508 | 
509 | ```yaml
510 | type: NormalExercise
511 | key: 279077d10248ce03d5f972939ef8576430a16683
512 | xp: 100
513 | skills:
514 |   - 1
515 | ```
516 | 
517 | `speed_vector` should be converted to an ordinal factor since its categories have a natural ordering. By default, the function `factor()` transforms `speed_vector` into an unordered factor. To create an ordered factor, you have to add two additional arguments: `ordered` and `levels`.
518 | 
519 | ```
520 | factor(some_vector,
521 |        ordered = TRUE,
522 |        levels = c("lev1", "lev2" ...))
523 | ```
524 | 
525 | By setting the argument `ordered` to `TRUE` in the function `factor()`, you indicate that the factor is ordered. With the argument `levels` you give the values of the factor in the correct order.
526 | 
527 | `@instructions`
528 | From `speed_vector`, create an ordered factor vector: `factor_speed_vector`. Set `ordered` to `TRUE`, and set `levels` to `c("slow", "medium", "fast")`.
529 | 
530 | `@hint`
531 | Use the function `factor()` to create `factor_speed_vector` based on `speed_vector`. The argument `ordered` should be set to `TRUE` since there is a natural ordering. Also, set `levels = c("slow", "medium", "fast")`.
532 | 
533 | `@pre_exercise_code`
534 | ```{r}
535 | # no pec
536 | ```
537 | 
538 | `@sample_code`
539 | ```{r}
540 | # Create speed_vector
541 | speed_vector <- c("medium", "slow", "slow", "medium", "fast")
542 | 
543 | # Convert speed_vector to ordered factor vector
544 | factor_speed_vector <-
545 | 
546 | # Print factor_speed_vector
547 | factor_speed_vector
548 | summary(factor_speed_vector)
549 | ```
550 | 
551 | `@solution`
552 | ```{r}
553 | # Create speed_vector
554 | speed_vector <- c("medium", "slow", "slow", "medium", "fast")
555 | 
556 | # Convert speed_vector to ordered factor vector
557 | factor_speed_vector <- factor(speed_vector, ordered = TRUE, levels = c("slow", "medium", "fast"))
558 | 
559 | # Print factor_speed_vector
560 | factor_speed_vector
561 | summary(factor_speed_vector)
562 | ```
563 | 
564 | `@sct`
565 | ```{r}
566 | msg = "Do not change anything about the command that specifies the `speed_vector` variable."
567 | 
568 | ex() %>% check_object("speed_vector", undefined_msg = msg) %>% check_equal(incorrect_msg = msg)
569 | 
570 | ex() %>% check_function("factor") %>% {
571 | 	check_arg(., 'x') %>% check_equal(incorrect_msg="The first argument you pass to `factor()` should be `speed_vector`.")
572 | 	check_arg(., 'ordered') %>% check_equal(incorrect_msg="Make sure to set `ordered = TRUE` inside your call of `factor()`.")
573 | 	check_arg(., 'levels') %>% check_equal(incorrect_msg="Make sure to set `levels = c(\"slow\", \"medium\", \"fast\")` inside your call of `factor()`.")
574 | 	}
575 | 
576 | ex() %>% check_object("factor_speed_vector") %>% check_equal(eq_condition = "equal", incorrect_msg="There's still something wrong with `factor_speed_vector`; make sure to only pass `speed_vector`, `ordered = TRUE` and `levels = c(\"slow\", \"medium\", \"fast\")` inside your call of `factor()`.")
577 | 
578 |                       
579 |                       
580 | success_msg("Great! Have a look at the console. It is now indicated that the Levels indeed have an order associated, with the `<` sign. Continue to the next exercise.")
581 | ```
582 | 
583 | ---
584 | 
585 | ## Comparing ordered factors
586 | 
587 | ```yaml
588 | type: NormalExercise
589 | key: db16e69805625bcfde227743a8cbc985f8482a37
590 | xp: 100
591 | skills:
592 |   - 1
593 | ```
594 | 
595 | Having a bad day at work, 'data analyst number two' enters your office and starts complaining that 'data analyst number five' is slowing down the entire project. Since you know that 'data analyst number two' has the reputation of being a smarty-pants, you first decide to check if his statement is true.
596 | 
597 | The fact that `factor_speed_vector` is now ordered enables us to compare different elements (the data analysts in this case). You can simply do this by using the well-known operators.
598 | 
599 | `@instructions`
600 | - Use `[2]` to select from `factor_speed_vector` the factor value for the second data analyst. Store it as `da2`.
601 | - Use `[5]` to select the `factor_speed_vector` factor value for the fifth data analyst. Store it as `da5`.
602 | - Check if `da2` is greater than `da5`; simply print out the result. Remember that you can use the `>` operator to check whether one element is larger than the other.
603 | 
604 | `@hint`
605 | - To select the factor value for the third data analyst, you'd need `factor_speed_vector[3]`.
606 | - To compare two values, you can use `>`. For example: `da3 > da4`.
607 | 
608 | `@pre_exercise_code`
609 | ```{r}
610 | # no pec
611 | ```
612 | 
613 | `@sample_code`
614 | ```{r}
615 | # Create factor_speed_vector
616 | speed_vector <- c("medium", "slow", "slow", "medium", "fast")
617 | factor_speed_vector <- factor(speed_vector, ordered = TRUE, levels = c("slow", "medium", "fast"))
618 | 
619 | # Factor value for second data analyst
620 | da2 <-
621 | 
622 | # Factor value for fifth data analyst
623 | da5 <-
624 | 
625 | # Is data analyst 2 faster than data analyst 5?
626 | 
627 | ```
628 | 
629 | `@solution`
630 | ```{r}
631 | # Create factor_speed_vector
632 | speed_vector <- c("medium", "slow", "slow", "medium", "fast")
633 | factor_speed_vector <- factor(speed_vector, ordered = TRUE, levels = c("slow", "medium", "fast"))
634 | 
635 | # Factor value for second data analyst
636 | da2 <- factor_speed_vector[2]
637 | 
638 | # Factor value for fifth data analyst
639 | da5 <- factor_speed_vector[5]
640 | 
641 | # Is data analyst 2 faster than data analyst 5?
642 | da2 > da5
643 | ```
644 | 
645 | `@sct`
646 | ```{r}
647 | msg <- "Do not change anything about the commands that define `speed_vector` and `factor_speed_vector`!"
648 | ex() %>% check_object("speed_vector", undefined_msg = msg) %>% check_equal(incorrect_msg = msg)
649 | 
650 | ex() %>% check_object("factor_speed_vector", undefined_msg = msg) %>% check_equal(eq_condition = "equal", incorrect_msg = msg)
651 | # The pre-filled objects above must be unchanged; the checks below grade the learner's own code.
652 | # Feedback template: sprintf() fills the first %s with the analyst's ordinal and the second with the index.
653 | msg <- "Have you correctly selected the factor value for the %s data analyst? You can use `factor_speed_vector[%s]`."
654 | ex() %>% check_object("da2") %>% check_equal(eq_condition = "equal", incorrect_msg = sprintf(msg, "second", "2"))
655 | 
656 | ex() %>% check_object("da5") %>% check_equal(eq_condition = "equal", incorrect_msg = sprintf(msg, "fifth", "5"))
657 | 
658 | ex() %>% check_output_expr("da2 > da5", missing_msg = "Have you correctly compared `da2` and `da5`? You can use the `>`. Simply print out the result.")
659 | 
660 | 
661 | success_msg("Bellissimo! What does the result tell you? Data analyst two is complaining about the data analyst five while in fact they are the one slowing everything down! This concludes the chapter on factors. With a solid basis in vectors, matrices and factors, you're ready to dive into the wonderful world of data frames, a very important data structure in R!")
662 | ```
663 | 


--------------------------------------------------------------------------------
/chapter3.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title_meta: Chapter 3
  3 | title: Matrices
  4 | description: >-
  5 |   In this chapter, you will learn how to work with matrices in R. By the end of
  6 |   the chapter, you will be able to create matrices and understand how to do
  7 |   basic computations with them. You will analyze the box office numbers of the
  8 |   Star Wars movies and learn how to use matrices in R. May the force be with
  9 |   you!
 10 | ---
 11 | 
 12 | ## What's a matrix?
 13 | 
 14 | ```yaml
 15 | type: NormalExercise
 16 | key: d61aeba84c
 17 | xp: 100
 18 | skills:
 19 |   - 1
 20 | ```
 21 | 
 22 | In R, a matrix is a collection of elements of the same data type (numeric, character, or logical) arranged into a fixed number of rows and columns. Since you are only working with rows and columns, a matrix is called two-dimensional. 
 23 | 
 24 | You can construct a matrix in R with the `matrix()` function. Consider the following example:
 25 | 
 26 | ```
 27 | matrix(1:9, byrow = TRUE, nrow = 3)
 28 | ```
 29 | 
 30 | In the `matrix()` function:
 31 | 
 32 | - The first argument is the collection of elements that R will arrange into the rows and columns of the matrix. Here, we use `1:9` which is a shortcut for `c(1, 2, 3, 4, 5, 6, 7, 8, 9)`.
 33 | - The argument `byrow` indicates that the matrix is filled by the rows. If we want the matrix to be filled by the columns, we just place `byrow = FALSE`. 
 34 | - The third argument `nrow` indicates that the matrix should have three rows.
 35 | 
 36 | `@instructions`
 37 | Construct a matrix with 3 rows containing the numbers 1 up to 9, filled row-wise.
 38 | 
 39 | `@hint`
 40 | Read the assignment carefully, the answer is already given!
 41 | 
 42 | `@pre_exercise_code`
 43 | ```{r}
 44 | # no pec
 45 | ```
 46 | 
 47 | `@sample_code`
 48 | ```{r}
 49 | # Construct a matrix with 3 rows that contain the numbers 1 up to 9
 50 | ```
 51 | 
 52 | `@solution`
 53 | ```{r}
 54 | # Construct a matrix with 3 rows that contain the numbers 1 up to 9
 55 | matrix(1:9, byrow = TRUE, nrow = 3)
 56 | ```
 57 | 
 58 | `@sct`
 59 | ```{r}
 60 | ex() %>% check_function("matrix") %>% {
 61 | 	check_arg(., 'data') %>% check_equal(incorrect_msg = "Have you correctly created the matrix? Have a look at the assignment, the answer is already given!")
 62 | 	check_arg(., 'byrow') %>% check_equal(incorrect_msg = "Have you correctly created the matrix? Have a look at the assignment, the answer is already given!")
 63 | 	check_arg(., 'nrow') %>% check_equal(incorrect_msg = "Have you correctly created the matrix? Have a look at the assignment, the answer is already given!")
 64 | 	}
 65 | 
 66 | ex() %>% check_output_expr("matrix(1:9, byrow=TRUE, nrow=3)",missing_msg = "There seems to be an issue with the matrix definition. Have a look at the assignment, the answer is already given!")
 67 | 
 68 | success_msg("Great! Continue to the next exercise.")
 69 | ```
 70 | 
 71 | ---
 72 | 
 73 | ## Analyze matrices, you shall
 74 | 
 75 | ```yaml
 76 | type: NormalExercise
 77 | key: effc2fb945
 78 | xp: 100
 79 | skills:
 80 |   - 1
 81 | ```
 82 | 
 83 | It is now time to get your hands dirty. In the following exercises you will analyze the box office numbers of the Star Wars franchise. May the force be with you! 
 84 | 
 85 | In the editor, three vectors are defined. Each one represents the box office numbers of one of the first three Star Wars movies. The first element of each vector indicates the US box office revenue, the second element refers to the Non-US box office (source: Wikipedia).
 86 | 
 87 | In this exercise, you'll combine all these figures into a single vector. Next, you'll build a matrix from this vector.
 88 | 
 89 | `@instructions`
 90 | - Use `c(new_hope, empire_strikes, return_jedi)` to combine the three vectors into one vector. Call this vector `box_office`.
 91 | - Construct a matrix with 3 rows, where each row represents a movie. Use the `matrix()` function to do this. The first argument is the vector `box_office`, containing all box office figures. Next, you'll have to specify `nrow = 3` and `byrow = TRUE`. Name the resulting matrix `star_wars_matrix`.
 92 | 
 93 | `@hint`
 94 | - `box_office <- c(new_hope, empire_strikes, return_jedi)` will combine all numbers in the different vectors into a single vector with 6 elements.
 95 | - `matrix(box_office, nrow = ..., byrow =  ...)` is a template for the solution to the second instruction.
 96 | 
 97 | `@pre_exercise_code`
 98 | ```{r}
 99 | # no pec
100 | ```
101 | 
102 | `@sample_code`
103 | ```{r}
104 | # Box office Star Wars (in millions!)
105 | new_hope <- c(460.998, 314.4)
106 | empire_strikes <- c(290.475, 247.900)
107 | return_jedi <- c(309.306, 165.8)
108 | 
109 | # Create box_office
110 | box_office <- 
111 | 
112 | # Construct star_wars_matrix
113 | star_wars_matrix <- 
114 | ```
115 | 
116 | `@solution`
117 | ```{r}
118 | # Box office Star Wars (in millions!)
119 | new_hope <- c(460.998, 314.4)
120 | empire_strikes <- c(290.475, 247.900)
121 | return_jedi <- c(309.306, 165.8)
122 | 
123 | # Create box_office
124 | box_office <- c(new_hope, empire_strikes, return_jedi)
125 | 
126 | # Construct star_wars_matrix
127 | star_wars_matrix <- matrix(box_office, nrow = 3, byrow = TRUE) 
128 | ```
129 | 
130 | `@sct`
131 | ```{r}
132 | msg <- "Do not change anything about the box office variables `new_hope`, `empire_strikes` and `return_jedi`!"
133 | # The three pre-filled vectors must match the solution before the learner's own objects are graded.
134 | ex() %>% check_object("new_hope", undefined_msg = msg) %>% check_equal(incorrect_msg = msg)
135 | 
136 | ex() %>% check_object("empire_strikes", undefined_msg = msg) %>% check_equal(incorrect_msg = msg)
137 | 
138 | ex() %>% check_object("return_jedi", undefined_msg = msg) %>% check_equal(incorrect_msg = msg)
139 | 
140 | ex() %>% check_object("box_office") %>% check_equal(incorrect_msg = "Have you correctly combined the values of `new_hope`, `empire_strikes` and `return_jedi` into the vector `box_office`?")
141 | msg <- "Make sure to correctly specify the arguments you pass to `matrix()`: `box_office`, `nrow = 3`, `byrow = TRUE`."
142 | ex() %>% check_function("matrix") %>% {
143 | 	check_arg(., 'data') %>% check_equal(incorrect_msg = msg)
144 | 	check_arg(., 'nrow') %>% check_equal(incorrect_msg = msg)
145 | 	check_arg(., 'byrow') %>% check_equal(incorrect_msg = msg)
146 | 	}
147 | 
148 | ex() %>% check_object("star_wars_matrix") %>% check_equal(incorrect_msg = "Did you assign the result of the `matrix()` call to `star_wars_matrix`?")
149 | 
150 | 
151 | success_msg("The force is actually with you! Continue to the next exercise.")
152 | ```
153 | 
154 | ---
155 | 
156 | ## Naming a matrix
157 | 
158 | ```yaml
159 | type: NormalExercise
160 | key: f734e8bf74
161 | xp: 100
162 | skills:
163 |   - 1
164 | ```
165 | 
166 | To help you remember what is stored in `star_wars_matrix`, you would like to add the names of the movies for the rows. Not only does this help you to read the data, but it is also useful to select certain elements from the matrix. 
167 | 
168 | Similar to vectors, you can add names for the rows and the columns of a matrix:
169 | 
170 | ```
171 | rownames(my_matrix) <- row_names_vector
172 | colnames(my_matrix) <- col_names_vector
173 | ```
174 | 
175 | We went ahead and prepared two vectors for you: `region`, and `titles`. You will need these vectors to name the columns and rows of `star_wars_matrix`, respectively.
176 | 
177 | `@instructions`
178 | - Use `colnames()` to name the columns of `star_wars_matrix` with the `region` vector.
179 | - Use `rownames()` to name the rows of `star_wars_matrix` with the `titles` vector.
180 | - Print out `star_wars_matrix` to see the result of your work.
181 | 
182 | `@hint`
183 | You can use `colnames(star_wars_matrix) <- region` to name the columns of `star_wars_matrix`. Do a similar thing to name the rows.
184 | 
185 | `@pre_exercise_code`
186 | ```{r}
187 | # no pec
188 | ```
189 | 
190 | `@sample_code`
191 | ```{r}
192 | # Box office Star Wars (in millions!)
193 | new_hope <- c(460.998, 314.4)
194 | empire_strikes <- c(290.475, 247.900)
195 | return_jedi <- c(309.306, 165.8)
196 | 
197 | # Construct matrix
198 | star_wars_matrix <- matrix(c(new_hope, empire_strikes, return_jedi), nrow = 3, byrow = TRUE)
199 | 
200 | # Vectors region and titles, used for naming
201 | region <- c("US", "non-US")
202 | titles <- c("A New Hope", "The Empire Strikes Back", "Return of the Jedi")
203 | 
204 | # Name the columns with region
205 | 
206 | 
207 | # Name the rows with titles
208 | 
209 | 
210 | # Print out star_wars_matrix
211 | ```
212 | 
213 | `@solution`
214 | ```{r}
215 | # Box office Star Wars (in millions!)
216 | new_hope <- c(460.998, 314.4)
217 | empire_strikes <- c(290.475, 247.900)
218 | return_jedi <- c(309.306, 165.8)
219 | 
220 | # Construct matrix
221 | star_wars_matrix <- matrix(c(new_hope, empire_strikes, return_jedi), nrow = 3, byrow = TRUE)
222 | 
223 | # Vectors region and titles, used for naming
224 | region <- c("US", "non-US")
225 | titles <- c("A New Hope", "The Empire Strikes Back", "Return of the Jedi")
226 | 
227 | # Name the columns with region
228 | colnames(star_wars_matrix) <- region
229 | 
230 | # Name the rows with titles
231 | rownames(star_wars_matrix) <- titles
232 | 
233 | # Print out star_wars_matrix
234 | star_wars_matrix
235 | ```
236 | 
237 | `@sct`
238 | ```{r}
239 | msg <- "Do not change anything about the box office variables `new_hope`, `empire_strikes` and `return_jedi`!"
240 | 
241 | ex()  %>% check_object("new_hope", undefined_msg = msg)  %>% check_equal(incorrect_msg = msg)
242 | 
243 | ex()  %>% check_object("empire_strikes", undefined_msg = msg)  %>% check_equal(incorrect_msg = msg)
244 | 
245 | ex()  %>% check_object("return_jedi", undefined_msg = msg)  %>% check_equal(incorrect_msg = msg)
246 | 
247 | msg <- "Don't change the contents of `star_wars_matrix`; only the names of the rows and columns!" 
248 | 
249 | ex()  %>% check_object("star_wars_matrix") %>% check_equal(incorrect_msg = msg)
250 | 
251 | msg <- "Don't change anything about the `region` and `titles` vectors that have been defined for you."
252 | 
253 | ex()  %>% check_object("region", undefined_msg = msg)  %>%  check_equal(incorrect_msg = msg)
254 | ex() %>% check_object("titles", undefined_msg = msg) %>% check_equal( incorrect_msg = msg)
255 | 
256 | ex() %>% check_object("star_wars_matrix") %>% check_equal(eq_condition = "equal",incorrect_msg = "Did you set the row and column names of `star_wars_matrix` correctly? Use `colnames(star_wars_matrix) <- region` for the column names; do a similar thing to name the rows.")
257 | 
258 | ex() %>% check_output_expr("star_wars_matrix", missing_msg = "Don't forget to print out `star_wars_matrix` after you've named the rows and columns.")
259 | 
260 | success_msg("Great! You're on the way of becoming an R jedi! Continue to the next exercise.")
261 | ```
262 | 
263 | ---
264 | 
265 | ## Calculating the worldwide box office
266 | 
267 | ```yaml
268 | type: NormalExercise
269 | key: 3fd7499a12
270 | xp: 100
271 | skills:
272 |   - 1
273 | ```
274 | 
275 | The single most important thing for a movie in order to become an instant legend in Tinseltown is its worldwide box office figures. 
276 | 
277 | To calculate the total box office revenue for the three Star Wars movies, you have to take the sum of the US revenue column and the non-US revenue column. 
278 | 
279 | In R, the function `rowSums()` conveniently calculates the totals for each row of a matrix. This function creates a new vector:
280 | 
281 | ```
282 | rowSums(my_matrix)
283 | ```
284 | 
285 | `@instructions`
286 | Calculate the worldwide box office figures for the three movies and put these in the vector named `worldwide_vector`.
287 | 
288 | `@hint`
289 | `rowSums(star_wars_matrix)` will calculate the sum of every row, so the total box office for each movie.
290 | 
291 | `@pre_exercise_code`
292 | ```{r}
293 | # no pec
294 | ```
295 | 
296 | `@sample_code`
297 | ```{r}
298 | # Construct star_wars_matrix
299 | box_office <- c(460.998, 314.4, 290.475, 247.900, 309.306, 165.8)
300 | region <- c("US", "non-US")
301 | titles <- c("A New Hope", 
302 |                  "The Empire Strikes Back", 
303 |                  "Return of the Jedi")
304 |                
305 | star_wars_matrix <- matrix(box_office, 
306 |                       nrow = 3, byrow = TRUE,
307 |                       dimnames = list(titles, region))
308 | 
309 | # Calculate worldwide box office figures
310 | worldwide_vector <- 
311 | ```
312 | 
313 | `@solution`
314 | ```{r}
315 | # Construct star_wars_matrix
316 | box_office <- c(460.998, 314.4, 290.475, 247.900, 309.306, 165.8)
317 | region <- c("US", "non-US")
318 | titles <- c("A New Hope", 
319 |             "The Empire Strikes Back", 
320 |             "Return of the Jedi")
321 |                
322 | star_wars_matrix <- matrix(box_office, 
323 |                       nrow = 3, byrow = TRUE,
324 |                       dimnames = list(titles, region))
325 | 
326 | # Calculate worldwide box office figures
327 | worldwide_vector <- rowSums(star_wars_matrix)
328 | ```
329 | 
330 | `@sct`
331 | ```{r}
332 | msg <- "Do not change anything about the preset variables `box_office` and `star_wars_matrix`!"
333 | # The pre-filled objects must match the solution before the learner's result is graded.
334 | ex() %>% check_object("box_office", undefined_msg = msg) %>% check_equal(incorrect_msg = msg)
335 | 
336 | ex() %>% check_object("star_wars_matrix", undefined_msg = msg) %>% check_equal(incorrect_msg = msg)
337 | # worldwide_vector is the object the learner has to create in this exercise.
338 | ex() %>% check_object("worldwide_vector") %>% check_equal(incorrect_msg = "Call `rowSums()` on `star_wars_matrix` and store the result in `worldwide_vector`.")
339 | 
340 | success_msg("Well done! Continue to the next exercise.")
341 | ```
342 | 
343 | ---
344 | 
345 | ## Adding a column for the Worldwide box office
346 | 
347 | ```yaml
348 | type: NormalExercise
349 | key: 86b87a8545
350 | xp: 100
351 | skills:
352 |   - 1
353 | ```
354 | 
355 | In the previous exercise you calculated the vector that contained the worldwide box office receipt for each of the three Star Wars movies. However, this vector is not yet part of `star_wars_matrix`.
356 | 
357 | You can add a column or multiple columns to a matrix with the `cbind()` function, which merges matrices and/or vectors together by column. For example: 
358 | 
359 | ```
360 | big_matrix <- cbind(matrix1, matrix2, vector1 ...)
361 | ```
362 | 
363 | `@instructions`
364 | Add `worldwide_vector` as a new column to the `star_wars_matrix` and assign the result to `all_wars_matrix`. Use the `cbind()` function.
365 | 
366 | `@hint`
367 | In this exercise, you should pass two variables to `cbind()`: `star_wars_matrix` and `worldwide_vector`, in this order. Assign the result to `all_wars_matrix`.
368 | 
369 | `@pre_exercise_code`
370 | ```{r}
371 | # no pec
372 | ```
373 | 
374 | `@sample_code`
375 | ```{r}
376 | # Construct star_wars_matrix
377 | box_office <- c(460.998, 314.4, 290.475, 247.900, 309.306, 165.8)
378 | region <- c("US", "non-US")
379 | titles <- c("A New Hope", 
380 |             "The Empire Strikes Back", 
381 |             "Return of the Jedi")
382 |                
383 | star_wars_matrix <- matrix(box_office, 
384 |                       nrow = 3, byrow = TRUE,
385 |                       dimnames = list(titles, region))
386 | 
387 | # The worldwide box office figures
388 | worldwide_vector <- rowSums(star_wars_matrix)
389 | 
390 | # Bind the new variable worldwide_vector as a column to star_wars_matrix
391 | all_wars_matrix <- 
392 | ```
393 | 
394 | `@solution`
395 | ```{r}
396 | # Construct star_wars_matrix
397 | box_office <- c(460.998, 314.4, 290.475, 247.900, 309.306, 165.8)
398 | region <- c("US", "non-US")
399 | titles <- c("A New Hope", 
400 |             "The Empire Strikes Back", 
401 |             "Return of the Jedi")
402 |                
403 | star_wars_matrix <- matrix(box_office, 
404 |                       nrow = 3, byrow = TRUE,
405 |                       dimnames = list(titles, region))
406 | 
407 | # The worldwide box office figures
408 | worldwide_vector <- rowSums(star_wars_matrix)
409 | 
410 | # Bind the new variable worldwide_vector as a column to star_wars_matrix
411 | all_wars_matrix <- cbind(star_wars_matrix, worldwide_vector)
412 | ```
413 | 
414 | `@sct`
415 | ```{r}
416 | msg <- "Do not change anything about the preset variables `box_office` and `star_wars_matrix`!"
417 | ex() %>% check_object("box_office", undefined_msg = msg) %>% check_equal(incorrect_msg = msg)
418 | 
419 | ex() %>% check_object("star_wars_matrix", undefined_msg = msg) %>% check_equal(incorrect_msg = msg)
420 | 
421 | ex() %>% check_object("worldwide_vector") %>% check_equal(incorrect_msg = "Store the result of `rowSums(star_wars_matrix)` in `worldwide_vector`.")
422 | 
423 | # all_wars_matrix is the learner's own result: star_wars_matrix with worldwide_vector bound on as an extra column.
424 | msg <- "Have you correctly used `cbind()` to add `worldwide_vector` to `star_wars_matrix`? You should pass `star_wars_matrix` and `worldwide_vector` to `cbind()`, in this order. The resulting matrix, `all_wars_matrix`, should consist of three rows and three columns."
425 | ex() %>% check_object("all_wars_matrix") %>% check_equal(incorrect_msg = msg)
426 | 
427 | 
428 | success_msg("Nice job! After adding column to a matrix, the logical next step is adding rows. Learn how in the next exercise.")
429 | ```
430 | 
431 | ---
432 | 
433 | ## Adding a row
434 | 
435 | ```yaml
436 | type: NormalExercise
437 | key: bcadb29139
438 | xp: 100
439 | skills:
440 |   - 1
441 | ```
442 | 
443 | Just like every action has a reaction, every `cbind()` has an `rbind()`. (We admit, we are pretty bad with metaphors.) 
444 | 
445 | Your R workspace, where all variables you defined 'live' ([check out what a workspace is](http://www.statmethods.net/interface/workspace.html)), has already been initialized and contains two matrices:
446 | 
447 | - `star_wars_matrix` that we have used all along, with data on the original trilogy,
448 | - `star_wars_matrix2`, with similar data for the prequels trilogy. 
449 | 
450 | Explore these matrices in the console if you want to have a closer look. If you want to check out the contents of the workspace, you can type `ls()` in the console.
451 | 
452 | `@instructions`
453 | Use `rbind()` to paste together `star_wars_matrix` and `star_wars_matrix2`, in this order. Assign the resulting matrix to `all_wars_matrix`.
454 | 
455 | `@hint`
456 | Bind the two matrices together like this:
457 | ```
458 | rbind(matrix1, matrix2)
459 | ```
460 | Assign the result to `all_wars_matrix`.
461 | 
462 | `@pre_exercise_code`
463 | ```{r}
464 | # Construct star_wars_matrix: one row per movie, columns "US" and "non-US"
465 | box_office_all <- c(461, 314.4, 290.5, 247.9, 309.3, 165.8)
466 | movie_names <- c("A New Hope","The Empire Strikes Back","Return of the Jedi")
467 | col_titles <- c("US","non-US")
468 | star_wars_matrix <- matrix(box_office_all, nrow = 3, byrow = TRUE, dimnames = list(movie_names, col_titles))
469 | 
470 | # Construct star_wars_matrix2 the same way, reusing col_titles for the columns
471 | box_office_all2 <- c(474.5, 552.5, 310.7, 338.7, 380.3, 468.5)
472 | movie_names2 <- c("The Phantom Menace", "Attack of the Clones", "Revenge of the Sith")
473 | star_wars_matrix2 <- matrix(box_office_all2, nrow=3, byrow = TRUE, dimnames = list(movie_names2, col_titles))
474 | 
475 | # Remove the helper vectors; of the objects created here only star_wars_matrix and star_wars_matrix2 are kept
476 | rm(box_office_all)
477 | rm(movie_names)
478 | rm(col_titles)
479 | rm(box_office_all2)
480 | rm(movie_names2)
481 | ```
482 | 
483 | `@sample_code`
484 | ```{r}
485 | # star_wars_matrix and star_wars_matrix2 are available in your workspace
486 | star_wars_matrix  
487 | star_wars_matrix2 
488 | 
489 | # Combine both Star Wars trilogies in one matrix
490 | all_wars_matrix <- 
491 | ```
492 | 
493 | `@solution`
494 | ```{r}
495 | # star_wars_matrix and star_wars_matrix2 are available in your workspace
496 | star_wars_matrix  
497 | star_wars_matrix2 
498 | 
499 | # Combine both Star Wars trilogies in one matrix
500 | all_wars_matrix <- rbind(star_wars_matrix, star_wars_matrix2)
501 | ```
502 | 
503 | `@sct`
504 | ```{r}
505 | msg <- "Do not override the variables that have been defined for you in the workspace (`star_wars_matrix` and `star_wars_matrix2`)."
506 | ex() %>% check_object("star_wars_matrix", undefined_msg = msg) %>% check_equal(eq_condition = "equal", incorrect_msg = msg)
507 | 
508 | ex() %>% check_object("star_wars_matrix2", undefined_msg = msg) %>% check_equal(eq_condition = "equal", incorrect_msg = msg)
509 | # all_wars_matrix is the learner's result of row-binding the two workspace matrices; it is a matrix, so the feedback names it without "()".
510 | ex() %>% check_object("all_wars_matrix") %>% check_equal(incorrect_msg = "Did you use the `rbind()` correctly to create `all_wars_matrix`? `rbind()` should take two arguments: `star_wars_matrix` and `star_wars_matrix2`, in this order.")
511 | 
512 | 
513 | success_msg("Wonderful! Continue with the next exercise and see how you can combine the results of the `rbind()` function with the `colSums()` function!")
514 | ```
515 | 
516 | ---
517 | 
518 | ## The total box office revenue for the entire saga
519 | 
520 | ```yaml
521 | type: NormalExercise
522 | key: 1bfe5ae096
523 | xp: 100
524 | skills:
525 |   - 1
526 | ```
527 | 
528 | Just like `cbind()` has  `rbind()`, `colSums()` has `rowSums()`. Your R workspace already contains the `all_wars_matrix` that you constructed in the previous exercise; type `all_wars_matrix` to have another look. Let's now calculate the total box office revenue for the entire saga.
529 | 
530 | `@instructions`
531 | - Calculate the total revenue for the US and the non-US region and assign the result to `total_revenue_vector`. You can use the `colSums()` function.
532 | - Print out `total_revenue_vector` to have a look at the results.
533 | 
534 | `@hint`
535 | You should use the `colSums()` function with `all_wars_matrix` as the argument to find the total box office per region.
536 | 
537 | `@pre_exercise_code`
538 | ```{r}
539 | load(url("https://assets.datacamp.com/course/intro_to_r/all_wars_matrix.RData"))
540 | ```
541 | 
542 | `@sample_code`
543 | ```{r}
544 | # all_wars_matrix is available in your workspace
545 | all_wars_matrix
546 | 
547 | # Total revenue for US and non-US
548 | total_revenue_vector <- 
549 |   
550 | # Print out total_revenue_vector
551 | ```
552 | 
553 | `@solution`
554 | ```{r}
555 | # all_wars_matrix is available in your workspace
556 | all_wars_matrix
557 | 
558 | # Total revenue for US and non-US
559 | total_revenue_vector <- colSums(all_wars_matrix)
560 | 
561 | # Print out total_revenue_vector
562 | total_revenue_vector
563 | ```
564 | 
565 | `@sct`
566 | ```{r}
567 | msg <- "Do not change the contents of `all_wars_matrix`; it was created for you in the workspace."
568 | ex() %>% check_object("all_wars_matrix", undefined_msg = msg) %>% check_equal(eq_condition = "equal", incorrect_msg = msg)
569 | # The colSums() call must receive the full saga matrix as its `x` argument.
570 | ex() %>% check_function("colSums") %>% check_arg("x") %>% check_equal(incorrect_msg = "Did you use the `colSums()` function on `all_wars_matrix`?")
571 | # The result has to be stored under the requested name ...
572 | ex() %>% check_object("total_revenue_vector") %>% check_equal(incorrect_msg = "Have you correctly assigned the result of `colSums(all_wars_matrix)` to `total_revenue_vector`?")
573 | # ... and printed to the console.
574 | ex() %>% check_output_expr("total_revenue_vector", missing_msg = "Don't forget to print out `total_revenue_vector`!")
575 | 
576 | success_msg("Bellissimo! Head over to the next exercise to learn matrix subsetting.")
577 | ```
578 | 
579 | ---
580 | 
581 | ## Selection of matrix elements
582 | 
583 | ```yaml
584 | type: NormalExercise
585 | key: 41d9d69713
586 | xp: 100
587 | skills:
588 |   - 1
589 | ```
590 | 
591 | Similar to vectors, you can use the square brackets `[ ]` to select one or multiple elements from a matrix. Whereas vectors have one dimension, matrices have two dimensions. You should therefore use a comma to separate the rows you want to select from the columns. For example: 
592 | 
593 | - `my_matrix[1,2]` selects the element at the first row and second column.
594 | - `my_matrix[1:3,2:4]` results in a matrix with the data on the rows 1, 2, 3 and columns 2, 3, 4.
595 | 
596 | If you want to select all elements of a row or a column, no number is needed after or before the comma, respectively:
597 | 
598 | - `my_matrix[,1]` selects all elements of the first column.
599 | - `my_matrix[1,]` selects all elements of the first row.
600 | 
601 | Back to Star Wars with this newly acquired knowledge! As in the previous exercise, `all_wars_matrix` is already available in your workspace.
602 | 
603 | `@instructions`
604 | - Select the non-US revenue for all movies (the entire second column of `all_wars_matrix`), store the result as `non_us_all`.
605 | - Use `mean()` on `non_us_all` to calculate the average non-US revenue for all movies. Simply print out the result.
606 | - This time, select the non-US revenue for the first two movies in `all_wars_matrix`. Store the result as `non_us_some`.
607 | - Use `mean()` again to print out the average of the values in `non_us_some`.
608 | 
609 | `@hint`
610 | You can select the entire second column of a matrix `my_matrix` with `my_matrix[,2]`.
611 | 
612 | `@pre_exercise_code`
613 | ```{r}
614 | load(url("https://assets.datacamp.com/course/intro_to_r/all_wars_matrix.RData"))
615 | ```
616 | 
617 | `@sample_code`
618 | ```{r}
619 | # all_wars_matrix is available in your workspace
620 | all_wars_matrix
621 | 
622 | # Select the non-US revenue for all movies
623 | non_us_all <- 
624 |   
625 | # Average non-US revenue
626 | 
627 |   
628 | # Select the non-US revenue for first two movies
629 | non_us_some <- 
630 |   
631 | # Average non-US revenue for first two movies
632 | 
633 | ```
634 | 
635 | `@solution`
636 | ```{r}
637 | # all_wars_matrix is available in your workspace
638 | all_wars_matrix
639 | 
640 | # Select the non-US revenue for all movies
641 | non_us_all <- all_wars_matrix[,2]
642 |   
643 | # Average non-US revenue
644 | mean(non_us_all)
645 |   
646 | # Select the non-US revenue for first two movies
647 | non_us_some <- all_wars_matrix[1:2,2]
648 |   
649 | # Average non-US revenue for first two movies
650 | mean(non_us_some)
651 | ```
652 | 
653 | `@sct`
654 | ```{r}
655 | msg = "Do not change the contents of `all_wars_matrix`; this matrix has already been created for you in the workspace."
656 | 
657 | ex() %>% check_object("all_wars_matrix", undefined_msg = msg) %>% check_equal(incorrect_msg = msg)
658 | 
659 | ex() %>% check_object("non_us_all") %>% check_equal(incorrect_msg = "Did you assign to `non_us_all` the entire second column of `all_wars_matrix`? You can use `[, 2]` to do this!")
660 | 
661 | ex() %>% check_output_expr("mean(non_us_all)", missing_msg = "Have you calculated the average of the values in `non_us_all` by calling `mean(non_us_all)`? Simply print out the result.")
662 | 
663 | ex() %>% check_object("non_us_some") %>% check_equal(incorrect_msg = "Did you assign to `non_us_some` the non-US revenue for the first two movies? You can use `[1:2,2]` to do this!")
664 | 
665 | ex() %>% check_output_expr("mean(non_us_some)", missing_msg = "Have you calculated the average of the values in `non_us_some` by calling `mean(non_us_some)`? Simply print out the result.")
666 | 
667 | success_msg("Nice one! Continue to the next exercise.")
668 | ```
669 | 
670 | ---
671 | 
672 | ## A little arithmetic with matrices
673 | 
674 | ```yaml
675 | type: NormalExercise
676 | key: c81c656f06
677 | xp: 100
678 | skills:
679 |   - 1
680 | ```
681 | 
682 | Similar to what you have learned with vectors, the standard operators like `+`, `-`, `/`, `*`, etc. work in an element-wise way on matrices in R. 
683 | 
684 | For example, `2 * my_matrix` multiplies each element of `my_matrix` by two.
685 | 
686 | As a newly-hired data analyst for Lucasfilm, it is your job to find out how many visitors went to each movie for each geographical area. You already have the total revenue figures in `all_wars_matrix`. Assume that the price of a ticket was 5 dollars. Simply dividing the box office numbers by this ticket price gives you the number of visitors.
687 | 
688 | `@instructions`
689 | - Divide `all_wars_matrix` by 5, giving you the number of visitors in millions. Assign the resulting matrix to `visitors`.
690 | - Print out `visitors` so you can have a look.
691 | 
692 | `@hint`
693 | The number of visitors is equal to `all_wars_matrix` divided by 5.
694 | 
695 | `@pre_exercise_code`
696 | ```{r}
697 | load(url("https://assets.datacamp.com/course/intro_to_r/all_wars_matrix.RData"))
698 | ```
699 | 
700 | `@sample_code`
701 | ```{r}
702 | # all_wars_matrix is available in your workspace
703 | all_wars_matrix
704 | 
705 | # Estimate the visitors
706 | visitors <- 
707 |   
708 | # Print the estimate to the console
709 | 
710 | ```
711 | 
712 | `@solution`
713 | ```{r}
714 | # all_wars_matrix is available in your workspace
715 | all_wars_matrix
716 | 
717 | # Estimate the visitors
718 | visitors <- all_wars_matrix / 5
719 | 
720 | # Print the estimate to the console
721 | visitors
722 | ```
723 | 
724 | `@sct`
725 | ```{r}
726 | msg = "Do not change the contents of `all_wars_matrix`; this matrix has already been created for you in the workspace."
727 | 
728 | ex() %>% check_object("all_wars_matrix", undefined_msg = msg) %>% check_equal(incorrect_msg = msg)
729 | 
730 | ex() %>% check_object("visitors") %>% check_equal(incorrect_msg = "It looks like `visitors` is not correct. Simply divide `all_wars_matrix` by 5 and store the resulting matrix as `visitors`.")
731 | 
732 | ex() %>% check_output_expr("visitors", missing_msg = "Don't forget to also print out `visitors` so you can have a look.")
733 | 
734 | 
735 | success_msg("Great! What do these results tell you? A staggering 92 million people went to see A New Hope in US theaters! Continue to the next exercise.")
736 | ```
737 | 
738 | ---
739 | 
740 | ## A little arithmetic with matrices (2)
741 | 
742 | ```yaml
743 | type: NormalExercise
744 | key: 1e0b39d6e9
745 | xp: 100
746 | skills:
747 |   - 1
748 | ```
749 | 
750 | Just like `2 * my_matrix` multiplied every element of `my_matrix` by two, `my_matrix1 * my_matrix2` creates a matrix where each element is the product of the corresponding elements in `my_matrix1` and `my_matrix2`. 
751 | 
752 | After looking at the result of the previous exercise, big boss Lucas points out that the ticket prices went up over time. He asks to redo the analysis based on the prices you can find in `ticket_prices_matrix` (source: imagination).
753 | 
754 | _Those who are familiar with matrices should note that this is not the standard matrix multiplication for which you should use `%*%` in R._
755 | 
756 | `@instructions`
757 | - Divide `all_wars_matrix` by `ticket_prices_matrix` to get the estimated number of US and non-US visitors for the six movies. Assign the result to `visitors`.
758 | - From the `visitors` matrix, select the entire first column, representing the number of visitors in the US. Store this selection as `us_visitors`.
759 | - Calculate the average number of US visitors; print out the result.
760 | 
761 | `@hint`
762 | - You can use the function `mean()` to calculate the average of the inputs to the function.
763 | - To get the number of visitors in the US, select the first column from `visitors` using `visitors[ ,1]`.
764 | 
765 | `@pre_exercise_code`
766 | ```{r}
767 | load(url("https://assets.datacamp.com/course/intro_to_r/all_wars_matrix.RData"))
768 | movie_names <- c("A New Hope","The Empire Strikes Back","Return of the Jedi", "The Phantom Menace", "Attack of the Clones", "Revenge of the Sith")
769 | col_titles <- c("US","non-US")
770 | ticket_prices_matrix <- matrix(c(5, 5, 6, 6, 7, 7, 4, 4, 4.5, 4.5, 4.9, 4.9), nrow = 6, byrow = TRUE, dimnames = list(movie_names,col_titles))
771 | ```
772 | 
773 | `@sample_code`
774 | ```{r}
775 | # all_wars_matrix and ticket_prices_matrix are available in your workspace
776 | all_wars_matrix
777 | ticket_prices_matrix
778 | 
779 | # Estimated number of visitors
780 | visitors <- 
781 | 
782 | # US visitors
783 | us_visitors <- 
784 | 
785 | # Average number of US visitors
786 | 
787 | ```
788 | 
789 | `@solution`
790 | ```{r}
791 | # all_wars_matrix and ticket_prices_matrix are available in your workspace
792 | all_wars_matrix
793 | ticket_prices_matrix
794 | 
795 | # Estimated number of visitors
796 | visitors <- all_wars_matrix / ticket_prices_matrix
797 | 
798 | # US visitors
799 | us_visitors <- visitors[ ,1]
800 | 
801 | # Average number of US visitors
802 | mean(us_visitors)
803 | ```
804 | 
805 | `@sct`
806 | ```{r}
807 | msg <- "Do not change the contents of `all_wars_matrix`; this matrix has already been created for you in the workspace."
808 | ex() %>% check_object("all_wars_matrix", undefined_msg = msg) %>% check_equal(incorrect_msg = msg)
809 | msg_prices <- "Do not change the contents of `ticket_prices_matrix`; this matrix has already been created for you in the workspace."
810 | ex() %>% check_object("ticket_prices_matrix", undefined_msg = msg_prices) %>% check_equal(incorrect_msg = msg_prices)
811 | # Element-wise division of revenue by ticket price.
812 | ex() %>% check_object("visitors") %>% check_equal(incorrect_msg = "Have you correctly created the `visitors` matrix? You should divide `all_wars_matrix` by `ticket_prices_matrix` to get there.")
813 | # First column of visitors holds the US figures.
814 | ex() %>% check_object("us_visitors") %>% check_equal(incorrect_msg = "To create `us_visitors`, you should correctly select the entire first column from `visitors`. You can use `[,1]` for this!")
815 | # The average must be printed, not just computed.
816 | ex() %>% check_output_expr("mean(us_visitors)", missing_msg = "Once you have created `us_visitors`, you can use `mean()` to calculate the average number of visitors in the US. Make sure to print out the result.")
817 | 
818 | 
819 | success_msg("It's a fact: the R force is with you! This exercise concludes the chapter on matrices. Next stop on your journey through the R language: factors.")
820 | ```
821 | 


--------------------------------------------------------------------------------
/chapter5.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title_meta: Chapter 5
  3 | title: Data frames
  4 | description: >-
  5 |   Most datasets you will be working with will be stored as data frames. By the
  6 |   end of this chapter, you will be able to create a data frame, select
  7 |   interesting parts of a data frame, and order a data frame according to certain
  8 |   variables.
  9 | ---
 10 | 
 11 | ## What's a data frame?
 12 | 
 13 | ```yaml
 14 | type: NormalExercise
 15 | key: 7f95849020a2563168920409022ce7bed20835b5
 16 | xp: 100
 17 | skills:
 18 |   - 1
 19 | ```
 20 | 
 21 | You may remember from the chapter about matrices that all the elements that you put in a matrix should be of the same type. Back then, your dataset on Star Wars only contained numeric elements. 
 22 | 
 23 | When doing a market research survey, however, you often have questions such as:
 24 | 
 25 | - 'Are you married?' or 'yes/no' questions (`logical`)
 26 | - 'How old are you?' (`numeric`)
 27 | - 'What is your opinion on this product?' or other 'open-ended' questions (`character`)
 28 | - ...
 29 | 
 30 | The output, namely the respondents' answers to the questions formulated above, is a dataset of different data types. You will often find yourself working with datasets that contain different data types instead of only one. 
 31 | 
 32 | A data frame has the variables of a dataset as columns and the observations as rows. This will be a familiar concept for those coming from different statistical software packages such as SAS or SPSS.
 33 | 
 34 | `@instructions`
 35 | Submit the answer. The data from the built-in example data frame `mtcars` will be printed to the console.
 36 | 
 37 | `@hint`
 38 | Submit the answer and witness the magic!
 39 | 
 40 | `@pre_exercise_code`
 41 | ```{r}
 42 | # no pec
 43 | ```
 44 | 
 45 | `@sample_code`
 46 | ```{r}
 47 | # Print out built-in R data frame
 48 | mtcars 
 49 | ```
 50 | 
 51 | `@solution`
 52 | ```{r}
 53 | # Print out built-in R data frame
 54 | mtcars 
 55 | ```
 56 | 
 57 | `@sct`
 58 | ```{r}
 59 | ex() %>% check_output_expr("mtcars", missing_msg = "Do not change anything about the code. Make sure that you output `mtcars`.")
 60 | # Shown once the output check above has passed.
 61 | success_msg("Great! Continue to the next exercise.")
 62 | ```
 63 | 
 64 | ---
 65 | 
 66 | ## Quick, have a look at your dataset
 67 | 
 68 | ```yaml
 69 | type: NormalExercise
 70 | key: 3d0e64ecf5f69521ee538ecc713caa02b8b0ec46
 71 | xp: 100
 72 | skills:
 73 |   - 1
 74 | ```
 75 | 
 76 | Wow, that is a lot of cars! 
 77 | 
 78 | Working with large datasets is not uncommon in data analysis. When you work with (extremely) large datasets and data frames, your first task as a data analyst is to develop a clear understanding of their structure and main elements. Therefore, it is often useful to show only a small part of the entire dataset. 
 79 | 
 80 | So how to do this in R? Well, the function `head()` enables you to show the first observations of a data frame. Similarly, the function `tail()` prints out the last observations in your dataset.
 81 | 
 82 | Both `head()` and `tail()` print a top line called the 'header', which contains the names of the different variables in your dataset.
 83 | 
 84 | `@instructions`
 85 | Call `head()` on the `mtcars` dataset to have a look at the header and the first observations.
 86 | 
 87 | `@hint`
 88 | `head(mtcars)` will show the first observations of the `mtcars` data frame.
 89 | 
 90 | `@pre_exercise_code`
 91 | ```{r}
 92 | # no pec
 93 | ```
 94 | 
 95 | `@sample_code`
 96 | ```{r}
 97 | # Call head() on mtcars
 98 | 
 99 | ```
100 | 
101 | `@solution`
102 | ```{r}
103 | # Call head() on mtcars
104 | head(mtcars)
105 | ```
106 | 
107 | `@sct`
108 | ```{r}
109 | ex() %>% check_function("head") %>% check_arg("x") %>% check_equal(incorrect_msg = "Have you correctly passed `mtcars` to the `head()` function?")
110 | # The head() result must be printed rather than stored.
111 | ex() %>% check_output_expr("head(mtcars)", missing_msg = "Simply print out the result of the `head()` call, no need to assign it to a new variable.")
112 | 
113 | success_msg("Wonderful! So, what do we have in this dataset? For example, `hp` represents the car's horsepower; the Datsun has the lowest horsepower of the 6 cars that are displayed. For a full overview of the variables' meaning, type `?mtcars` in the console and read the help page. Continue to the next exercise!")
114 | ```
115 | 
116 | ---
117 | 
118 | ## Have a look at the structure
119 | 
120 | ```yaml
121 | type: NormalExercise
122 | key: f4d5b1a2c4aef31645fc7e3505e699fb6e48f3e6
123 | xp: 100
124 | skills:
125 |   - 1
126 | ```
127 | 
128 | Another method that is often used to get a rapid overview of your data is the function `str()`. The function `str()` shows you the structure of your dataset. For a data frame it tells you:
129 | 
130 | - The total number of observations (e.g. 32 car types)
131 | - The total number of variables (e.g. 11 car features)
132 | - A full list of the variable names (e.g. `mpg`, `cyl` ... )
133 | - The data type of each variable (e.g. `num`)
134 | - The first observations
135 | 
136 | Applying the `str()` function will often be the first thing that you do when receiving a new dataset or data frame. It is a great way to get more insight into your dataset before diving into the real analysis.
137 | 
138 | `@instructions`
139 | Investigate the structure of `mtcars`. Make sure that you see the same numbers, variables and data types as mentioned above.
140 | 
141 | `@hint`
142 | Use the `str()` function on `mtcars`.
143 | 
144 | `@pre_exercise_code`
145 | ```{r}
146 | # no pec
147 | ```
148 | 
149 | `@sample_code`
150 | ```{r}
151 | # Investigate the structure of mtcars
152 | 
153 | ```
154 | 
155 | `@solution`
156 | ```{r}
157 | # Investigate the structure of mtcars
158 | str(mtcars)
159 | ```
160 | 
161 | `@sct`
162 | ```{r}
163 | ex() %>% check_output_expr("str(mtcars)", missing_msg = "Have you correctly called `str()` on `mtcars`?")
164 | 
165 | success_msg("Nice work! You can find lots of information by viewing the `str()` of a dataset. Continue to the next exercise.")
166 | ```
167 | 
168 | ---
169 | 
170 | ## Creating a data frame
171 | 
172 | ```yaml
173 | type: NormalExercise
174 | key: df0b89706d90526b3c0bbe15e400b74cbd900704
175 | xp: 100
176 | skills:
177 |   - 1
178 | ```
179 | 
180 | Since using built-in datasets is not even half the fun of creating your own datasets, the rest of this chapter is based on your personally developed dataset. Put your jet pack on because it is time for some space exploration! 
181 | 
182 | As a first goal, you want to construct a data frame that describes the main characteristics of eight planets in our solar system. According to your good friend Buzz, the main features of a planet are:
183 | 
184 | - The type of planet (Terrestrial or Gas Giant).
185 | - The planet's diameter relative to the diameter of the Earth.
186 | - The planet's rotation period relative to that of the Earth.
187 | - If the planet has rings or not (TRUE or FALSE).
188 | 
189 | After doing some high-quality research on [Wikipedia](https://en.wikipedia.org/wiki/Planet), you feel confident enough to create the necessary vectors: `name`, `type`, `diameter`, `rotation` and `rings`; these vectors have already been coded up in the editor. The first element in each of these vectors corresponds to the first observation.
190 | 
191 | You construct a data frame with the `data.frame()` function. As arguments, you pass the vectors from before: they will become the different columns of your data frame. Because every column has the same length, the vectors you pass should also have the same length. But don't forget that it is possible (and likely) that they contain different types of data.
192 | 
193 | `@instructions`
194 | Use the function `data.frame()` to construct a data frame. Pass the vectors `name`, `type`, `diameter`, `rotation` and `rings` as arguments to `data.frame()`, in this order. Call the resulting data frame `planets_df`.
195 | 
196 | `@hint`
197 | Your `data.frame()` call starts as follows:
198 | ```
199 | data.frame(name, type, diameter)
200 | ```
201 | Can you finish it?
202 | 
203 | `@pre_exercise_code`
204 | ```{r}
205 | # no pec
206 | ```
207 | 
208 | `@sample_code`
209 | ```{r}
210 | # Definition of vectors
211 | name <- c("Mercury", "Venus", "Earth", 
212 |           "Mars", "Jupiter", "Saturn", 
213 |           "Uranus", "Neptune")
214 | type <- c("Terrestrial planet", 
215 |           "Terrestrial planet", 
216 |           "Terrestrial planet", 
217 |           "Terrestrial planet", "Gas giant", 
218 |           "Gas giant", "Gas giant", "Gas giant")
219 | diameter <- c(0.382, 0.949, 1, 0.532, 
220 |               11.209, 9.449, 4.007, 3.883)
221 | rotation <- c(58.64, -243.02, 1, 1.03, 
222 |               0.41, 0.43, -0.72, 0.67)
223 | rings <- c(FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE)
224 | 
225 | # Create a data frame from the vectors
226 | planets_df <-
227 | 
228 | ```
229 | 
230 | `@solution`
231 | ```{r}
232 | # Definition of vectors
233 | name <- c("Mercury", "Venus", "Earth", 
234 |           "Mars", "Jupiter", "Saturn", 
235 |           "Uranus", "Neptune")
236 | type <- c("Terrestrial planet", 
237 |           "Terrestrial planet", 
238 |           "Terrestrial planet", 
239 |           "Terrestrial planet", "Gas giant", 
240 |           "Gas giant", "Gas giant", "Gas giant")
241 | diameter <- c(0.382, 0.949, 1, 0.532, 
242 |               11.209, 9.449, 4.007, 3.883)
243 | rotation <- c(58.64, -243.02, 1, 1.03,
244 |               0.41, 0.43, -0.72, 0.67)
245 | rings <- c(FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE)
246 | 
247 | # Create a data frame from the vectors
248 | planets_df <- data.frame(name, type, diameter, rotation, rings)
249 | ```
250 | 
251 | `@sct`
252 | ```{r}
253 | msg <- "Do not change anything about the definition of the vectors. Only add a `data.frame()` call to create `planets_df`."
254 | ex() %>% check_object("name", undefined_msg = msg) %>% check_equal(incorrect_msg = msg)
255 | # Each pre-coded vector gets the same "do not touch" guard.
256 | ex() %>% check_object("type", undefined_msg = msg) %>% check_equal(incorrect_msg = msg)
257 | 
258 | ex() %>% check_object("diameter", undefined_msg = msg) %>% check_equal(incorrect_msg = msg)
259 | 
260 | ex() %>% check_object("rotation", undefined_msg = msg) %>% check_equal(incorrect_msg = msg)
261 | 
262 | ex() %>% check_object("rings", undefined_msg = msg) %>% check_equal(incorrect_msg = msg)
263 | # Finally check the data frame the student had to build.
264 | ex() %>% check_object("planets_df") %>% check_equal(incorrect_msg = "Have you correctly called `data.frame()` to create `planets_df`? Inside `data.frame()`, make sure to pass all vectors in the correct order: `name`, `type`, `diameter`, `rotation` and finally `rings`.")
265 | 
266 | 
267 | success_msg("Great job! The logical next step, as you know by now, is inspecting the data frame you just created. Head over to the next exercise.")
268 | ```
269 | 
270 | ---
271 | 
272 | ## Creating a data frame (2)
273 | 
274 | ```yaml
275 | type: NormalExercise
276 | key: c13ea421dd078030a225f49e53a8927ce8fefbe0
277 | xp: 100
278 | skills:
279 |   - 1
280 | ```
281 | 
282 | The `planets_df` data frame should have 8 observations and 5 variables. It has been made available in the workspace, so you can directly use it.
283 | 
284 | `@instructions`
285 | Use `str()` to investigate the structure of the new `planets_df` variable.
286 | 
287 | `@hint`
288 | `planets_df` is already available in your workspace, so `str(planets_df)` will do the trick.
289 | 
290 | `@pre_exercise_code`
291 | ```{r}
292 | # Definition of vectors
293 | name <- c("Mercury", "Venus", "Earth", 
294 |           "Mars", "Jupiter", "Saturn", 
295 |           "Uranus", "Neptune")
296 | type <- c("Terrestrial planet", 
297 |           "Terrestrial planet", 
298 |           "Terrestrial planet", 
299 |           "Terrestrial planet", "Gas giant", 
300 |           "Gas giant", "Gas giant", "Gas giant")
301 | diameter <- c(0.382, 0.949, 1, 0.532, 
302 |               11.209, 9.449, 4.007, 3.883)
303 | rotation <- c(58.64, -243.02, 1, 1.03,
304 |               0.41, 0.43, -0.72, 0.67)
305 | rings <- c(FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE)
306 | 
307 | # Create a data frame from the vectors
308 | planets_df <- data.frame(name, type, diameter, rotation, rings)
309 | ```
310 | 
311 | `@sample_code`
312 | ```{r}
313 | # Check the structure of planets_df
314 | ```
315 | 
316 | `@solution`
317 | ```{r}
318 | # Check the structure of planets_df
319 | str(planets_df)
320 | ```
321 | 
322 | `@sct`
323 | ```{r}
324 | msg <- "Do not remove or overwrite the `planets_df` data frame that is already available in the workspace!"
325 | ex() %>% check_object("planets_df", undefined_msg = msg) %>% check_equal(incorrect_msg = msg)
326 | # The structure must be displayed via str().
327 | ex() %>% check_output_expr("str(planets_df)", missing_msg = "Have you correctly displayed the structure of `planets_df`? Use `str()` to do this!")
328 | 
329 | success_msg("Awesome! Now that you have a clear understanding of the `planets_df` dataset, it's time to see how you can select elements from it. Learn all about it in the next exercises!")
330 | ```
331 | 
332 | ---
333 | 
334 | ## Selection of data frame elements
335 | 
336 | ```yaml
337 | type: NormalExercise
338 | key: 8c664726b8a173cda730cbb20a52ac1795d9a0e9
339 | xp: 100
340 | skills:
341 |   - 1
342 | ```
343 | 
344 | Similar to vectors and matrices, you select elements from a data frame with the help of square brackets `[ ]`. By using a comma, you can indicate what to select from the rows and the columns respectively. For example:
345 | 
346 | - `my_df[1,2]` selects the value at the first row and second column in `my_df`.
347 | - `my_df[1:3,2:4]` selects rows 1, 2, 3 and columns 2, 3, 4 in `my_df`.
348 | 
349 | Sometimes you want to select all elements of a row or column. For example, `my_df[1, ]` selects all elements of the first row. Let us now apply this technique on `planets_df`!
350 | 
351 | `@instructions`
352 | - From `planets_df`, select the diameter of Mercury: this is the value at the first row and the third column. Simply print out the result.
353 | - From `planets_df`, select all data on Mars (the fourth row). Simply print out the result.
354 | 
355 | `@hint`
356 | To select the diameter for Venus (the second row), you would need: `planets_df[2,3]`. What do you need for Mercury then?
357 | 
358 | `@pre_exercise_code`
359 | ```{r}
360 | # Definition of vectors
361 | name <- c("Mercury", "Venus", "Earth", 
362 |           "Mars", "Jupiter", "Saturn", 
363 |           "Uranus", "Neptune")
364 | type <- c("Terrestrial planet", 
365 |           "Terrestrial planet", 
366 |           "Terrestrial planet", 
367 |           "Terrestrial planet", "Gas giant", 
368 |           "Gas giant", "Gas giant", "Gas giant")
369 | diameter <- c(0.382, 0.949, 1, 0.532, 
370 |               11.209, 9.449, 4.007, 3.883)
371 | rotation <- c(58.64, -243.02, 1, 1.03,
372 |               0.41, 0.43, -0.72, 0.67)
373 | rings <- c(FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE)
374 | 
375 | # Create a data frame from the vectors
376 | planets_df <- data.frame(name, type, diameter, rotation, rings)
377 | ```
378 | 
379 | `@sample_code`
380 | ```{r}
381 | # The planets_df data frame from the previous exercise is pre-loaded
382 | 
383 | # Print out diameter of Mercury (row 1, column 3)
384 | 
385 | 
386 | # Print out data for Mars (entire fourth row)
387 | 
388 | ```
389 | 
390 | `@solution`
391 | ```{r}
392 | # The planets_df data frame from the previous exercise is pre-loaded
393 | 
394 | # Print out diameter of Mercury (row 1, column 3)
395 | planets_df[1,3]
396 | 
397 | # Print out data for Mars (entire fourth row)
398 | planets_df[4, ]
399 | ```
400 | 
401 | `@sct`
402 | ```{r}
403 | msg = "Do not remove or overwrite the `planets_df` data frame!"
404 | ex() %>% check_object("planets_df", undefined_msg = msg) %>% check_equal(incorrect_msg = msg)
405 | 
406 | ex() %>% check_output_expr("planets_df[1,3]", missing_msg = "Have you correctly selected and printed out the diameter for Mercury? You can use `[1,3]`.")
407 | 
408 | ex() %>% check_output_expr("planets_df[4, ]", missing_msg = "Have you correctly selected and printed out all data for Mars? You can use `[4,]`.")
409 | 
410 | success_msg("Great! Apart from selecting elements from your data frame by index, you can also use the column names. To learn how, head over to the next exercise.")
411 | ```
412 | 
413 | ---
414 | 
415 | ## Selection of data frame elements (2)
416 | 
417 | ```yaml
418 | type: NormalExercise
419 | key: faf104fb0c605fd89f048648a4a588200bc89c76
420 | xp: 100
421 | skills:
422 |   - 1
423 | ```
424 | 
425 | Instead of using numerics to select elements of a data frame, you can also use the variable names to select columns of a data frame. 
426 | 
427 | Suppose you want to select the first three elements of the `type` column. One way to do this is
428 | 
429 | ```
430 | planets_df[1:3,2]
431 | ```
432 | 
433 | A possible disadvantage of this approach is that you have to know (or look up) the column number of `type`, which gets hard if you have a lot of variables. It is often easier to just make use of the variable name:
434 | 
435 | ```
436 | planets_df[1:3,"type"]
437 | ```
438 | 
439 | `@instructions`
440 | Select and print out the first 5 values in the `"diameter"` column of `planets_df`.
441 | 
442 | `@hint`
443 | You can select the first five values with `planets_df[1:5, ...]`. Can you fill in the `...` bit to only select the `"diameter"` column?
444 | 
445 | `@pre_exercise_code`
446 | ```{r}
447 | # Definition of vectors
448 | name <- c("Mercury", "Venus", "Earth", 
449 |           "Mars", "Jupiter", "Saturn", 
450 |           "Uranus", "Neptune")
451 | type <- c("Terrestrial planet", 
452 |           "Terrestrial planet", 
453 |           "Terrestrial planet", 
454 |           "Terrestrial planet", "Gas giant", 
455 |           "Gas giant", "Gas giant", "Gas giant")
456 | diameter <- c(0.382, 0.949, 1, 0.532, 
457 |               11.209, 9.449, 4.007, 3.883)
458 | rotation <- c(58.64, -243.02, 1, 1.03,
459 |               0.41, 0.43, -0.72, 0.67)
460 | rings <- c(FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE)
461 | 
462 | # Create a data frame from the vectors
463 | planets_df <- data.frame(name, type, diameter, rotation, rings)
464 | ```
465 | 
466 | `@sample_code`
467 | ```{r}
468 | # The planets_df data frame from the previous exercise is pre-loaded
469 | 
470 | # Select first 5 values of diameter column
471 | 
472 | ```
473 | 
474 | `@solution`
475 | ```{r}
476 | # The planets_df data frame from the previous exercise is pre-loaded
477 | 
478 | # Select first 5 values of diameter column
479 | planets_df[1:5, "diameter"]
480 | ```
481 | 
482 | `@sct`
483 | ```{r}
484 | msg = "Do not remove or overwrite the `planets_df` data frame!"
485 | ex() %>% check_object("planets_df", undefined_msg = msg) %>% check_equal(incorrect_msg = msg)
486 | 
487 | ex() %>% check_output_expr("planets_df[1:5, \"diameter\"]", missing_msg = "Have you correctly selected the first five values from the diameter column and printed them out? You can use `[1:5, \"diameter\"]` here.")
488 | 
489 | success_msg("Nice! Continue to the next exercise!")
490 | ```
491 | 
492 | ---
493 | 
494 | ## Only planets with rings
495 | 
496 | ```yaml
497 | type: NormalExercise
498 | key: e550ecb6ec45b856e6160ddfbb3d7875998e8365
499 | xp: 100
500 | skills:
501 |   - 1
502 | ```
503 | 
504 | You will often want to select an entire column, namely one specific variable from a data frame. If you want to select all elements of the variable `diameter`, for example, both of these will do the trick:
505 | 
506 | ```
507 | planets_df[,3]
508 | planets_df[,"diameter"]
509 | ```
510 | 
511 | However, there is a short-cut. If your columns have names, you can use the `$` sign:
512 | 
513 | ```
514 | planets_df$diameter
515 | ```
516 | 
517 | `@instructions`
518 | - Use the `$` sign to select the `rings` variable from `planets_df`. Store the vector that results as `rings_vector`.
519 | - Print out `rings_vector` to see if you got it right.
520 | 
521 | `@hint`
522 | `planets_df$diameter` selects the `diameter` column from `planets_df`; what do you need to select the `rings` column then?
523 | 
524 | `@pre_exercise_code`
525 | ```{r}
526 | # Definition of vectors
527 | name <- c("Mercury", "Venus", "Earth", 
528 |           "Mars", "Jupiter", "Saturn", 
529 |           "Uranus", "Neptune")
530 | type <- c("Terrestrial planet", 
531 |           "Terrestrial planet", 
532 |           "Terrestrial planet", 
533 |           "Terrestrial planet", "Gas giant", 
534 |           "Gas giant", "Gas giant", "Gas giant")
535 | diameter <- c(0.382, 0.949, 1, 0.532, 
536 |               11.209, 9.449, 4.007, 3.883)
537 | rotation <- c(58.64, -243.02, 1, 1.03,
538 |               0.41, 0.43, -0.72, 0.67)
539 | rings <- c(FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE)
540 | 
541 | # Create a data frame from the vectors
542 | planets_df <- data.frame(name, type, diameter, rotation, rings)
543 | ```
544 | 
545 | `@sample_code`
546 | ```{r}
547 | # planets_df is pre-loaded in your workspace
548 | 
549 | # Select the rings variable from planets_df
550 | rings_vector <- 
551 |   
552 | # Print out rings_vector
553 | ```
554 | 
555 | `@solution`
556 | ```{r}
557 | # planets_df is pre-loaded in your workspace
558 | 
559 | # Select the rings variable from planets_df
560 | rings_vector <- planets_df$rings
561 | 
562 | # Print out rings_vector
563 | rings_vector
564 | ```
565 | 
566 | `@sct`
567 | ```{r}
568 | msg <- "Do not remove or overwrite the `planets_df` data frame!"
569 | ex() %>% check_object("planets_df", undefined_msg = msg) %>% check_equal(incorrect_msg = msg)
570 | 
571 | ex() %>% check_object("rings_vector") %>% check_equal(incorrect_msg = "Have you correctly selected the `rings` variable from `planets_df`? Use `$rings`. Store the result as `rings_vector`.")
572 | 
573 | ex() %>% check_output_expr("rings_vector", missing_msg = "Don't forget to print out `rings_vector` after you've created it!")
574 | 
575 | success_msg("Great! Continue to the next exercise and discover yet another way of subsetting!")
576 | ```
577 | 
578 | ---
579 | 
580 | ## Only planets with rings (2)
581 | 
582 | ```yaml
583 | type: NormalExercise
584 | key: 1581bf4667477f274188f4f637ec7fdc73659651
585 | xp: 100
586 | skills:
587 |   - 1
588 | ```
589 | 
590 | You probably remember from high school that some planets in our solar system have rings and others do not. Unfortunately, you cannot recall their names. Could R help you out?
591 | 
592 | If you type `rings_vector` in the console, you get:
593 | 
594 | ```
595 | [1] FALSE FALSE FALSE FALSE  TRUE  TRUE  TRUE  TRUE
596 | ```
597 | 
598 | This means that the first four observations (or planets) do not have a ring (`FALSE`), but the other four do (`TRUE`). However, you do not get a nice overview of the names of these planets, their diameter, etc. Let's try to use `rings_vector` to select the data for the four planets with rings.
599 | 
600 | `@instructions`
601 | The code in the editor selects the `name` column of all planets that have rings. Adapt the code so that instead of only the `name` column, _all_ columns for planets that have rings are selected.
602 | 
603 | `@hint`
604 | Remember that to select _all_ columns, you simply have to leave the columns part inside the `[ ]` empty! This means you'll need `[rings_vector, ]`.
605 | 
606 | `@pre_exercise_code`
607 | ```{r}
608 | # Definition of vectors
609 | name <- c("Mercury", "Venus", "Earth", 
610 |           "Mars", "Jupiter", "Saturn", 
611 |           "Uranus", "Neptune")
612 | type <- c("Terrestrial planet", 
613 |           "Terrestrial planet", 
614 |           "Terrestrial planet", 
615 |           "Terrestrial planet", "Gas giant", 
616 |           "Gas giant", "Gas giant", "Gas giant")
617 | diameter <- c(0.382, 0.949, 1, 0.532, 
618 |               11.209, 9.449, 4.007, 3.883)
619 | rotation <- c(58.64, -243.02, 1, 1.03,
620 |               0.41, 0.43, -0.72, 0.67)
621 | rings <- c(FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE)
622 | 
623 | # Create a data frame from the vectors
624 | planets_df <- data.frame(name, type, diameter, rotation, rings)
625 | 
626 | rings_vector <- planets_df$rings
627 | ```
628 | 
629 | `@sample_code`
630 | ```{r}
631 | # planets_df and rings_vector are pre-loaded in your workspace
632 | 
633 | # Adapt the code to select all columns for planets with rings
634 | planets_df[rings_vector, "name"]
635 | ```
636 | 
637 | `@solution`
638 | ```{r}
639 | # planets_df and rings_vector are pre-loaded in your workspace
640 | 
641 | # Adapt the code to select all columns for planets with rings
642 | planets_df[rings_vector, ]
643 | ```
644 | 
645 | `@sct`
646 | ```{r}
647 | msg <- "Do not remove or overwrite `planets_df` or `rings_vector`!"
648 | ex() %>% check_object("planets_df", undefined_msg = msg) %>% check_equal(incorrect_msg = msg)
649 | 
650 | ex() %>% check_object("rings_vector", undefined_msg = msg) %>% check_equal(incorrect_msg = msg)
651 | 
652 | ex() %>% check_output_expr('planets_df[rings_vector, ]', missing_msg = "Have you correctly adapted the code to select _all_ columns for the planets that have rings? You can use `planets_df[rings_vector, ]`. Make sure to include the comma here, it's crucial!")
653 | 
654 | success_msg("Wonderful! This is a rather tedious solution. The next exercise will teach you how to do it in a more concise way.")
655 | ```
656 | 
657 | ---
658 | 
659 | ## Only planets with rings but shorter
660 | 
661 | ```yaml
662 | type: NormalExercise
663 | key: a4a8b72a74097196eb2f8a28b056987aae834565
664 | xp: 100
665 | skills:
666 |   - 1
667 | ```
668 | 
669 | So what exactly did you learn in the previous exercises? You selected a subset from a data frame (`planets_df`) based on whether or not a certain condition was true (rings or no rings), and you managed to pull out all relevant data. Pretty awesome! By now, NASA is probably already flirting with your CV ;-). 
670 | 
671 | Now, let us move up one level and use the function `subset()`. You should see the `subset()` function as a short-cut to do exactly the same as what you did in the previous exercises. 
672 | 
673 | ```
674 | subset(my_df, subset = some_condition)
675 | ``` 
676 | 
677 | The first argument of `subset()` specifies the dataset for which you want a subset. By adding the second argument, you give R the necessary information and conditions to select the correct subset. 
678 | 
679 | The code below will give the exact same result as you got in the previous exercise, but this time, you don't need `rings_vector`!
680 | 
681 | ```
682 | subset(planets_df, subset = rings)
683 | ```
684 | 
685 | `@instructions`
686 | Use `subset()` on `planets_df` to select planets that have a diameter smaller than Earth's. Because the `diameter` variable measures each planet's diameter relative to that of Earth, your condition is `diameter < 1`.
687 | 
688 | `@hint`
689 | `subset(planets_df, subset = ...)` almost solves it; can you fill in the `...`?
690 | 
691 | `@pre_exercise_code`
692 | ```{r}
693 | # Definition of vectors
694 | name <- c("Mercury", "Venus", "Earth", 
695 |           "Mars", "Jupiter", "Saturn", 
696 |           "Uranus", "Neptune")
697 | type <- c("Terrestrial planet", 
698 |           "Terrestrial planet", 
699 |           "Terrestrial planet", 
700 |           "Terrestrial planet", "Gas giant", 
701 |           "Gas giant", "Gas giant", "Gas giant")
702 | diameter <- c(0.382, 0.949, 1, 0.532, 
703 |               11.209, 9.449, 4.007, 3.883)
704 | rotation <- c(58.64, -243.02, 1, 1.03,
705 |               0.41, 0.43, -0.72, 0.67)
706 | rings <- c(FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE)
707 | 
708 | # Create a data frame from the vectors
709 | planets_df <- data.frame(name, type, diameter, rotation, rings)
710 | ```
711 | 
712 | `@sample_code`
713 | ```{r}
714 | # planets_df is pre-loaded in your workspace
715 | 
716 | # Select planets with diameter < 1
717 | 
718 | ```
719 | 
720 | `@solution`
721 | ```{r}
722 | # planets_df is pre-loaded in your workspace
723 | 
724 | # Select planets with diameter < 1
725 | subset(planets_df, subset = diameter < 1)
726 | ```
727 | 
728 | `@sct`
729 | ```{r}
730 | msg <- "Do not remove or overwrite the `planets_df` data frame!"
731 | ex() %>% check_object("planets_df", undefined_msg = msg) %>% check_equal(incorrect_msg = msg)
732 | 
733 | ex()  %>% check_correct({
734 |   ex() %>% check_output_expr("subset(planets_df, subset = diameter < 1)", missing_msg = "Have you correctly specified the `subset = ...` part inside `subset()`? The condition in this case is `diameter < 1`. Simply print out the result.")
735 | 
736 | }, {
737 |   ex() %>% check_function("subset") %>% check_arg('x') %>% check_equal(incorrect_msg = "The first argument you pass to `subset()` should be `planets_df`.")
738 | 
739 | })
740 | success_msg("Great! Not only is the `subset()` function more concise, it is probably also more understandable for people who read your code. Continue to the next exercise.")
741 | ```
742 | 
743 | ---
744 | 
745 | ## Sorting
746 | 
747 | ```yaml
748 | type: NormalExercise
749 | key: 6a6fe74d3917c37380f7ac616ce084aa7814fb8a
750 | xp: 100
751 | skills:
752 |   - 1
753 | ```
754 | 
755 | Making rankings is one of mankind's favorite pastimes. These rankings can be useful (best universities in the world), entertaining (most influential movie stars) or pointless (best 007 lookalike).
756 | 
757 | In data analysis you can sort your data according to a certain variable in the dataset. In R, this is done with the help of the function `order()`. 
758 | 
759 | `order()` is a function that, when applied to a variable such as a vector, returns the positions of its elements in sorted (by default ascending) order:
760 | 
761 | ```
762 | a <- c(100, 10, 1000)
763 | order(a)
764 | [1] 2 1 3
765 | ```
766 | 
767 | 10, which is the second element in `a`, is the smallest element, so 2 comes first in the output of `order(a)`. 100, which is the first element in `a` is the second smallest element, so 1 comes second in the output of `order(a)`.
768 | 
769 | This means we can use the output of `order(a)` to reshuffle `a`:
770 |     
771 | ```
772 | a[order(a)]
773 | [1]   10  100 1000
774 | ```
775 | 
776 | `@instructions`
777 | Experiment with the `order()` function in the console. Submit the answer when you are ready to continue.
778 | 
779 | `@hint`
780 | Just play with the `order()` function in the console!
781 | 
782 | `@pre_exercise_code`
783 | ```{r}
784 | # no pec
785 | ```
786 | 
787 | `@sample_code`
788 | ```{r}
789 | # Play around with the order function in the console
790 | ```
791 | 
792 | `@solution`
793 | ```{r}
794 | # Play around with the order function in the console
795 | ```
796 | 
797 | `@sct`
798 | ```{r}
799 | success_msg("Great! Now let's use the `order()` function to sort your data frame!")
800 | ```
801 | 
802 | ---
803 | 
804 | ## Sorting your data frame
805 | 
806 | ```yaml
807 | type: NormalExercise
808 | key: fa88b58bf2cf62e0c181dfdcbdd2e1ddeac66807
809 | xp: 100
810 | skills:
811 |   - 1
812 | ```
813 | 
814 | Alright, now that you understand the `order()` function, let us do something useful with it. You would like to rearrange your data frame such that it starts with the smallest planet and ends with the largest one. In other words, you want to sort the data frame on the `diameter` column.
815 | 
816 | `@instructions`
817 | - Call `order()` on `planets_df$diameter` (the `diameter` column of `planets_df`). Store the result as `positions`.
818 | - Now reshuffle `planets_df` with the `positions` vector as row indexes inside square brackets. Keep all columns. Simply print out the result.
819 | 
820 | `@hint`
821 | - Use `order(planets_df$diameter)` to create `positions`.
822 | - Now, you can use `positions` inside square brackets: `planets_df[...]`; can you fill in the `...`?
823 | 
824 | `@pre_exercise_code`
825 | ```{r}
826 | # Definition of vectors
827 | name <- c("Mercury", "Venus", "Earth", 
828 |           "Mars", "Jupiter", "Saturn", 
829 |           "Uranus", "Neptune")
830 | type <- c("Terrestrial planet", 
831 |           "Terrestrial planet", 
832 |           "Terrestrial planet", 
833 |           "Terrestrial planet", "Gas giant", 
834 |           "Gas giant", "Gas giant", "Gas giant")
835 | diameter <- c(0.382, 0.949, 1, 0.532, 
836 |               11.209, 9.449, 4.007, 3.883)
837 | rotation <- c(58.64, -243.02, 1, 1.03,
838 |               0.41, 0.43, -0.72, 0.67)
839 | rings <- c(FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE)
840 | 
841 | # Create a data frame from the vectors
842 | planets_df <- data.frame(name, type, diameter, rotation, rings)
843 | ```
844 | 
845 | `@sample_code`
846 | ```{r}
847 | # planets_df is pre-loaded in your workspace
848 | 
849 | # Use order() to create positions
850 | positions <-  
851 | 
852 | # Use positions to sort planets_df
853 | 
854 | ```
855 | 
856 | `@solution`
857 | ```{r}
858 | # planets_df is pre-loaded in your workspace
859 | 
860 | # Use order() to create positions
861 | positions <- order(planets_df$diameter)
862 | 
863 | # Use positions to sort planets_df
864 | planets_df[positions, ]
865 | ```
866 | 
867 | `@sct`
868 | ```{r}
869 | msg <- "Do not remove or overwrite the `planets_df` data frame!"
870 | ex() %>% check_object("planets_df", undefined_msg = msg) %>% check_equal(incorrect_msg = msg)
871 | 
872 | ex() %>% check_object("positions") %>% check_equal(incorrect_msg = "Have you correctly calculated the `positions` variable? You can use `order(planets_df$diameter)`.")
873 | 
874 | ex() %>% check_output_expr("planets_df[positions,]",missing_msg = "Use `planets_df[positions, ]` to sort `planets_df`; the comma inside the square brackets is crucial!")
875 | 
876 | success_msg("Wonderful! This exercise concludes the chapter on data frames. Remember that data frames are extremely important in R, you will need them all the time. Another very often used data structure is the list. This will be the subject of the next chapter!")
877 | ```
878 | 


--------------------------------------------------------------------------------
/chapter2.Rmd:
--------------------------------------------------------------------------------
   1 | ---
   2 | title_meta: Chapter 2
   3 | title: Vectors
   4 | description: >-
   5 |   We take you on a trip to Vegas, where you will learn how to analyze your
   6 |   gambling results using vectors in R. After completing this chapter, you will
   7 |   be able to create vectors in R, name them, select elements from them, and
   8 |   compare different vectors.
   9 | ---
  10 | 
  11 | ## Create a vector
  12 | 
  13 | ```yaml
  14 | type: NormalExercise
  15 | key: d9b453dbdd
  16 | xp: 100
  17 | skills:
  18 |   - 1
  19 | ```
  20 | 
  21 | Feeling lucky? You'd better, because this chapter takes you on a trip to the City of Sins, also known as *Statistician's Paradise*!
  22 | 
  23 | Thanks to R and your new data-analytical skills, you will learn how to uplift your performance at the tables and fire off your career as a professional gambler. This chapter will show how you can easily keep track of your betting progress and how you can do some simple analyses on past actions. Next stop, Vegas Baby... VEGAS!!
  24 | 
  25 | `@instructions`
  26 | - Do you still remember what you have learned in the first chapter? Assign the value `"Go!"` to the variable `vegas`. Remember: R is case sensitive!
  27 | 
  28 | `@hint`
  29 | Just type the following line in the editor:
  30 | ```
  31 | vegas <- "Go!"
  32 | ```
  33 | 
  34 | `@pre_exercise_code`
  35 | ```{r}
  36 | # no pec
  37 | ```
  38 | 
  39 | `@sample_code`
  40 | ```{r}
  41 | # Define the variable vegas
  42 | vegas <- 
  43 | ```
  44 | 
  45 | `@solution`
  46 | ```{r}
  47 | # Define the variable vegas
  48 | vegas <- "Go!"
  49 | ```
  50 | 
  51 | `@sct`
  52 | ```{r}
  53 | ex() %>% check_object("vegas") %>% check_equal(incorrect_msg = "Make sure that you assign the correct value to `vegas`. Do not forget that R is case sensitive!")
  54 | 
  55 | success_msg("Great! Head over to the next exercise.")
  56 | ```
  57 | 
  58 | ---
  59 | 
  60 | ## Create a vector (2)
  61 | 
  62 | ```yaml
  63 | type: NormalExercise
  64 | key: fd427db76f
  65 | xp: 100
  66 | skills:
  67 |   - 1
  68 | ```
  69 | 
  70 | Let us focus first! 
  71 | 
  72 | On your way from rags to riches, you will make extensive use of vectors. Vectors are one-dimensional arrays that can hold numeric data, character data, or logical data. In other words, a vector is a simple tool to store data. For example, you can store your daily gains and losses in the casinos.
  73 | 
  74 | In R, you create a vector with the combine function `c()`. You place the vector elements separated by a comma between the parentheses. For example:
  75 | 
  76 | ```
  77 | numeric_vector <- c(1, 2, 3)
  78 | character_vector <- c("a", "b", "c")
  79 | ```
  80 | 
  81 | Once you have created these vectors in R, you can use them to do calculations.
  82 | 
  83 | `@instructions`
  84 | Complete the code such that `boolean_vector` contains the three elements: `TRUE`, `FALSE` and `TRUE` (in that order).
  85 | 
  86 | `@hint`
  87 | Assign `c(TRUE, FALSE, TRUE)` to the variable `boolean_vector` with the `<-` operator.
  88 | 
  89 | `@pre_exercise_code`
  90 | ```{r}
  91 | # no pec
  92 | ```
  93 | 
  94 | `@sample_code`
  95 | ```{r}
  96 | numeric_vector <- c(1, 10, 49)
  97 | character_vector <- c("a", "b", "c")
  98 | 
  99 | # Complete the code for boolean_vector
 100 | boolean_vector <-
 101 | ```
 102 | 
 103 | `@solution`
 104 | ```{r}
 105 | numeric_vector <- c(1, 10, 49)
 106 | character_vector <- c("a", "b", "c")
 107 | 
 108 | # Complete the code for boolean_vector
 109 | boolean_vector <- c(TRUE, FALSE, TRUE)
 110 | ```
 111 | 
 112 | `@sct`
 113 | ```{r}
 114 | msg <- "Do not change the code that defined `numeric_vector` and `character_vector`!"
 115 | ex() %>% check_object("numeric_vector", undefined_msg = msg) %>% check_equal(incorrect_msg = msg)
 116 | 
 117 | ex() %>% check_object("character_vector", undefined_msg = msg)  %>% check_equal(incorrect_msg = msg)
 118 | 
 119 | ex() %>% check_object("boolean_vector") %>% check_equal(incorrect_msg = "Make sure that you assign the correct values to `boolean_vector`. Use `c(TRUE, FALSE, TRUE)`. Don't place quotes around `TRUE` and `FALSE`! Also, make sure to adopt the same order as listed in the instructions.")
 120 | 
 121 | 
 122 | success_msg("Perfect! Notice that adding a space behind the commas in the `c()` function improves the readability of your code. Let's practice some more with vector creation in the next exercise.")
 123 | ```
 124 | 
 125 | ---
 126 | 
 127 | ## Create a vector (3)
 128 | 
 129 | ```yaml
 130 | type: NormalExercise
 131 | key: 9f41229dbc
 132 | xp: 100
 133 | skills:
 134 |   - 1
 135 | ```
 136 | 
 137 | After one week in Las Vegas and still zero Ferraris in your garage, you decide that it is time to start using your data analytical superpowers.
 138 | 
 139 | Before doing a first analysis, you decide to first collect all the winnings and losses for the last week: 
 140 | 
 141 | For `poker_vector`: 
 142 | 
 143 | - On Monday you won $140
 144 | - Tuesday you lost $50
 145 | - Wednesday you won $20 
 146 | - Thursday you lost $120
 147 | - Friday you won $240
 148 | 
 149 | For `roulette_vector`: 
 150 | 
 151 | - On Monday you lost $24
 152 | - Tuesday you lost $50
 153 | - Wednesday you won $100
 154 | - Thursday you lost $350
 155 | - Friday you won $10
 156 | 
 157 | You only played poker and roulette, since there was a delegation of mediums that occupied the craps tables. To be able to use this data in R, you decide to create the variables `poker_vector` and `roulette_vector`.
 158 | 
 159 | `@instructions`
 160 | Assign the winnings/losses for roulette to the variable `roulette_vector`. You lost $24, then lost $50, won $100, lost $350, and won $10.
 161 | 
 162 | `@hint`
 163 | To help you with this step, the editor already contains the code for creating `poker_vector`. Assign the correct values to `roulette_vector` based on the numbers in the assignment. Do not forget that losses are negative numbers.
 164 | 
 165 | `@pre_exercise_code`
 166 | ```{r}
 167 | 
 168 | ```
 169 | 
 170 | `@sample_code`
 171 | ```{r}
 172 | # Poker winnings from Monday to Friday
 173 | poker_vector <- c(140, -50, 20, -120, 240)
 174 | 
 175 | # Roulette winnings from Monday to Friday
 176 | roulette_vector <-  
 177 | ```
 178 | 
 179 | `@solution`
 180 | ```{r}
 181 | # Poker winnings from Monday to Friday
 182 | poker_vector <- c(140, -50, 20, -120, 240)
 183 | 
 184 | # Roulette winnings from Monday to Friday
 185 | roulette_vector <- c(-24, -50, 100, -350, 10)
 186 | ```
 187 | 
 188 | `@sct`
 189 | ```{r}
 190 | ex() %>% check_object("poker_vector") %>% check_equal(incorrect_msg = "Make sure that you assign the correct values to `poker_vector`.")
 191 | 
 192 | ex() %>% check_object("roulette_vector") %>% check_equal(incorrect_msg = "Make sure that you assign the correct values to `roulette_vector`. Make sure to adopt the correct order!")
 193 | 
 194 | success_msg("Very good! To check out the contents of your vectors, remember that you can always simply type the variable in the console and hit Enter. Proceed to the next exercise!")
 195 | ```
 196 | 
 197 | ---
 198 | 
 199 | ## Naming a vector
 200 | 
 201 | ```yaml
 202 | type: NormalExercise
 203 | key: 3b0b80b192
 204 | xp: 100
 205 | skills:
 206 |   - 1
 207 | ```
 208 | 
 209 | As a data analyst, it is important to have a clear view on the data that you are using. Understanding what each element refers to is therefore essential. 
 210 | 
 211 | In the previous exercise, we created a vector with your winnings over the week. Each vector element refers to a day of the week but it is hard to tell which element belongs to which day. It would be nice if you could show that in the vector itself. 
 212 | 
 213 | You can give a name to the elements of a vector with the `names()` function. Have a look at this example:
 214 | 
 215 | ```
 216 | some_vector <- c("John Doe", "poker player")
 217 | names(some_vector) <- c("Name", "Profession")
 218 | ```
 219 | 
 220 | This code first creates a vector `some_vector` and then gives the two elements a name. The first element is assigned the name `Name`, while the second element is labeled `Profession`. Printing the contents to the console yields the following output:
 221 | 
 222 | ```
 223 |           Name     Profession 
 224 |     "John Doe" "poker player" 
 225 | ```
 226 | 
 227 | `@instructions`
 228 | - The code in the editor names the elements in `poker_vector` with the days of the week. Add code to do the same thing for `roulette_vector`.
 229 | 
 230 | `@hint`
 231 | You can use `names(roulette_vector)` to set the names of the variable `roulette_vector`. Make sure to use the same vector with the days of the week as names. Remember that R is case sensitive!
 232 | 
 233 | `@pre_exercise_code`
 234 | ```{r}
 235 | 
 236 | ```
 237 | 
 238 | `@sample_code`
 239 | ```{r}
 240 | # Poker winnings from Monday to Friday
 241 | poker_vector <- c(140, -50, 20, -120, 240)
 242 | 
 243 | # Roulette winnings from Monday to Friday
 244 | roulette_vector <- c(-24, -50, 100, -350, 10)
 245 | 
 246 | # Assign days as names of poker_vector
 247 | names(poker_vector) <- c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday")
 248 | 
 249 | # Assign days as names of roulette_vector
 250 | 
 251 | ```
 252 | 
 253 | `@solution`
 254 | ```{r}
 255 | # Poker winnings from Monday to Friday
 256 | poker_vector <- c(140, -50, 20, -120, 240)
 257 | 
 258 | # Roulette winnings from Monday to Friday
 259 | roulette_vector <- c(-24, -50, 100, -350, 10)
 260 | 
 261 | # Assign days as names of poker_vector
 262 | names(poker_vector) <- c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday")
 263 | 
 264 | # Assign days as names of roulette_vector
 265 | names(roulette_vector) <- c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday")
 266 | ```
 267 | 
 268 | `@sct`
 269 | ```{r}
 270 | ex() %>% check_object("poker_vector") %>% check_equal(incorrect_msg = "Do not change the values inside `poker_vector`; they were already coded for you.")
 271 | 
 272 | ex() %>% check_object("roulette_vector") %>% check_equal(incorrect_msg = "Do not change the values inside `roulette_vector`; they were already coded for you.")
 273 | 
 274 | ex() %>% check_object("poker_vector") %>% check_equal(eq_condition = 'equal', incorrect_msg = "Do not change the code that names the elements in `poker_vector`; focus on `roulette_vector`!")
 275 | 
 276 | ex() %>% check_object("roulette_vector") %>% check_equal(eq_condition = 'equal',incorrect_msg = "Make sure that you assign the correct names vector to `roulette_vector`. Use the exact same vector as the one that was used to name `poker_vector`.")
 277 | 
 278 | 
 279 | success_msg("Well done! Continue to the next exercise.")
 280 | ```
 281 | 
 282 | ---
 283 | 
 284 | ## Naming a vector (2)
 285 | 
 286 | ```yaml
 287 | type: NormalExercise
 288 | key: 6858c65a4a
 289 | xp: 100
 290 | skills:
 291 |   - 1
 292 | ```
 293 | 
 294 | If you want to become a good statistician, you have to become efficient.
 295 | 
 296 | In the previous exercises you probably experienced that it is boring and frustrating to type and retype information such as the days of the week. However, when you look at it from a higher perspective, there is a more efficient way to do this, namely, to assign the days of the week vector to a **variable**! 
 297 | 
 298 | Just like you did with your poker and roulette returns, you can also create a variable that contains the days of the week. This way you can use and re-use it.
 299 | 
 300 | `@instructions`
 301 | - A variable `days_vector` that contains the days of the week has already been created for you.
 302 | - Use `days_vector` to set the names of `poker_vector` and `roulette_vector`.
 303 | 
 304 | `@hint`
 305 | You can use `names(poker_vector) <- days_vector` to set the names of the elements of `poker_vector`. Do a similar thing for `roulette_vector`.
 306 | 
 307 | `@pre_exercise_code`
 308 | ```{r}
 309 | # no pec
 310 | ```
 311 | 
 312 | `@sample_code`
 313 | ```{r}
 314 | # Poker winnings from Monday to Friday
 315 | poker_vector <- c(140, -50, 20, -120, 240)
 316 | 
 317 | # Roulette winnings from Monday to Friday
 318 | roulette_vector <- c(-24, -50, 100, -350, 10)
 319 | 
 320 | # The variable days_vector
 321 | days_vector <- c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday")
 322 |  
 323 | # Assign the names of the day to roulette_vector and poker_vector
 324 | names(poker_vector) <-   
 325 | names(roulette_vector) <-
 326 | ```
 327 | 
 328 | `@solution`
 329 | ```{r}
 330 | # Poker winnings from Monday to Friday
 331 | poker_vector <- c(140, -50, 20, -120, 240)
 332 | 
 333 | # Roulette winnings from Monday to Friday
 334 | roulette_vector <- c(-24, -50, 100, -350, 10)
 335 | 
 336 | # The variable days_vector
 337 | days_vector <- c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday")
 338 | 
 339 | # Assign the names of the day to roulette_vector and poker_vector
 340 | names(poker_vector) <- days_vector
 341 | names(roulette_vector) <- days_vector
 342 | ```
 343 | 
 344 | `@sct`
 345 | ```{r}
 346 | msg <- "Do not change the predefined variables `poker_vector`, `roulette_vector` or `days_vector`."
 347 | 
 348 | ex() %>% check_object("poker_vector", undefined_msg = msg) %>% check_equal(incorrect_msg = msg)
 349 | 
 350 | ex() %>% check_object("roulette_vector", undefined_msg = msg) %>% check_equal(incorrect_msg = msg)
 351 | 
 352 | ex() %>% check_object("days_vector", undefined_msg = msg)  %>% check_equal(incorrect_msg = msg)
 353 | 
 354 | ex() %>% check_object("poker_vector") %>% check_equal(incorrect_msg = "Make sure that you assign `days_vector` to the names of `poker_vector`.", eq_condition = "equal")
 355 | 
 356 | ex() %>% check_object("roulette_vector") %>% check_equal(eq_condition = "equal", incorrect_msg = "Make sure that you assign `days_vector` to the names of `roulette_vector`.")
 357 | 
 358 | 
 359 | success_msg("Nice one! A word of advice: try to avoid code duplication at all times. Continue to the next exercise and learn how to do arithmetic with vectors!")
 360 | ```
 361 | 
 362 | ---
 363 | 
 364 | ## Calculating total winnings
 365 | 
 366 | ```yaml
 367 | type: NormalExercise
 368 | key: da995f099f
 369 | xp: 100
 370 | skills:
 371 |   - 1
 372 | ```
 373 | 
 374 | Now that you have the poker and roulette winnings nicely as named vectors, you can start doing some data analytical magic. 
 375 | 
 376 | You want to find out the following type of information:
 377 | 
 378 | - How much has been your overall profit or loss per day of the week?
 379 | - Have you lost money over the week in total?
 380 | - Are you winning/losing money on poker or on roulette?
 381 | 
 382 | To get the answers, you have to do arithmetic calculations on vectors. 
 383 | 
 384 | It is important to know that if you sum two vectors in R, it takes the element-wise sum. For example, the following three statements are completely equivalent:
 385 | 
 386 | ```
 387 | c(1, 2, 3) + c(4, 5, 6)
 388 | c(1 + 4, 2 + 5, 3 + 6)
 389 | c(5, 7, 9)
 390 | ```
 391 | 
 392 | You can also do the calculations with variables that represent vectors:
 393 | 
 394 | ```
 395 | a <- c(1, 2, 3) 
 396 | b <- c(4, 5, 6)
 397 | c <- a + b
 398 | ```
 399 | 
 400 | `@instructions`
 401 | - Take the sum of the variables `A_vector` and `B_vector` and assign it to `total_vector`.
 402 | - Inspect the result by printing out `total_vector`.
 403 | 
 404 | `@hint`
 405 | Use the `+` operator to sum `A_vector` and `B_vector`. Use `<-` to assign the result to `total_vector`.
 406 | 
 407 | `@pre_exercise_code`
 408 | ```{r}
 409 | # no pec
 410 | ```
 411 | 
 412 | `@sample_code`
 413 | ```{r}
 414 | A_vector <- c(1, 2, 3)
 415 | B_vector <- c(4, 5, 6)
 416 | 
 417 | # Take the sum of A_vector and B_vector
 418 | total_vector <- 
 419 |   
 420 | # Print out total_vector
 421 | 
 422 | ```
 423 | 
 424 | `@solution`
 425 | ```{r}
 426 | A_vector <- c(1, 2, 3)
 427 | B_vector <- c(4, 5, 6)
 428 | 
 429 | # Take the sum of A_vector and B_vector
 430 | total_vector <- A_vector + B_vector
 431 | 
 432 | # Print out total_vector
 433 | total_vector
 434 | ```
 435 | 
 436 | `@sct`
 437 | ```{r}
 438 | # Feedback shown when either given input vector is missing or has been altered.
 439 | keep_inputs_msg <- "Do not change the contents of `A_vector` or `B_vector`!"
 440 | ex() %>% check_object("A_vector", undefined_msg = keep_inputs_msg) %>% check_equal(incorrect_msg = keep_inputs_msg)
 441 | ex() %>% check_object("B_vector", undefined_msg = keep_inputs_msg) %>% check_equal(incorrect_msg = keep_inputs_msg)
 442 | 
 443 | # total_vector must exist and match the value computed by the solution.
 444 | ex() %>% check_object("total_vector") %>% check_equal(incorrect_msg = "Make sure that `total_vector` contains the sum of `A_vector` and `B_vector`.")
 445 | # The learner also has to print total_vector.
 446 | ex() %>% check_output_expr("total_vector", missing_msg = "Don't forget to print out `total_vector`! Simply write `total_vector` on a new line.")
 447 | success_msg("Good job! Continue to the next exercise.")
 448 | ```
 449 | 
 450 | ---
 451 | 
 452 | ## Calculating total winnings (2)
 453 | 
 454 | ```yaml
 455 | type: NormalExercise
 456 | key: 2969d8ed65
 457 | xp: 100
 458 | skills:
 459 |   - 1
 460 | ```
 461 | 
 462 | Now that you understand how R does arithmetic with vectors, it is time to get those Ferraris in your garage! First, you need to understand what the overall profit or loss per day of the week was. The total daily profit is the sum of the profit/loss you realized on poker per day, and the profit/loss you realized on roulette per day. 
 463 | 
 464 | In R, this is just the sum of `roulette_vector` and `poker_vector`.
 465 | 
 466 | `@instructions`
 467 | Assign to the variable `total_daily` how much you won or lost on each day in total (poker and roulette combined).
 468 | 
 469 | `@hint`
 470 | Similar to the previous exercise, assign the sum of two vectors to a new variable, `total_daily`.
 471 | 
 472 | `@pre_exercise_code`
 473 | ```{r}
 474 | # no pec
 475 | ```
 476 | 
 477 | `@sample_code`
 478 | ```{r}
 479 | # Poker and roulette winnings from Monday to Friday:
 480 | poker_vector <- c(140, -50, 20, -120, 240)
 481 | roulette_vector <- c(-24, -50, 100, -350, 10)
 482 | days_vector <- c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday")
 483 | names(poker_vector) <- days_vector
 484 | names(roulette_vector) <- days_vector
 485 | 
 486 | # Assign to total_daily how much you won/lost on each day
 487 | total_daily <- 
 488 | ```
 489 | 
 490 | `@solution`
 491 | ```{r}
 492 | # Poker and roulette winnings from Monday to Friday:
 493 | poker_vector <- c(140, -50, 20, -120, 240)
 494 | roulette_vector <- c(-24, -50, 100, -350, 10)
 495 | days_vector <- c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday")
 496 | names(poker_vector) <- days_vector
 497 | names(roulette_vector) <- days_vector
 498 | 
 499 | # Assign to total_daily how much you won/lost on each day
 500 | total_daily <- poker_vector + roulette_vector
 501 | ```
 502 | 
 503 | `@sct`
 504 | ```{r}
 505 | # Feedback reused whenever one of the given vectors is missing or was modified.
 506 | msg <- "Do not change anything about the definition and naming of `poker_vector` and `roulette_vector`."
 507 | ex() %>% check_object("days_vector", undefined_msg = msg) %>% check_equal(incorrect_msg = msg)
 508 | # eq_condition = "equal" also compares attributes, so the day names are verified (testwhat's default, "equivalent", ignores them).
 509 | ex() %>% check_object("poker_vector", undefined_msg = msg) %>% check_equal(eq_condition = "equal", incorrect_msg = msg)
 510 | ex() %>% check_object("roulette_vector", undefined_msg = msg) %>% check_equal(eq_condition = "equal", incorrect_msg = msg)
 511 | 
 512 | # total_daily must match the value computed by the solution.
 513 | ex() %>% check_object("total_daily") %>% check_equal(incorrect_msg = "Make sure that you assign the sum of `poker_vector` and `roulette_vector` to `total_daily`.")
 514 | 
 515 | success_msg("Great! Continue to the next exercise.")
 516 | ```
 517 | 
 518 | ---
 519 | 
 520 | ## Calculating total winnings (3)
 521 | 
 522 | ```yaml
 523 | type: NormalExercise
 524 | key: e66a56b9f0
 525 | xp: 100
 526 | skills:
 527 |   - 1
 528 | ```
 529 | 
 530 | Based on the previous analysis, it looks like you had a mix of good and bad days. This is not what your ego expected, and you wonder if there may be a very tiny chance you have lost money over the week in total? 
 531 | 
 532 | A function that helps you to answer this question is `sum()`. It calculates the sum of all elements of a vector. For example, to calculate the total amount of money you have lost/won with poker you do: 
 533 | 
 534 | ```
 535 | total_poker <- sum(poker_vector)
 536 | ```
 537 | 
 538 | `@instructions`
 539 | - Calculate the total amount of money that you have won/lost with roulette and assign to the variable `total_roulette`.
 540 | - Now that you have the totals for roulette and poker, you can easily calculate `total_week` (which is the sum of all gains and losses of the week).
 541 | - Print out `total_week`.
 542 | 
 543 | `@hint`
 544 | Use the `sum()` function to get the total of the `roulette_vector`. `total_week` is then the sum of `total_roulette` and `total_poker`.
 545 | 
 546 | `@pre_exercise_code`
 547 | ```{r}
 548 | # no pec
 549 | ```
 550 | 
 551 | `@sample_code`
 552 | ```{r}
 553 | # Poker and roulette winnings from Monday to Friday:
 554 | poker_vector <- c(140, -50, 20, -120, 240)
 555 | roulette_vector <- c(-24, -50, 100, -350, 10)
 556 | days_vector <- c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday")
 557 | names(poker_vector) <- days_vector
 558 | names(roulette_vector) <- days_vector
 559 | 
 560 | # Total winnings with poker
 561 | total_poker <- sum(poker_vector)
 562 | 
 563 | # Total winnings with roulette
 564 | total_roulette <-  
 565 | 
 566 | # Total winnings overall
 567 | total_week <- 
 568 | 
 569 | # Print out total_week
 570 |   
 571 | ```
 572 | 
 573 | `@solution`
 574 | ```{r}
 575 | # Poker and roulette winnings from Monday to Friday:
 576 | poker_vector <- c(140, -50, 20, -120, 240)
 577 | roulette_vector <- c(-24, -50, 100, -350, 10)
 578 | days_vector <- c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday")
 579 | names(poker_vector) <- days_vector
 580 | names(roulette_vector) <- days_vector
 581 | 
 582 | # Total winnings with poker
 583 | total_poker <- sum(poker_vector)
 584 | 
 585 | # Total winnings with roulette
 586 | total_roulette <-  sum(roulette_vector)
 587 | 
 588 | # Total winnings overall
 589 | total_week <- total_roulette + total_poker
 590 | 
 591 | # Print out total_week
 592 | total_week
 593 | ```
 594 | 
 595 | `@sct`
 596 | ```{r}
 597 | # Feedback reused whenever one of the given vectors is missing or was modified.
 598 | msg <- "Do not change anything about the definition and naming of `poker_vector` and `roulette_vector`."
 599 | ex() %>% check_object("days_vector", undefined_msg = msg) %>% check_equal(incorrect_msg = msg)
 600 | # eq_condition = "equal" also compares attributes, so the day names are verified (testwhat's default, "equivalent", ignores them).
 601 | ex() %>% check_object("poker_vector", undefined_msg = msg) %>% check_equal(eq_condition = "equal", incorrect_msg = msg)
 602 | ex() %>% check_object("roulette_vector", undefined_msg = msg) %>% check_equal(eq_condition = "equal", incorrect_msg = msg)
 603 | 
 604 | # The three totals must match the values computed by the solution.
 605 | ex() %>% check_object("total_poker") %>% check_equal(incorrect_msg = "Make sure that you assign to `total_poker` the sum of the `poker_vector`.")
 606 | ex() %>% check_object("total_roulette") %>% check_equal(incorrect_msg = "Make sure that you assign to `total_roulette` the sum of the `roulette_vector`.")
 607 | ex() %>% check_object("total_week") %>% check_equal(incorrect_msg = "Make sure that you assign to `total_week` the sum of the other two total vectors: `total_roulette` and `total_poker`.")
 608 | 
 609 | # total_week has to be printed as well.
 610 | ex() %>% check_output_expr("total_week", missing_msg = "Don't forget to write `total_week` on a new line to print out the variable.")
 611 | 
 612 | success_msg("Well done. This is pretty bad news...")
 613 | ```
 614 | 
 615 | ---
 616 | 
 617 | ## Comparing total winnings
 618 | 
 619 | ```yaml
 620 | type: NormalExercise
 621 | key: f532f5332d
 622 | xp: 100
 623 | skills:
 624 |   - 1
 625 | ```
 626 | 
 627 | Oops, it seems like you are losing money. Time to rethink and adapt your strategy! This will require some deeper analysis... 
 628 | 
 629 | After a short brainstorm in your hotel's jacuzzi, you realize that a possible explanation might be that your skills in roulette are not as well developed as your skills in poker. So maybe your total gains in poker are higher (or `>`) than in roulette.
 630 | 
 631 | `@instructions`
 632 | - Calculate `total_poker` and `total_roulette` as in the previous exercise. Use the `sum()` function twice.
 633 | - Check if your total gains in poker are higher than for roulette by using a comparison. Simply print out the result of this comparison. What do you conclude, should you focus on roulette or on poker?
 634 | 
 635 | `@hint`
 636 | - You partly calculated the answer to this question in the previous exercise already!
 637 | - To check if 6 is larger than 5, you type `6 > 5`. This returns a logical value (`TRUE` or `FALSE`).
 638 | 
 639 | `@pre_exercise_code`
 640 | ```{r}
 641 | # no pec
 642 | ```
 643 | 
 644 | `@sample_code`
 645 | ```{r}
 646 | # Poker and roulette winnings from Monday to Friday:
 647 | poker_vector <- c(140, -50, 20, -120, 240)
 648 | roulette_vector <- c(-24, -50, 100, -350, 10)
 649 | days_vector <- c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday")
 650 | names(poker_vector) <- days_vector
 651 | names(roulette_vector) <- days_vector
 652 | 
 653 | # Calculate total gains for poker and roulette
 654 | total_poker <-
 655 | total_roulette <-
 656 | 
 657 | # Check if you realized higher total gains in poker than in roulette
 658 | 
 659 | ```
 660 | 
 661 | `@solution`
 662 | ```{r}
 663 | # Poker and roulette winnings from Monday to Friday:
 664 | poker_vector <- c(140, -50, 20, -120, 240)
 665 | roulette_vector <- c(-24, -50, 100, -350, 10)
 666 | days_vector <- c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday")
 667 | names(poker_vector) <- days_vector
 668 | names(roulette_vector) <- days_vector
 669 | 
 670 | # Calculate total gains for poker and roulette
 671 | total_poker <- sum(poker_vector)
 672 | total_roulette <- sum(roulette_vector)
 673 | 
 674 | # Check if you realized higher total gains in poker than in roulette
 675 | total_poker > total_roulette
 676 | ```
 677 | 
 678 | `@sct`
 679 | ```{r}
 680 | # Feedback reused whenever one of the given vectors is missing or was modified.
 681 | unchanged_msg <- "Do not change anything about the definition and naming of `poker_vector` and `roulette_vector`."
 682 | ex() %>% check_object("days_vector", undefined_msg = unchanged_msg) %>% check_equal(incorrect_msg = unchanged_msg)
 683 | ex() %>% check_object("poker_vector", undefined_msg = unchanged_msg) %>% check_equal(eq_condition = "equal", incorrect_msg = unchanged_msg)
 684 | ex() %>% check_object("roulette_vector", undefined_msg = unchanged_msg) %>% check_equal(eq_condition = "equal", incorrect_msg = unchanged_msg)
 685 | 
 686 | # Both totals must match the sums computed by the solution.
 687 | ex() %>% check_object("total_poker") %>% check_equal(incorrect_msg = "Make sure that you assign to `total_poker` the sum of the `poker_vector`. Use `sum()`.")
 688 | ex() %>% check_object("total_roulette") %>% check_equal(incorrect_msg = "Make sure that you assign to `total_roulette` the sum of the `roulette_vector`. Use `sum()`.")
 689 | 
 690 | # The result of the comparison has to appear in the learner's output.
 691 | ex() %>% check_output_expr("total_poker > total_roulette", missing_msg = "Have you correctly carried out the comparison? To check if `total_poker` is greater than `total_roulette`, you can use `total_poker > total_roulette`.")
 692 | success_msg("Good job! Continue to the next exercise.")
 693 | ```
 694 | 
 695 | ---
 696 | 
 697 | ## Vector selection: the good times
 698 | 
 699 | ```yaml
 700 | type: NormalExercise
 701 | key: 8d78be44e9
 702 | xp: 100
 703 | skills:
 704 |   - 1
 705 | ```
 706 | 
 707 | Your hunch seemed to be right. It appears that the poker game is more your cup of tea than roulette. 
 708 | 
 709 | Another possible route for investigation is your performance at the beginning of the working week compared to the end of it. You did have a couple of Margarita cocktails at the end of the week... 
 710 | 
 711 | To answer that question, you only want to focus on a selection of the `poker_vector`. In other words, your goal is to select specific elements of the vector. To select elements of a vector (and later matrices, data frames, ...), you can use square brackets. Between the square brackets, you indicate what elements to select. For example, to select the first element of the vector, you type `poker_vector[1]`. To select the second element of the vector, you type `poker_vector[2]`, etc. Notice that the first element in a vector has index 1, not 0 as in many other programming languages.
 712 | 
 713 | `@instructions`
 714 | Assign the poker results of <span translate="no">Wednesday</span> to the variable `poker_wednesday`.
 715 | 
 716 | `@hint`
 717 | Wednesday is the third element of `poker_vector`, and can thus be selected with `poker_vector[3]`.
 718 | 
 719 | `@pre_exercise_code`
 720 | ```{r}
 721 | # no pec
 722 | ```
 723 | 
 724 | `@sample_code`
 725 | ```{r}
 726 | # Poker and roulette winnings from Monday to Friday:
 727 | poker_vector <- c(140, -50, 20, -120, 240)
 728 | roulette_vector <- c(-24, -50, 100, -350, 10)
 729 | days_vector <- c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday")
 730 | names(poker_vector) <- days_vector
 731 | names(roulette_vector) <- days_vector
 732 | 
 733 | # Define a new variable based on a selection
 734 | poker_wednesday <- 
 735 | ```
 736 | 
 737 | `@solution`
 738 | ```{r}
 739 | # Poker and roulette winnings from Monday to Friday:
 740 | poker_vector <- c(140, -50, 20, -120, 240)
 741 | roulette_vector <- c(-24, -50, 100, -350, 10)
 742 | days_vector <- c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday")
 743 | names(poker_vector) <- days_vector
 744 | names(roulette_vector) <- days_vector
 745 | 
 746 | # Define a new variable based on a selection
 747 | poker_wednesday <- poker_vector[3]
 748 | ```
 749 | 
 750 | `@sct`
 751 | ```{r}
 752 | # Feedback reused whenever one of the given vectors is missing or was modified.
 753 | msg <- "Do not change anything about the definition and naming of `poker_vector` and `roulette_vector`."
 754 | ex() %>% check_object("days_vector", undefined_msg = msg) %>% check_equal(incorrect_msg = msg)
 755 | # eq_condition = "equal" also compares attributes, so the day names are verified (testwhat's default, "equivalent", ignores them).
 756 | ex() %>% check_object("poker_vector", undefined_msg = msg) %>% check_equal(eq_condition = "equal", incorrect_msg = msg)
 757 | ex() %>% check_object("roulette_vector", undefined_msg = msg) %>% check_equal(eq_condition = "equal", incorrect_msg = msg)
 758 | 
 759 | # poker_wednesday must exist and match the element selected by the solution.
 760 | ex() %>% check_object("poker_wednesday", undefined_msg = "Please make sure to define a variable `poker_wednesday`.") %>% check_equal(incorrect_msg = "It looks like `poker_wednesday` does not contain the correct value of the `poker_vector`.")
 761 | 
 762 | success_msg("Great! R also makes it possible to select multiple elements from a vector at once. Learn how in the next exercise!")
 763 | ```
 764 | 
 765 | ---
 766 | 
 767 | ## Vector selection: the good times (2)
 768 | 
 769 | ```yaml
 770 | type: NormalExercise
 771 | key: '1351521670'
 772 | xp: 100
 773 | skills:
 774 |   - 1
 775 | ```
 776 | 
 777 | How about analyzing your midweek results? 
 778 | 
 779 | To select multiple elements from a vector, you can add square brackets at the end of it. Between the brackets, you indicate which elements should be selected. Suppose you want to select the first and the fifth day of the week: use the vector `c(1, 5)` between the square brackets. For example, the code below selects the first and fifth element of `poker_vector`:
 780 | 
 781 | ```
 782 | poker_vector[c(1, 5)]
 783 | ```
 784 | 
 785 | `@instructions`
 786 | Assign the poker results of <span translate="no">Tuesday, Wednesday</span> and <span translate="no">Thursday</span> to the variable `poker_midweek`.
 787 | 
 788 | `@hint`
 789 | Use the vector `c(2, 3, 4)` between square brackets to select the correct elements of `poker_vector`.
 790 | 
 791 | `@pre_exercise_code`
 792 | ```{r}
 793 | # no pec
 794 | ```
 795 | 
 796 | `@sample_code`
 797 | ```{r}
 798 | # Poker and roulette winnings from Monday to Friday:
 799 | poker_vector <- c(140, -50, 20, -120, 240)
 800 | roulette_vector <- c(-24, -50, 100, -350, 10)
 801 | days_vector <- c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday")
 802 | names(poker_vector) <- days_vector
 803 | names(roulette_vector) <- days_vector
 804 | 
 805 | # Define a new variable based on a selection
 806 | poker_midweek <- 
 807 | ```
 808 | 
 809 | `@solution`
 810 | ```{r}
 811 | # Poker and roulette winnings from Monday to Friday:
 812 | poker_vector <- c(140, -50, 20, -120, 240)
 813 | roulette_vector <- c(-24, -50, 100, -350, 10)
 814 | days_vector <- c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday")
 815 | names(poker_vector) <- days_vector
 816 | names(roulette_vector) <- days_vector
 817 | 
 818 | # Define a new variable based on a selection
 819 | poker_midweek <- poker_vector[c(2, 3, 4)]
 820 | ```
 821 | 
 822 | `@sct`
 823 | ```{r}
 824 | # Feedback reused whenever one of the given vectors is missing or was modified.
 825 | msg <- "Do not change anything about the definition and naming of `poker_vector` and `roulette_vector`."
 826 | ex() %>% check_object("days_vector", undefined_msg = msg) %>% check_equal(incorrect_msg = msg)
 827 | # eq_condition = "equal" also compares attributes, so the day names are verified (testwhat's default, "equivalent", ignores them).
 828 | ex() %>% check_object("poker_vector", undefined_msg = msg) %>% check_equal(eq_condition = "equal", incorrect_msg = msg)
 829 | ex() %>% check_object("roulette_vector", undefined_msg = msg) %>% check_equal(eq_condition = "equal", incorrect_msg = msg)
 830 | # poker_midweek must match the selection made by the solution.
 831 | ex() %>% check_object("poker_midweek") %>% check_equal(incorrect_msg = "It looks like `poker_midweek` does not contain the correct values from `poker_vector`. You can use the vector `c(2, 3, 4)` inside square brackets.")
 832 | 
 833 | success_msg("Well done! Continue to the next exercise to specialize in vector selection some more!")
 834 | ```
 835 | 
 836 | ---
 837 | 
 838 | ## Vector selection: the good times (3)
 839 | 
 840 | ```yaml
 841 | type: NormalExercise
 842 | key: 27976b79f4
 843 | xp: 100
 844 | skills:
 845 |   - 1
 846 | ```
 847 | 
 848 | Selecting multiple elements of `poker_vector` with `c(2, 3, 4)` is not very convenient. Statisticians created an easier way to do this: `c(2, 3, 4)` can be abbreviated to `2:4`, which generates a vector with all natural numbers from 2 up to 4.
 849 | 
 850 | So, another way to find the mid-week results is `poker_vector[2:4]`. Notice how the vector `2:4` is placed between the square brackets to select element 2 up to 4.
 851 | 
 852 | `@instructions`
 853 | Assign to `roulette_selection_vector` the roulette results from <span translate="no">Tuesday</span> up to <span translate="no">Friday</span>; make use of `:` if it makes things easier for you.
 854 | 
 855 | `@hint`
 856 | Assign a selection of `roulette_vector` to `roulette_selection_vector` by placing `2:5` between square brackets.
 857 | 
 858 | `@pre_exercise_code`
 859 | ```{r}
 860 | # no pec
 861 | ```
 862 | 
 863 | `@sample_code`
 864 | ```{r}
 865 | # Poker and roulette winnings from Monday to Friday:
 866 | poker_vector <- c(140, -50, 20, -120, 240)
 867 | roulette_vector <- c(-24, -50, 100, -350, 10)
 868 | days_vector <- c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday")
 869 | names(poker_vector) <- days_vector
 870 | names(roulette_vector) <- days_vector
 871 | 
 872 | # Define a new variable based on a selection
 873 | roulette_selection_vector <- 
 874 | ```
 875 | 
 876 | `@solution`
 877 | ```{r}
 878 | # Poker and roulette winnings from Monday to Friday:
 879 | poker_vector <- c(140, -50, 20, -120, 240)
 880 | roulette_vector <- c(-24, -50, 100, -350, 10)
 881 | days_vector <- c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday")
 882 | names(poker_vector) <- days_vector
 883 | names(roulette_vector) <- days_vector
 884 | 
 885 | # Define a new variable based on a selection
 886 | roulette_selection_vector <- roulette_vector[2:5]
 887 | ```
 888 | 
 889 | `@sct`
 890 | ```{r}
 891 | # Feedback reused whenever one of the given vectors is missing or was modified.
 892 | msg <- "Do not change anything about the definition and naming of `poker_vector` and `roulette_vector`."
 893 | ex() %>% check_object("days_vector", undefined_msg = msg) %>% check_equal(incorrect_msg = msg)
 894 | # eq_condition = "equal" also compares attributes, so the day names are verified (testwhat's default, "equivalent", ignores them).
 895 | ex() %>% check_object("poker_vector", undefined_msg = msg) %>% check_equal(eq_condition = "equal", incorrect_msg = msg)
 896 | ex() %>% check_object("roulette_vector", undefined_msg = msg) %>% check_equal(eq_condition = "equal", incorrect_msg = msg)
 897 | # roulette_selection_vector must exist and match the selection made by the solution.
 898 | ex() %>% check_object("roulette_selection_vector", undefined_msg = "Please make sure to define a variable `roulette_selection_vector`.") %>% check_equal(incorrect_msg = "It looks like `roulette_selection_vector` does not contain the correct selection from `roulette_vector`. Make sure to use the right indexes.")
 899 | 
 900 | success_msg("Awesome! The colon operator is extremely useful and very often used in R programming, so remember it well. Proceed to the next exercise.")
 901 | ```
 902 | 
 903 | ---
 904 | 
 905 | ## Vector selection: the good times (4)
 906 | 
 907 | ```yaml
 908 | type: NormalExercise
 909 | key: e6c263ddee
 910 | xp: 100
 911 | skills:
 912 |   - 1
 913 | ```
 914 | 
 915 | Another way to tackle the previous exercise is by using the names of the vector elements (<span translate="no">Monday, Tuesday,</span> ...) instead of their numeric positions. For example, 
 916 | 
 917 | ```
 918 | poker_vector["Monday"]
 919 | ```
 920 | 
 921 | will select the first element of `poker_vector` since `"Monday"` is the name of that first element.
 922 | 
 923 | Just like you did in the previous exercise with numerics, you can also use the element names to select multiple elements, for example: 
 924 | 
 925 | ```
 926 | poker_vector[c("Monday","Tuesday")]
 927 | ```
 928 | 
 929 | `@instructions`
 930 | - Select the first three elements in `poker_vector` by using their names: `"Monday"`, `"Tuesday"` and `"Wednesday"`. Assign the result of the selection to `poker_start`.
 931 | - Calculate the average of the values in `poker_start` with the `mean()` function. Simply print out the result so you can inspect it.
 932 | 
 933 | `@hint`
 934 | - You can use `c("Monday", "Tuesday", "Wednesday")` inside square brackets to subset `poker_vector` appropriately.
 935 | - You can use `mean(poker_start)` to get the mean of the elements in `poker_start`. You do not need the mean of all poker elements, but only of the first three days.
 936 | 
 937 | `@pre_exercise_code`
 938 | ```{r}
 939 | # no pec
 940 | ```
 941 | 
 942 | `@sample_code`
 943 | ```{r}
 944 | # Poker and roulette winnings from Monday to Friday:
 945 | poker_vector <- c(140, -50, 20, -120, 240)
 946 | roulette_vector <- c(-24, -50, 100, -350, 10)
 947 | days_vector <- c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday")
 948 | names(poker_vector) <- days_vector
 949 | names(roulette_vector) <- days_vector
 950 | 
 951 | # Select poker results for Monday, Tuesday and Wednesday
 952 | poker_start <- 
 953 |   
 954 | # Calculate the average of the elements in poker_start
 955 | 
 956 | ```
 957 | 
 958 | `@solution`
 959 | ```{r}
 960 | # Poker and roulette winnings from Monday to Friday:
 961 | poker_vector <- c(140, -50, 20, -120, 240)
 962 | roulette_vector <- c(-24, -50, 100, -350, 10)
 963 | days_vector <- c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday")
 964 | names(poker_vector) <- days_vector
 965 | names(roulette_vector) <- days_vector
 966 | 
 967 | # Select poker results for Monday, Tuesday and Wednesday
 968 | poker_start <- poker_vector[c("Monday", "Tuesday", "Wednesday")]
 969 |   
 970 | # Calculate the average of the elements in poker_start
 971 | mean(poker_start)
 972 | ```
 973 | 
 974 | `@sct`
 975 | ```{r}
 976 | # Feedback reused whenever one of the given vectors is missing or was modified.
 977 | msg <- "Do not change anything about the definition and naming of `poker_vector` and `roulette_vector`."
 978 | ex() %>% check_object("days_vector", undefined_msg = msg) %>% check_equal(incorrect_msg = msg)
 979 | # eq_condition = "equal" also compares attributes, so the day names are verified (testwhat's default, "equivalent", ignores them).
 980 | ex() %>% check_object("poker_vector", undefined_msg = msg) %>% check_equal(eq_condition = "equal", incorrect_msg = msg)
 981 | ex() %>% check_object("roulette_vector", undefined_msg = msg) %>% check_equal(eq_condition = "equal", incorrect_msg = msg)
 982 | # poker_start must match the name-based selection made by the solution.
 983 | ex() %>% check_object("poker_start") %>% check_equal(incorrect_msg = "It looks like `poker_start` does not contain the first three values of `poker_vector`. You can use `c(\"Monday\", \"Tuesday\", \"Wednesday\")` inside square brackets to do this.")
 984 | # The mean of poker_start has to appear in the learner's output.
 985 | ex() %>% check_output_expr("mean(poker_start)", missing_msg = "Have you correctly calculated the average of the values in `poker_start` and printed it out? Use `mean(poker_start)`.")
 986 | 
 987 | success_msg("Good job! Apart from subsetting vectors by index or by name, you can also subset vectors by comparison. The next exercises will show you how!")
 988 | ```
 989 | 
 990 | ---
 991 | 
 992 | ## Selection by comparison - Step 1
 993 | 
 994 | ```yaml
 995 | type: NormalExercise
 996 | key: f0f619c901
 997 | xp: 100
 998 | skills:
 999 |   - 1
1000 | ```
1001 | 
1002 | By making use of comparison operators, we can approach the previous question in a more proactive way. 
1003 | 
1004 | The (logical) comparison operators known to R are:
1005 | 
1006 | - `<` for less than
1007 | - `>` for greater than
1008 | - `<=` for less than or equal to
1009 | - `>=` for greater than or equal to
1010 | - `==` for equal to each other
1011 | - `!=` for not equal to each other
1012 | 
1013 | As seen in the previous chapter, stating `6 > 5` returns `TRUE`. The nice thing about R is that you can use these comparison operators also on vectors. For example:
1014 | 
1015 | ```
1016 | c(4, 5, 6) > 5
1017 | [1] FALSE FALSE  TRUE
1018 | ```
1019 | 
1020 | This command tests for every element of the vector if the condition stated by the comparison operator is `TRUE` or `FALSE`.
1021 | 
1022 | `@instructions`
1023 | - Check which elements in `poker_vector` are positive (i.e. > 0) and assign this to `selection_vector`. 
1024 | - Print out `selection_vector` so you can inspect it. The printout tells you whether you won (`TRUE`) or lost (`FALSE`) any money for each day.
1025 | 
1026 | `@hint`
1027 | In order to check for which days your poker gains are positive, R should check for each element of `poker_vector` whether it is larger than zero. `some_vector > 0` is the way to tell R what you are after.
1028 | 
1029 | `@pre_exercise_code`
1030 | ```{r}
1031 | # no pec
1032 | ```
1033 | 
1034 | `@sample_code`
1035 | ```{r}
1036 | # Poker and roulette winnings from Monday to Friday:
1037 | poker_vector <- c(140, -50, 20, -120, 240)
1038 | roulette_vector <- c(-24, -50, 100, -350, 10)
1039 | days_vector <- c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday")
1040 | names(poker_vector) <- days_vector
1041 | names(roulette_vector) <- days_vector
1042 | 
1043 | # Which days did you make money on poker?
1044 | selection_vector <- 
1045 |   
1046 | # Print out selection_vector
1047 | 
1048 | ```
1049 | 
1050 | `@solution`
1051 | ```{r}
1052 | # Poker and roulette winnings from Monday to Friday:
1053 | poker_vector <- c(140, -50, 20, -120, 240)
1054 | roulette_vector <- c(-24, -50, 100, -350, 10)
1055 | days_vector <- c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday")
1056 | names(poker_vector) <- days_vector
1057 | names(roulette_vector) <- days_vector
1058 | 
1059 | # Which days did you make money on poker?
1060 | selection_vector <- poker_vector > 0
1061 |   
1062 | # Print out selection_vector
1063 | selection_vector
1064 | ```
1065 | 
1066 | `@sct`
1067 | ```{r}
1068 | # Feedback reused whenever one of the given vectors is missing or was modified.
1069 | unchanged_msg <- "Do not change anything about the definition and naming of `poker_vector` and `roulette_vector`."
1070 | ex() %>% check_object("days_vector", undefined_msg = unchanged_msg) %>% check_equal(incorrect_msg = unchanged_msg)
1071 | ex() %>% check_object("poker_vector", undefined_msg = unchanged_msg) %>% check_equal(eq_condition = "equal", incorrect_msg = unchanged_msg)
1072 | ex() %>% check_object("roulette_vector", undefined_msg = unchanged_msg) %>% check_equal(eq_condition = "equal", incorrect_msg = unchanged_msg)
1073 | 
1074 | # selection_vector must match the logical vector computed by the solution.
1075 | ex() %>% check_object("selection_vector") %>% check_equal(incorrect_msg = "It looks like `selection_vector` does not contain the correct result. Remember that R uses element wise operations for vectors.")
1076 | # The learner also has to print selection_vector.
1077 | ex() %>% check_output_expr("selection_vector", missing_msg = "Don't forget to print out `selection_vector` by writing the variable name on a new line.")
1078 | 
1079 | success_msg("Great!")
1080 | ```
1081 | 
1082 | ---
1083 | 
1084 | ## Selection by comparison - Step 2
1085 | 
1086 | ```yaml
1087 | type: NormalExercise
1088 | key: 2754fc5cd4
1089 | xp: 100
1090 | skills:
1091 |   - 1
1092 | ```
1093 | 
1094 | Working with comparisons will make your data analytical life easier. Instead of selecting a subset of days to investigate yourself (like before), you can simply ask R to return only those days where you realized a positive return for poker. 
1095 | 
1096 | In the previous exercises you used `selection_vector <- poker_vector > 0` to find the days on which you had a positive poker return. Now, you would like to know not only the days on which you won, but also how much you won on those days. 
1097 | 
1098 | You can select the desired elements, by putting `selection_vector` between the square brackets that follow `poker_vector`:
1099 | 
1100 | ```
1101 | poker_vector[selection_vector]
1102 | ```
1103 | 
1104 | R knows what to do when you pass a logical vector in square brackets: it will only select the elements that correspond to `TRUE` in `selection_vector`.
1105 | 
1106 | `@instructions`
1107 | Use `selection_vector` in square brackets to assign the amounts that you won on the profitable days to the variable `poker_winning_days`.
1108 | 
1109 | `@hint`
1110 | Use `poker_vector[selection_vector]` to select the desired elements from `poker_vector`, and assign the result to `poker_winning_days`.
1111 | 
1112 | `@pre_exercise_code`
1113 | ```{r}
1114 | # no pec
1115 | ```
1116 | 
1117 | `@sample_code`
1118 | ```{r}
1119 | # Poker and roulette winnings from Monday to Friday:
1120 | poker_vector <- c(140, -50, 20, -120, 240)
1121 | roulette_vector <- c(-24, -50, 100, -350, 10)
1122 | days_vector <- c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday")
1123 | names(poker_vector) <- days_vector
1124 | names(roulette_vector) <- days_vector
1125 | 
1126 | # Which days did you make money on poker?
1127 | selection_vector <- poker_vector > 0
1128 | 
1129 | # Select from poker_vector these days
1130 | poker_winning_days <- 
1131 | ```
1132 | 
1133 | `@solution`
1134 | ```{r}
1135 | # Poker and roulette winnings from Monday to Friday:
1136 | poker_vector <- c(140, -50, 20, -120, 240)
1137 | roulette_vector <- c(-24, -50, 100, -350, 10)
1138 | days_vector <- c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday")
1139 | names(poker_vector) <- days_vector
1140 | names(roulette_vector) <- days_vector
1141 | 
1142 | # Which days did you make money on poker?
1143 | selection_vector <- poker_vector > 0
1144 | 
1145 | # Select from poker_vector these days
1146 | poker_winning_days <- poker_vector[selection_vector]
1147 | ```
1148 | 
1149 | `@sct`
1150 | ```{r}
1151 | # Feedback reused whenever one of the given vectors is missing or was modified.
1152 | msg <- "Do not change anything about the definition and naming of `poker_vector` and `roulette_vector`."
1153 | ex() %>% check_object("days_vector", undefined_msg = msg) %>% check_equal(incorrect_msg = msg)
1154 | # eq_condition = "equal" also compares attributes, so the day names are verified (testwhat's default, "equivalent", ignores them).
1155 | ex() %>% check_object("poker_vector", undefined_msg = msg) %>% check_equal(eq_condition = "equal", incorrect_msg = msg)
1156 | ex() %>% check_object("roulette_vector", undefined_msg = msg) %>% check_equal(eq_condition = "equal", incorrect_msg = msg)
1157 | # selection_vector is given in the sample code and must stay as computed there.
1158 | ex() %>% check_object("selection_vector") %>% check_equal(incorrect_msg = "Don't change the way `selection_vector` is calculated.")
1159 | # poker_winning_days must match the logical-index selection made by the solution.
1160 | ex() %>% check_object("poker_winning_days") %>% check_equal(incorrect_msg = "It looks like `poker_winning_days` does not contain the correct result. Use `poker_vector[selection_vector]`.")
1161 | 
1162 | success_msg("Good job! Continue to the next exercise.")
1163 | ```
1164 | 
1165 | ---
1166 | 
1167 | ## Advanced selection
1168 | 
1169 | ```yaml
1170 | type: NormalExercise
1171 | key: 59e8dcbbd5
1172 | xp: 100
1173 | skills:
1174 |   - 1
1175 | ```
1176 | 
1177 | Just like you did for poker, you also want to know those days where you realized a positive return for roulette.
1178 | 
1179 | `@instructions`
1180 | - Create the variable `selection_vector`, this time to see on which days you made a profit with roulette.
1181 | - Assign the amounts that you made on the days that you ended positively for roulette to the variable `roulette_winning_days`. This vector thus contains the positive winnings of `roulette_vector`.
1182 | 
1183 | `@hint`
1184 | Once you've correctly calculated `selection_vector`, you can again use `roulette_vector[selection_vector]` to select the positive results from `roulette_vector`.
1185 | 
1186 | `@pre_exercise_code`
1187 | ```{r}
1188 | # no pec
1189 | ```
1190 | 
1191 | `@sample_code`
1192 | ```{r}
1193 | # Poker and roulette winnings from Monday to Friday:
1194 | poker_vector <- c(140, -50, 20, -120, 240)
1195 | roulette_vector <- c(-24, -50, 100, -350, 10)
1196 | days_vector <- c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday")
1197 | names(poker_vector) <- days_vector  # label each amount with its weekday
1198 | names(roulette_vector) <- days_vector
1199 | 
1200 | # Which days did you make money on roulette?
1201 | selection_vector <-
1202 | 
1203 | # Select these days from roulette_vector
1204 | roulette_winning_days <- 
1205 | ```
1206 | 
1207 | `@solution`
1208 | ```{r}
1209 | # Poker and roulette winnings from Monday to Friday:
1210 | poker_vector <- c(140, -50, 20, -120, 240)
1211 | roulette_vector <- c(-24, -50, 100, -350, 10)
1212 | days_vector <- c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday")
1213 | names(poker_vector) <- days_vector  # label each amount with its weekday
1214 | names(roulette_vector) <- days_vector
1215 | 
1216 | # Which days did you make money on roulette?
1217 | selection_vector <- roulette_vector > 0  # logical vector: TRUE where the amount is positive
1218 | 
1219 | # Select these days from roulette_vector
1220 | roulette_winning_days <- roulette_vector[selection_vector]  # keeps only the TRUE positions
1221 | ```
1222 | 
1223 | `@sct`
1224 | ```{r}
1225 | msg <- "Do not change anything about the definition and naming of `days_vector`, `poker_vector` and `roulette_vector`."
1226 | # The three given vectors must still exist with their original values; they share one feedback message.
1227 | ex() %>% check_object("days_vector", undefined_msg = msg) %>% check_equal(incorrect_msg = msg)
1228 | 
1229 | ex() %>% check_object("poker_vector", undefined_msg = msg) %>% check_equal(eq_condition = "equal", incorrect_msg = msg)
1230 | 
1231 | ex() %>% check_object("roulette_vector", undefined_msg = msg) %>% check_equal(eq_condition = "equal", incorrect_msg = msg)
1232 | # Objects the student has to create in this exercise, each with targeted feedback.
1233 | ex() %>% check_object("selection_vector") %>% check_equal(incorrect_msg = "It looks like `selection_vector` does not contain the correct result. Use `roulette_vector > 0`.")
1234 | 
1235 | ex() %>% check_object("roulette_winning_days") %>% check_equal(incorrect_msg = "It looks like `roulette_winning_days` does not contain the correct result. Use `roulette_vector[selection_vector]`.")
1236 | 
1237 | 
1238 | success_msg("Great! This exercise concludes the chapter on vectors. The next chapter will introduce you to the two-dimensional version of vectors: matrices.")
1239 | ```
1240 | 


--------------------------------------------------------------------------------