├── IDI-stuff
│   ├── MOH-SQL-code.sql
│   ├── example-data-dictionaries
│   │   ├── ACC_Clean.pdf
│   │   └── DIA_Clean.pdf
│   └── useful-papers
│       ├── linking-Methodology.pdf
│       ├── prototype-spine-creation.pdf
│       └── use-of-IDI.pdf
├── LICENSE
├── R
│   ├── README.md
│   ├── beginners.zip
│   ├── connecting-R
│   │   ├── README.md
│   │   ├── databases-in-R.Rmd
│   │   ├── databases-in-R.html
│   │   └── remote-MySQL.R
│   ├── example-R.zip
│   ├── examples
│   │   └── bioinformatics
│   │       ├── analysis.R
│   │       ├── app.R
│   │       ├── automation.R
│   │       ├── create_variants_database.sql
│   │       ├── data
│   │       │   ├── new_records
│   │       │   │   └── new_records.csv
│   │       │   ├── patient_variants.csv
│   │       │   ├── variants.sqlite
│   │       │   └── variants_dodgy.sqlite
│   │       ├── functions.R
│   │       └── start_here.R
│   ├── intro-to-R.R
│   ├── sqlite-R
│   │   ├── MyDB.sqlite
│   │   ├── README.md
│   │   ├── Sandpit.sqlite
│   │   └── sqlite.R
│   └── tidy-data.R
├── README.md
├── create-database
│   ├── MySQL
│   │   ├── MySQL-database.sql
│   │   └── README.md
│   ├── SQL-data-definition-examples.sql
│   ├── SQLFiddle
│   │   ├── README.md
│   │   ├── T-SQL-analytics-schema.sql
│   │   ├── T-SQL-ape-schema.sql
│   │   └── T-SQL-notes-schema.sql
│   └── T-SQL
│       ├── .Rhistory
│       ├── README.md
│       └── T-SQL-database.sql
├── other-languages.md
├── scripts-from-notes
│   ├── README.md
│   ├── bayes-vs-frequentist.sql
│   └── manipulate-tables.sql
└── textbook-and-slides
    ├── README.md
    ├── SQL_Course_Slides_Day_1.pdf
    ├── SQL_Course_Slides_Day_2.pdf
    ├── SQL_Course_Slides_Day_3.pdf
    ├── SQL_Course_Slides_Day_4.pdf
    └── SQL_Course_Textbook.pdf
/IDI-stuff/MOH-SQL-code.sql:
--------------------------------------------------------------------------------
1 | -- -----------------------------------
2 |
3 | -- Here I provide queries that are complicated by the presence of long table and column names.
4 |
5 | -- These queries were provided by Stats NZ as example queries for the New Zealand Integrated Data Infrastructure (not for the course practice databases).
6 |
7 | -- Below each of the first three queries, I have provided a simplified version of the same query.
8 |
9 | -- The only changes made for the simplified version are:
10 | --
11 | -- 1. assignment of aliases for table and column names
12 | --
13 | -- 2. changing indentation and adding/removing newlines
14 |
15 | -- Note the queries are only 'simplified' if you know how the WITH clause works!
16 | -- The subquery in the WITH clause is only assigning aliases.
17 | -- The query below the WITH clause is doing all the 'work'.
18 |
19 | -- I have only simplified the first three. You can use the remaining queries for practice (try to simplify them in the same manner).
20 |
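-- (Editor's illustration, not part of the Stats NZ examples.) A minimal runnable
-- sketch of the pattern: the WITH clause below does nothing but rename a long
-- column name, and the query underneath it does all the work. The inline VALUES
-- table stands in for a real table with long names.
WITH Shortened AS (
    SELECT T.a_very_long_column_name_like_those_below AS x
    FROM (VALUES (1), (2), (3)) AS T(a_very_long_column_name_like_those_below)
)
SELECT count(x) AS n
FROM Shortened;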
21 | -- -----------------------------------
22 | -- -----------------------------------
23 |
24 | --Clients seen by snz_uid
25 | SELECT year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date) AS StartYear ,
26 | IDI_Clean_20181020.moh_clean.PRIMHD.snz_moh_uid
27 | FROM IDI_Clean_20181020.moh_clean.PRIMHD
28 | WHERE IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_type_code != 'T35'
29 | GROUP BY year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date),
30 | IDI_Clean_20181020.moh_clean.PRIMHD.snz_moh_uid
31 |
32 | -- simplified version
33 | WITH Shortened AS (
34 | SELECT M.moh_mhd_activity_start_date AS astart,
35 | M.snz_moh_uid AS muid,
36 | M.moh_mhd_activity_type_code AS activity
37 | FROM IDI_Clean_20181020.moh_clean.PRIMHD AS M
38 | )
39 | SELECT year(astart) AS StartYear, muid
40 | FROM Shortened
41 | WHERE activity != 'T35'
42 | GROUP BY year(astart), muid
43 |
44 | -- -----------------------------------
45 | -- -----------------------------------
46 |
47 | --Service users by snz_moh_uid
48 | SELECT year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date) AS StartYear,
49 | IDI_Clean_20181020.moh_clean.PRIMHD.snz_moh_uid
50 | FROM IDI_Clean_20181020.moh_clean.PRIMHD
51 | WHERE IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_type_code != 'T35'
52 | GROUP BY year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date),
53 | IDI_Clean_20181020.moh_clean.PRIMHD.snz_moh_uid
54 | ORDER BY year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date)
55 |
56 | -- simplified version
57 | WITH Shortened AS (
58 | SELECT M.moh_mhd_activity_start_date AS astart,
59 | M.snz_moh_uid AS muid,
60 | M.moh_mhd_activity_type_code AS activity
61 | FROM IDI_Clean_20181020.moh_clean.PRIMHD AS M
62 | )
63 | SELECT year(astart) AS StartYear, muid
64 | FROM Shortened
65 | WHERE activity != 'T35'
66 | GROUP BY year(astart), muid
67 | ORDER BY year(astart);
68 |
69 | -- -----------------------------------
70 | -- -----------------------------------
71 |
72 | --Service users/AoD service users
73 | SELECT year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date) AS StartYear,
74 | count (DISTINCT IDI_Clean_20181020.moh_clean.PRIMHD.snz_moh_uid) AS 'Service Users',
75 | count (DISTINCT (case when (IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_team_type_code in ('03', '11') or
76 | IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_type_code in ('T16', 'T17', 'T18', 'T19', 'T20','T48'))
77 | then IDI_Clean_20181020.moh_clean.PRIMHD.snz_moh_uid else NULL end)) AS 'AoD Service Users'
78 | FROM IDI_Clean_20181020.moh_clean.PRIMHD
79 | WHERE IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_type_code != 'T35'
80 | GROUP BY year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date);
81 |
82 | -- simplified version
83 | WITH Shortened AS (
84 | SELECT M.moh_mhd_activity_start_date AS astart,
85 | M.snz_moh_uid AS muid,
86 | M.moh_mhd_team_type_code AS team,
87 | M.moh_mhd_activity_type_code AS activity
88 | FROM IDI_Clean_20181020.moh_clean.PRIMHD M
89 | )
90 | SELECT year(astart) AS Year,
91 | count(DISTINCT muid) AS 'Service Users',
92 | count(DISTINCT(case when (team in ('03', '11') or activity in ('T16', 'T17', 'T18', 'T19', 'T20','T48'))
93 | then muid else NULL end)) AS 'AoD Service Users'
94 | FROM Shortened
95 | WHERE activity != 'T35'
96 | GROUP BY year(astart);
97 |
98 | -- -----------------------------------
99 | -- -----------------------------------
100 |
101 | --Service users by DHB/non-DHB
102 | SELECT year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date) AS StartYear ,
103 | case when [IDI_Metadata].[clean_read_CLASSIFICATIONS].[moh_primhd_organisation_code].[ORGANISATION_TYPE] = 'District Health Board (DHB)' then 'DHB'
104 | else 'non-DHB' end 'Organisation type',
105 | count (distinct IDI_Clean_20181020.moh_clean.PRIMHD.snz_moh_uid) 'Count of service users'
106 | FROM
107 | IDI_Clean_20181020.moh_clean.PRIMHD
108 | INNER JOIN IDI_Metadata.clean_read_CLASSIFICATIONS.moh_primhd_organisation_code on
109 | IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_organisation_id_code = IDI_Metadata.clean_read_CLASSIFICATIONS.moh_primhd_organisation_code.ORGANISATION_ID
110 | WHERE
111 | IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_type_code != 'T35'
112 | GROUP BY
113 | year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date),
114 | case when [IDI_Metadata].[clean_read_CLASSIFICATIONS].[moh_primhd_organisation_code].[ORGANISATION_TYPE] = 'District Health Board (DHB)' then 'DHB'
115 | else 'non-DHB' end
116 | ORDER BY
117 | year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date)
118 |
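-- (Editor's sketch, not one of the three simplified versions provided above.)
-- One possible simplification of the DHB/non-DHB query above, done in the same
-- manner: the WITH clause only assigns aliases (here it also holds the JOIN),
-- and the query below it does all the work.
WITH Shortened AS (
    SELECT M.moh_mhd_activity_start_date AS astart,
           M.snz_moh_uid                 AS muid,
           M.moh_mhd_activity_type_code  AS activity,
           C.ORGANISATION_TYPE           AS orgtype
    FROM IDI_Clean_20181020.moh_clean.PRIMHD AS M
    INNER JOIN IDI_Metadata.clean_read_CLASSIFICATIONS.moh_primhd_organisation_code AS C
      ON M.moh_mhd_organisation_id_code = C.ORGANISATION_ID
)
SELECT year(astart) AS StartYear,
       case when orgtype = 'District Health Board (DHB)' then 'DHB' else 'non-DHB' end AS 'Organisation type',
       count(DISTINCT muid) AS 'Count of service users'
FROM Shortened
WHERE activity != 'T35'
GROUP BY year(astart),
         case when orgtype = 'District Health Board (DHB)' then 'DHB' else 'non-DHB' end
ORDER BY year(astart);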
119 | -- -----------------------------------
120 | -- -----------------------------------
121 |
122 | --Face to face service users (clients seen)
123 | --Clients seen/AoD clients seen
124 | SELECT year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date) AS StartYear,
125 | count (distinct IDI_Clean_20181020.moh_clean.PRIMHD.snz_moh_uid) AS 'Clients Seen',
126 | count ( distinct case when (IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_team_type_code in ('03', '11') or
127 | IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_type_code in ('T16', 'T17', 'T18', 'T19', 'T20','T48')) then
128 | (IDI_Clean_20181020.moh_clean.PRIMHD.snz_moh_uid) else NULL end) AS 'AoD_Clients_Seen'
129 | FROM
130 | IDI_Clean_20181020.moh_clean.PRIMHD
131 | WHERE
132 | IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_type_code not in ('T35','T32','T33','T37','T08')
133 | AND IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_setting_code not in ('WR','PH','SM','OM')
134 | GROUP BY
135 | year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date);
136 |
137 | -- -----------------------------------
138 | -- -----------------------------------
139 |
140 | --Clients seen by DHB/non-DHB
141 | SELECT year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date) AS StartYear,
142 | case when IDI_Metadata.clean_read_CLASSIFICATIONS.moh_primhd_organisation_code.ORGANISATION_TYPE = 'District Health Board (DHB)'then 'DHB'
143 | else 'non-DHB' end 'Organisation type',
144 | count (DISTINCT snz_moh_uid) AS 'Clients seen'
145 | FROM
146 | IDI_Clean_20181020.moh_clean.PRIMHD
147 | INNER JOIN IDI_Metadata.clean_read_CLASSIFICATIONS.moh_primhd_organisation_code on
148 | IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_organisation_id_code = IDI_Metadata.clean_read_CLASSIFICATIONS.moh_primhd_organisation_code.ORGANISATION_ID
149 | WHERE
150 | IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_type_code not in ('T35','T32','T33','T37', 'T08')
151 | AND IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_setting_code not in ('WR','PH','SM','OM')
152 | GROUP BY
153 | year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date),
154 | case when IDI_Metadata.clean_read_CLASSIFICATIONS.moh_primhd_organisation_code.ORGANISATION_TYPE = 'District Health Board (DHB)'then 'DHB'
155 | else 'non-DHB' end
156 | ORDER BY
157 | year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date )
158 |
159 | -- -----------------------------------
160 | -- -----------------------------------
161 |
162 | --Contacts
163 | --AoD Contacts/all contacts
164 | SELECT
165 | year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date) AS StartYear,
166 | sum (case when (IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_team_type_code in ('03', '11') or
167 | IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_type_code in ('T16', 'T17', 'T18', 'T19', 'T20','T48'))
168 | then IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_unit_count_nbr else NULL end) AS 'AoD Contacts',
169 | sum(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_unit_count_nbr) 'All Contacts'
170 | FROM
171 | IDI_Clean_20181020.moh_clean.PRIMHD
172 | WHERE
173 | IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_type_code != 'T35'
174 | AND
175 | IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_unit_type_text = 'CON'
176 | GROUP BY
177 | year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date)
178 | ORDER BY
179 | year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date)
180 |
181 | -- -----------------------------------
182 | -- -----------------------------------
183 |
184 | --All face to face contacts/AoD face to face contacts
185 | SELECT year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date) AS StartYear,
186 | sum (IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_unit_count_nbr) AS 'All F2F contacts',
187 | sum (case when (IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_team_type_code in ('03', '11') or
188 | IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_type_code in ('T16', 'T17', 'T18', 'T19', 'T20','T48')) then
189 | IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_unit_count_nbr else NULL end) AS 'AoD F2F Contacts'
190 | FROM
191 | IDI_Clean_20181020.moh_clean.PRIMHD
192 | WHERE
193 | IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_type_code not in ('T35','T32','T33','T37', 'T08')
194 | AND IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_setting_code not in ('WR','PH','SM','OM')
195 | and IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_unit_type_text = 'CON'
196 | GROUP BY
197 | year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date);
198 |
199 | -- -----------------------------------
200 | -- -----------------------------------
201 |
202 | --People with face to face contacts (snz_uid)
203 | SELECT distinct(snz_moh_uid),
204 | year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date) AS StartYear
205 | FROM
206 | IDI_Clean_20181020.moh_clean.PRIMHD
207 | WHERE
208 | IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_type_code not in ('T35','T32','T33','T37', 'T08')
209 | AND IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_setting_code not in ('WR','PH','SM','OM')
210 | AND IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_unit_type_text = 'CON'
211 | ORDER BY
212 | year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date);
213 |
214 | -- -----------------------------------
215 | -- -----------------------------------
216 |
217 | --People with face to face contacts by DHB/non-DHB
218 | SELECT year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date) AS StartYear,
219 | case when IDI_Metadata.clean_read_CLASSIFICATIONS.moh_primhd_organisation_code.ORGANISATION_NAME LIKE '%District Health Board%' then 'DHB' else 'non-DHB' end 'Organisation type',
220 | count (DISTINCT snz_moh_uid) AS 'Service users face to face contacts'
221 | FROM
222 | IDI_Clean_20181020.moh_clean.PRIMHD
223 | INNER JOIN IDI_Metadata.clean_read_CLASSIFICATIONS.moh_primhd_organisation_code on
224 | IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_organisation_id_code = IDI_Metadata.clean_read_CLASSIFICATIONS.moh_primhd_organisation_code.ORGANISATION_ID
225 | WHERE
226 | IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_type_code not in ('T35','T32','T33','T37', 'T08')
227 | AND IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_setting_code not in ('WR','PH','SM','OM')
228 | AND IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_unit_type_text = 'CON'
229 | GROUP BY
230 | year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date),
231 | case when IDI_Metadata.clean_read_CLASSIFICATIONS.moh_primhd_organisation_code.ORGANISATION_NAME LIKE '%District Health Board%' then 'DHB' else 'non-DHB' end
232 | ORDER BY
233 | year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date)
234 |
235 | -- -----------------------------------
236 | -- -----------------------------------
237 |
238 | --Bednights
239 | --Sum of bednights/AoD bednights
240 | SELECT
241 | year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date) AS StartYear,
242 | sum (case when (IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_team_type_code in ('03', '11') or
243 | IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_type_code in ('T16', 'T17', 'T18', 'T19', 'T20','T48'))
244 | then IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_unit_count_nbr else NULL end) AS 'AoD_Bed_Nights',
245 | sum(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_unit_count_nbr) 'All Bed Nights'
246 | FROM
247 | IDI_Clean_20181020.moh_clean.PRIMHD
248 | WHERE
249 | IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_unit_type_text = 'BED'
250 | GROUP BY
251 | year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date)
252 | ORDER BY
253 | year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date);
254 |
255 | -- -----------------------------------
256 | -- -----------------------------------
257 |
258 | --Bednights by snz_moh_uid
259 | SELECT
260 | year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date) AS StartYear,
261 | IDI_Clean_20181020.moh_clean.PRIMHD.snz_moh_uid,
262 | sum(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_unit_count_nbr) 'Bed Nights'
263 | FROM
264 | IDI_Clean_20181020.moh_clean.PRIMHD
265 | WHERE
266 | IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_type_code != 'T35'
267 | AND
268 | IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_unit_type_text = 'BED'
269 | GROUP BY
270 | year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date),
271 | IDI_Clean_20181020.moh_clean.PRIMHD.snz_moh_uid
272 | ORDER BY
273 | year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date)
274 |
275 | -- -----------------------------------
276 | -- -----------------------------------
277 |
278 | --Bednights by DHB/non-DHB
279 | SELECT
280 | year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date) AS StartYear,
281 | case when IDI_Metadata.clean_read_CLASSIFICATIONS.moh_primhd_organisation_code.ORGANISATION_NAME LIKE '%District Health Board%' then 'DHB' else 'non-DHB' end 'Organisation type',
282 | sum(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_unit_count_nbr) 'Bed Nights'
283 | FROM
284 | IDI_Clean_20181020.moh_clean.PRIMHD
285 | INNER JOIN IDI_Metadata.clean_read_CLASSIFICATIONS.moh_primhd_organisation_code on
286 | IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_organisation_id_code = IDI_Metadata.clean_read_CLASSIFICATIONS.moh_primhd_organisation_code.ORGANISATION_ID
287 | WHERE
288 | IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_type_code != 'T35'
289 | AND
290 | IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_unit_type_text = 'BED'
291 | GROUP BY
292 | year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date),
293 | case when IDI_Metadata.clean_read_CLASSIFICATIONS.moh_primhd_organisation_code.ORGANISATION_NAME LIKE '%District Health Board%' then 'DHB' else 'non-DHB' end
294 | ORDER BY
295 | year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date)
296 |
--------------------------------------------------------------------------------
/IDI-stuff/example-data-dictionaries/ACC_Clean.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/frycast/SQL_course/13d5cf01fdd82b7cf8de3bb29846b5d595d07ab4/IDI-stuff/example-data-dictionaries/ACC_Clean.pdf
--------------------------------------------------------------------------------
/IDI-stuff/example-data-dictionaries/DIA_Clean.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/frycast/SQL_course/13d5cf01fdd82b7cf8de3bb29846b5d595d07ab4/IDI-stuff/example-data-dictionaries/DIA_Clean.pdf
--------------------------------------------------------------------------------
/IDI-stuff/useful-papers/linking-Methodology.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/frycast/SQL_course/13d5cf01fdd82b7cf8de3bb29846b5d595d07ab4/IDI-stuff/useful-papers/linking-Methodology.pdf
--------------------------------------------------------------------------------
/IDI-stuff/useful-papers/prototype-spine-creation.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/frycast/SQL_course/13d5cf01fdd82b7cf8de3bb29846b5d595d07ab4/IDI-stuff/useful-papers/prototype-spine-creation.pdf
--------------------------------------------------------------------------------
/IDI-stuff/useful-papers/use-of-IDI.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/frycast/SQL_course/13d5cf01fdd82b7cf8de3bb29846b5d595d07ab4/IDI-stuff/useful-papers/use-of-IDI.pdf
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | GNU GENERAL PUBLIC LICENSE
2 | Version 3, 29 June 2007
3 |
4 |  Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
5 | Everyone is permitted to copy and distribute verbatim copies
6 | of this license document, but changing it is not allowed.
7 |
8 | Preamble
9 |
10 | The GNU General Public License is a free, copyleft license for
11 | software and other kinds of works.
12 |
13 | The licenses for most software and other practical works are designed
14 | to take away your freedom to share and change the works. By contrast,
15 | the GNU General Public License is intended to guarantee your freedom to
16 | share and change all versions of a program--to make sure it remains free
17 | software for all its users. We, the Free Software Foundation, use the
18 | GNU General Public License for most of our software; it applies also to
19 | any other work released this way by its authors. You can apply it to
20 | your programs, too.
21 |
22 | When we speak of free software, we are referring to freedom, not
23 | price. Our General Public Licenses are designed to make sure that you
24 | have the freedom to distribute copies of free software (and charge for
25 | them if you wish), that you receive source code or can get it if you
26 | want it, that you can change the software or use pieces of it in new
27 | free programs, and that you know you can do these things.
28 |
29 | To protect your rights, we need to prevent others from denying you
30 | these rights or asking you to surrender the rights. Therefore, you have
31 | certain responsibilities if you distribute copies of the software, or if
32 | you modify it: responsibilities to respect the freedom of others.
33 |
34 | For example, if you distribute copies of such a program, whether
35 | gratis or for a fee, you must pass on to the recipients the same
36 | freedoms that you received. You must make sure that they, too, receive
37 | or can get the source code. And you must show them these terms so they
38 | know their rights.
39 |
40 | Developers that use the GNU GPL protect your rights with two steps:
41 | (1) assert copyright on the software, and (2) offer you this License
42 | giving you legal permission to copy, distribute and/or modify it.
43 |
44 | For the developers' and authors' protection, the GPL clearly explains
45 | that there is no warranty for this free software. For both users' and
46 | authors' sake, the GPL requires that modified versions be marked as
47 | changed, so that their problems will not be attributed erroneously to
48 | authors of previous versions.
49 |
50 | Some devices are designed to deny users access to install or run
51 | modified versions of the software inside them, although the manufacturer
52 | can do so. This is fundamentally incompatible with the aim of
53 | protecting users' freedom to change the software. The systematic
54 | pattern of such abuse occurs in the area of products for individuals to
55 | use, which is precisely where it is most unacceptable. Therefore, we
56 | have designed this version of the GPL to prohibit the practice for those
57 | products. If such problems arise substantially in other domains, we
58 | stand ready to extend this provision to those domains in future versions
59 | of the GPL, as needed to protect the freedom of users.
60 |
61 | Finally, every program is threatened constantly by software patents.
62 | States should not allow patents to restrict development and use of
63 | software on general-purpose computers, but in those that do, we wish to
64 | avoid the special danger that patents applied to a free program could
65 | make it effectively proprietary. To prevent this, the GPL assures that
66 | patents cannot be used to render the program non-free.
67 |
68 | The precise terms and conditions for copying, distribution and
69 | modification follow.
70 |
71 | TERMS AND CONDITIONS
72 |
73 | 0. Definitions.
74 |
75 | "This License" refers to version 3 of the GNU General Public License.
76 |
77 | "Copyright" also means copyright-like laws that apply to other kinds of
78 | works, such as semiconductor masks.
79 |
80 | "The Program" refers to any copyrightable work licensed under this
81 | License. Each licensee is addressed as "you". "Licensees" and
82 | "recipients" may be individuals or organizations.
83 |
84 | To "modify" a work means to copy from or adapt all or part of the work
85 | in a fashion requiring copyright permission, other than the making of an
86 | exact copy. The resulting work is called a "modified version" of the
87 | earlier work or a work "based on" the earlier work.
88 |
89 | A "covered work" means either the unmodified Program or a work based
90 | on the Program.
91 |
92 | To "propagate" a work means to do anything with it that, without
93 | permission, would make you directly or secondarily liable for
94 | infringement under applicable copyright law, except executing it on a
95 | computer or modifying a private copy. Propagation includes copying,
96 | distribution (with or without modification), making available to the
97 | public, and in some countries other activities as well.
98 |
99 | To "convey" a work means any kind of propagation that enables other
100 | parties to make or receive copies. Mere interaction with a user through
101 | a computer network, with no transfer of a copy, is not conveying.
102 |
103 | An interactive user interface displays "Appropriate Legal Notices"
104 | to the extent that it includes a convenient and prominently visible
105 | feature that (1) displays an appropriate copyright notice, and (2)
106 | tells the user that there is no warranty for the work (except to the
107 | extent that warranties are provided), that licensees may convey the
108 | work under this License, and how to view a copy of this License. If
109 | the interface presents a list of user commands or options, such as a
110 | menu, a prominent item in the list meets this criterion.
111 |
112 | 1. Source Code.
113 |
114 | The "source code" for a work means the preferred form of the work
115 | for making modifications to it. "Object code" means any non-source
116 | form of a work.
117 |
118 | A "Standard Interface" means an interface that either is an official
119 | standard defined by a recognized standards body, or, in the case of
120 | interfaces specified for a particular programming language, one that
121 | is widely used among developers working in that language.
122 |
123 | The "System Libraries" of an executable work include anything, other
124 | than the work as a whole, that (a) is included in the normal form of
125 | packaging a Major Component, but which is not part of that Major
126 | Component, and (b) serves only to enable use of the work with that
127 | Major Component, or to implement a Standard Interface for which an
128 | implementation is available to the public in source code form. A
129 | "Major Component", in this context, means a major essential component
130 | (kernel, window system, and so on) of the specific operating system
131 | (if any) on which the executable work runs, or a compiler used to
132 | produce the work, or an object code interpreter used to run it.
133 |
134 | The "Corresponding Source" for a work in object code form means all
135 | the source code needed to generate, install, and (for an executable
136 | work) run the object code and to modify the work, including scripts to
137 | control those activities. However, it does not include the work's
138 | System Libraries, or general-purpose tools or generally available free
139 | programs which are used unmodified in performing those activities but
140 | which are not part of the work. For example, Corresponding Source
141 | includes interface definition files associated with source files for
142 | the work, and the source code for shared libraries and dynamically
143 | linked subprograms that the work is specifically designed to require,
144 | such as by intimate data communication or control flow between those
145 | subprograms and other parts of the work.
146 |
147 | The Corresponding Source need not include anything that users
148 | can regenerate automatically from other parts of the Corresponding
149 | Source.
150 |
151 | The Corresponding Source for a work in source code form is that
152 | same work.
153 |
154 | 2. Basic Permissions.
155 |
156 | All rights granted under this License are granted for the term of
157 | copyright on the Program, and are irrevocable provided the stated
158 | conditions are met. This License explicitly affirms your unlimited
159 | permission to run the unmodified Program. The output from running a
160 | covered work is covered by this License only if the output, given its
161 | content, constitutes a covered work. This License acknowledges your
162 | rights of fair use or other equivalent, as provided by copyright law.
163 |
164 | You may make, run and propagate covered works that you do not
165 | convey, without conditions so long as your license otherwise remains
166 | in force. You may convey covered works to others for the sole purpose
167 | of having them make modifications exclusively for you, or provide you
168 | with facilities for running those works, provided that you comply with
169 | the terms of this License in conveying all material for which you do
170 | not control copyright. Those thus making or running the covered works
171 | for you must do so exclusively on your behalf, under your direction
172 | and control, on terms that prohibit them from making any copies of
173 | your copyrighted material outside their relationship with you.
174 |
175 | Conveying under any other circumstances is permitted solely under
176 | the conditions stated below. Sublicensing is not allowed; section 10
177 | makes it unnecessary.
178 |
179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
180 |
181 | No covered work shall be deemed part of an effective technological
182 | measure under any applicable law fulfilling obligations under article
183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or
184 | similar laws prohibiting or restricting circumvention of such
185 | measures.
186 |
187 | When you convey a covered work, you waive any legal power to forbid
188 | circumvention of technological measures to the extent such circumvention
189 | is effected by exercising rights under this License with respect to
190 | the covered work, and you disclaim any intention to limit operation or
191 | modification of the work as a means of enforcing, against the work's
192 | users, your or third parties' legal rights to forbid circumvention of
193 | technological measures.
194 |
195 | 4. Conveying Verbatim Copies.
196 |
197 | You may convey verbatim copies of the Program's source code as you
198 | receive it, in any medium, provided that you conspicuously and
199 | appropriately publish on each copy an appropriate copyright notice;
200 | keep intact all notices stating that this License and any
201 | non-permissive terms added in accord with section 7 apply to the code;
202 | keep intact all notices of the absence of any warranty; and give all
203 | recipients a copy of this License along with the Program.
204 |
205 | You may charge any price or no price for each copy that you convey,
206 | and you may offer support or warranty protection for a fee.
207 |
208 | 5. Conveying Modified Source Versions.
209 |
210 | You may convey a work based on the Program, or the modifications to
211 | produce it from the Program, in the form of source code under the
212 | terms of section 4, provided that you also meet all of these conditions:
213 |
214 | a) The work must carry prominent notices stating that you modified
215 | it, and giving a relevant date.
216 |
217 | b) The work must carry prominent notices stating that it is
218 | released under this License and any conditions added under section
219 | 7. This requirement modifies the requirement in section 4 to
220 | "keep intact all notices".
221 |
222 | c) You must license the entire work, as a whole, under this
223 | License to anyone who comes into possession of a copy. This
224 | License will therefore apply, along with any applicable section 7
225 | additional terms, to the whole of the work, and all its parts,
226 | regardless of how they are packaged. This License gives no
227 | permission to license the work in any other way, but it does not
228 | invalidate such permission if you have separately received it.
229 |
230 | d) If the work has interactive user interfaces, each must display
231 | Appropriate Legal Notices; however, if the Program has interactive
232 | interfaces that do not display Appropriate Legal Notices, your
233 | work need not make them do so.
234 |
235 | A compilation of a covered work with other separate and independent
236 | works, which are not by their nature extensions of the covered work,
237 | and which are not combined with it such as to form a larger program,
238 | in or on a volume of a storage or distribution medium, is called an
239 | "aggregate" if the compilation and its resulting copyright are not
240 | used to limit the access or legal rights of the compilation's users
241 | beyond what the individual works permit. Inclusion of a covered work
242 | in an aggregate does not cause this License to apply to the other
243 | parts of the aggregate.
244 |
245 | 6. Conveying Non-Source Forms.
246 |
247 | You may convey a covered work in object code form under the terms
248 | of sections 4 and 5, provided that you also convey the
249 | machine-readable Corresponding Source under the terms of this License,
250 | in one of these ways:
251 |
252 | a) Convey the object code in, or embodied in, a physical product
253 | (including a physical distribution medium), accompanied by the
254 | Corresponding Source fixed on a durable physical medium
255 | customarily used for software interchange.
256 |
257 | b) Convey the object code in, or embodied in, a physical product
258 | (including a physical distribution medium), accompanied by a
259 | written offer, valid for at least three years and valid for as
260 | long as you offer spare parts or customer support for that product
261 | model, to give anyone who possesses the object code either (1) a
262 | copy of the Corresponding Source for all the software in the
263 | product that is covered by this License, on a durable physical
264 | medium customarily used for software interchange, for a price no
265 | more than your reasonable cost of physically performing this
266 | conveying of source, or (2) access to copy the
267 | Corresponding Source from a network server at no charge.
268 |
269 | c) Convey individual copies of the object code with a copy of the
270 | written offer to provide the Corresponding Source. This
271 | alternative is allowed only occasionally and noncommercially, and
272 | only if you received the object code with such an offer, in accord
273 | with subsection 6b.
274 |
275 | d) Convey the object code by offering access from a designated
276 | place (gratis or for a charge), and offer equivalent access to the
277 | Corresponding Source in the same way through the same place at no
278 | further charge. You need not require recipients to copy the
279 | Corresponding Source along with the object code. If the place to
280 | copy the object code is a network server, the Corresponding Source
281 | may be on a different server (operated by you or a third party)
282 | that supports equivalent copying facilities, provided you maintain
283 | clear directions next to the object code saying where to find the
284 | Corresponding Source. Regardless of what server hosts the
285 | Corresponding Source, you remain obligated to ensure that it is
286 | available for as long as needed to satisfy these requirements.
287 |
288 | e) Convey the object code using peer-to-peer transmission, provided
289 | you inform other peers where the object code and Corresponding
290 | Source of the work are being offered to the general public at no
291 | charge under subsection 6d.
292 |
293 | A separable portion of the object code, whose source code is excluded
294 | from the Corresponding Source as a System Library, need not be
295 | included in conveying the object code work.
296 |
297 | A "User Product" is either (1) a "consumer product", which means any
298 | tangible personal property which is normally used for personal, family,
299 | or household purposes, or (2) anything designed or sold for incorporation
300 | into a dwelling. In determining whether a product is a consumer product,
301 | doubtful cases shall be resolved in favor of coverage. For a particular
302 | product received by a particular user, "normally used" refers to a
303 | typical or common use of that class of product, regardless of the status
304 | of the particular user or of the way in which the particular user
305 | actually uses, or expects or is expected to use, the product. A product
306 | is a consumer product regardless of whether the product has substantial
307 | commercial, industrial or non-consumer uses, unless such uses represent
308 | the only significant mode of use of the product.
309 |
310 | "Installation Information" for a User Product means any methods,
311 | procedures, authorization keys, or other information required to install
312 | and execute modified versions of a covered work in that User Product from
313 | a modified version of its Corresponding Source. The information must
314 | suffice to ensure that the continued functioning of the modified object
315 | code is in no case prevented or interfered with solely because
316 | modification has been made.
317 |
318 | If you convey an object code work under this section in, or with, or
319 | specifically for use in, a User Product, and the conveying occurs as
320 | part of a transaction in which the right of possession and use of the
321 | User Product is transferred to the recipient in perpetuity or for a
322 | fixed term (regardless of how the transaction is characterized), the
323 | Corresponding Source conveyed under this section must be accompanied
324 | by the Installation Information. But this requirement does not apply
325 | if neither you nor any third party retains the ability to install
326 | modified object code on the User Product (for example, the work has
327 | been installed in ROM).
328 |
329 | The requirement to provide Installation Information does not include a
330 | requirement to continue to provide support service, warranty, or updates
331 | for a work that has been modified or installed by the recipient, or for
332 | the User Product in which it has been modified or installed. Access to a
333 | network may be denied when the modification itself materially and
334 | adversely affects the operation of the network or violates the rules and
335 | protocols for communication across the network.
336 |
337 | Corresponding Source conveyed, and Installation Information provided,
338 | in accord with this section must be in a format that is publicly
339 | documented (and with an implementation available to the public in
340 | source code form), and must require no special password or key for
341 | unpacking, reading or copying.
342 |
343 | 7. Additional Terms.
344 |
345 | "Additional permissions" are terms that supplement the terms of this
346 | License by making exceptions from one or more of its conditions.
347 | Additional permissions that are applicable to the entire Program shall
348 | be treated as though they were included in this License, to the extent
349 | that they are valid under applicable law. If additional permissions
350 | apply only to part of the Program, that part may be used separately
351 | under those permissions, but the entire Program remains governed by
352 | this License without regard to the additional permissions.
353 |
354 | When you convey a copy of a covered work, you may at your option
355 | remove any additional permissions from that copy, or from any part of
356 | it. (Additional permissions may be written to require their own
357 | removal in certain cases when you modify the work.) You may place
358 | additional permissions on material, added by you to a covered work,
359 | for which you have or can give appropriate copyright permission.
360 |
361 | Notwithstanding any other provision of this License, for material you
362 | add to a covered work, you may (if authorized by the copyright holders of
363 | that material) supplement the terms of this License with terms:
364 |
365 | a) Disclaiming warranty or limiting liability differently from the
366 | terms of sections 15 and 16 of this License; or
367 |
368 | b) Requiring preservation of specified reasonable legal notices or
369 | author attributions in that material or in the Appropriate Legal
370 | Notices displayed by works containing it; or
371 |
372 | c) Prohibiting misrepresentation of the origin of that material, or
373 | requiring that modified versions of such material be marked in
374 | reasonable ways as different from the original version; or
375 |
376 | d) Limiting the use for publicity purposes of names of licensors or
377 | authors of the material; or
378 |
379 | e) Declining to grant rights under trademark law for use of some
380 | trade names, trademarks, or service marks; or
381 |
382 | f) Requiring indemnification of licensors and authors of that
383 | material by anyone who conveys the material (or modified versions of
384 | it) with contractual assumptions of liability to the recipient, for
385 | any liability that these contractual assumptions directly impose on
386 | those licensors and authors.
387 |
388 | All other non-permissive additional terms are considered "further
389 | restrictions" within the meaning of section 10. If the Program as you
390 | received it, or any part of it, contains a notice stating that it is
391 | governed by this License along with a term that is a further
392 | restriction, you may remove that term. If a license document contains
393 | a further restriction but permits relicensing or conveying under this
394 | License, you may add to a covered work material governed by the terms
395 | of that license document, provided that the further restriction does
396 | not survive such relicensing or conveying.
397 |
398 | If you add terms to a covered work in accord with this section, you
399 | must place, in the relevant source files, a statement of the
400 | additional terms that apply to those files, or a notice indicating
401 | where to find the applicable terms.
402 |
403 | Additional terms, permissive or non-permissive, may be stated in the
404 | form of a separately written license, or stated as exceptions;
405 | the above requirements apply either way.
406 |
407 | 8. Termination.
408 |
409 | You may not propagate or modify a covered work except as expressly
410 | provided under this License. Any attempt otherwise to propagate or
411 | modify it is void, and will automatically terminate your rights under
412 | this License (including any patent licenses granted under the third
413 | paragraph of section 11).
414 |
415 | However, if you cease all violation of this License, then your
416 | license from a particular copyright holder is reinstated (a)
417 | provisionally, unless and until the copyright holder explicitly and
418 | finally terminates your license, and (b) permanently, if the copyright
419 | holder fails to notify you of the violation by some reasonable means
420 | prior to 60 days after the cessation.
421 |
422 | Moreover, your license from a particular copyright holder is
423 | reinstated permanently if the copyright holder notifies you of the
424 | violation by some reasonable means, this is the first time you have
425 | received notice of violation of this License (for any work) from that
426 | copyright holder, and you cure the violation prior to 30 days after
427 | your receipt of the notice.
428 |
429 | Termination of your rights under this section does not terminate the
430 | licenses of parties who have received copies or rights from you under
431 | this License. If your rights have been terminated and not permanently
432 | reinstated, you do not qualify to receive new licenses for the same
433 | material under section 10.
434 |
435 | 9. Acceptance Not Required for Having Copies.
436 |
437 | You are not required to accept this License in order to receive or
438 | run a copy of the Program. Ancillary propagation of a covered work
439 | occurring solely as a consequence of using peer-to-peer transmission
440 | to receive a copy likewise does not require acceptance. However,
441 | nothing other than this License grants you permission to propagate or
442 | modify any covered work. These actions infringe copyright if you do
443 | not accept this License. Therefore, by modifying or propagating a
444 | covered work, you indicate your acceptance of this License to do so.
445 |
446 | 10. Automatic Licensing of Downstream Recipients.
447 |
448 | Each time you convey a covered work, the recipient automatically
449 | receives a license from the original licensors, to run, modify and
450 | propagate that work, subject to this License. You are not responsible
451 | for enforcing compliance by third parties with this License.
452 |
453 | An "entity transaction" is a transaction transferring control of an
454 | organization, or substantially all assets of one, or subdividing an
455 | organization, or merging organizations. If propagation of a covered
456 | work results from an entity transaction, each party to that
457 | transaction who receives a copy of the work also receives whatever
458 | licenses to the work the party's predecessor in interest had or could
459 | give under the previous paragraph, plus a right to possession of the
460 | Corresponding Source of the work from the predecessor in interest, if
461 | the predecessor has it or can get it with reasonable efforts.
462 |
463 | You may not impose any further restrictions on the exercise of the
464 | rights granted or affirmed under this License. For example, you may
465 | not impose a license fee, royalty, or other charge for exercise of
466 | rights granted under this License, and you may not initiate litigation
467 | (including a cross-claim or counterclaim in a lawsuit) alleging that
468 | any patent claim is infringed by making, using, selling, offering for
469 | sale, or importing the Program or any portion of it.
470 |
471 | 11. Patents.
472 |
473 | A "contributor" is a copyright holder who authorizes use under this
474 | License of the Program or a work on which the Program is based. The
475 | work thus licensed is called the contributor's "contributor version".
476 |
477 | A contributor's "essential patent claims" are all patent claims
478 | owned or controlled by the contributor, whether already acquired or
479 | hereafter acquired, that would be infringed by some manner, permitted
480 | by this License, of making, using, or selling its contributor version,
481 | but do not include claims that would be infringed only as a
482 | consequence of further modification of the contributor version. For
483 | purposes of this definition, "control" includes the right to grant
484 | patent sublicenses in a manner consistent with the requirements of
485 | this License.
486 |
487 | Each contributor grants you a non-exclusive, worldwide, royalty-free
488 | patent license under the contributor's essential patent claims, to
489 | make, use, sell, offer for sale, import and otherwise run, modify and
490 | propagate the contents of its contributor version.
491 |
492 | In the following three paragraphs, a "patent license" is any express
493 | agreement or commitment, however denominated, not to enforce a patent
494 | (such as an express permission to practice a patent or covenant not to
495 | sue for patent infringement). To "grant" such a patent license to a
496 | party means to make such an agreement or commitment not to enforce a
497 | patent against the party.
498 |
499 | If you convey a covered work, knowingly relying on a patent license,
500 | and the Corresponding Source of the work is not available for anyone
501 | to copy, free of charge and under the terms of this License, through a
502 | publicly available network server or other readily accessible means,
503 | then you must either (1) cause the Corresponding Source to be so
504 | available, or (2) arrange to deprive yourself of the benefit of the
505 | patent license for this particular work, or (3) arrange, in a manner
506 | consistent with the requirements of this License, to extend the patent
507 | license to downstream recipients. "Knowingly relying" means you have
508 | actual knowledge that, but for the patent license, your conveying the
509 | covered work in a country, or your recipient's use of the covered work
510 | in a country, would infringe one or more identifiable patents in that
511 | country that you have reason to believe are valid.
512 |
513 | If, pursuant to or in connection with a single transaction or
514 | arrangement, you convey, or propagate by procuring conveyance of, a
515 | covered work, and grant a patent license to some of the parties
516 | receiving the covered work authorizing them to use, propagate, modify
517 | or convey a specific copy of the covered work, then the patent license
518 | you grant is automatically extended to all recipients of the covered
519 | work and works based on it.
520 |
521 | A patent license is "discriminatory" if it does not include within
522 | the scope of its coverage, prohibits the exercise of, or is
523 | conditioned on the non-exercise of one or more of the rights that are
524 | specifically granted under this License. You may not convey a covered
525 | work if you are a party to an arrangement with a third party that is
526 | in the business of distributing software, under which you make payment
527 | to the third party based on the extent of your activity of conveying
528 | the work, and under which the third party grants, to any of the
529 | parties who would receive the covered work from you, a discriminatory
530 | patent license (a) in connection with copies of the covered work
531 | conveyed by you (or copies made from those copies), or (b) primarily
532 | for and in connection with specific products or compilations that
533 | contain the covered work, unless you entered into that arrangement,
534 | or that patent license was granted, prior to 28 March 2007.
535 |
536 | Nothing in this License shall be construed as excluding or limiting
537 | any implied license or other defenses to infringement that may
538 | otherwise be available to you under applicable patent law.
539 |
540 | 12. No Surrender of Others' Freedom.
541 |
542 | If conditions are imposed on you (whether by court order, agreement or
543 | otherwise) that contradict the conditions of this License, they do not
544 | excuse you from the conditions of this License. If you cannot convey a
545 | covered work so as to satisfy simultaneously your obligations under this
546 | License and any other pertinent obligations, then as a consequence you may
547 | not convey it at all. For example, if you agree to terms that obligate you
548 | to collect a royalty for further conveying from those to whom you convey
549 | the Program, the only way you could satisfy both those terms and this
550 | License would be to refrain entirely from conveying the Program.
551 |
552 | 13. Use with the GNU Affero General Public License.
553 |
554 | Notwithstanding any other provision of this License, you have
555 | permission to link or combine any covered work with a work licensed
556 | under version 3 of the GNU Affero General Public License into a single
557 | combined work, and to convey the resulting work. The terms of this
558 | License will continue to apply to the part which is the covered work,
559 | but the special requirements of the GNU Affero General Public License,
560 | section 13, concerning interaction through a network will apply to the
561 | combination as such.
562 |
563 | 14. Revised Versions of this License.
564 |
565 | The Free Software Foundation may publish revised and/or new versions of
566 | the GNU General Public License from time to time. Such new versions will
567 | be similar in spirit to the present version, but may differ in detail to
568 | address new problems or concerns.
569 |
570 | Each version is given a distinguishing version number. If the
571 | Program specifies that a certain numbered version of the GNU General
572 | Public License "or any later version" applies to it, you have the
573 | option of following the terms and conditions either of that numbered
574 | version or of any later version published by the Free Software
575 | Foundation. If the Program does not specify a version number of the
576 | GNU General Public License, you may choose any version ever published
577 | by the Free Software Foundation.
578 |
579 | If the Program specifies that a proxy can decide which future
580 | versions of the GNU General Public License can be used, that proxy's
581 | public statement of acceptance of a version permanently authorizes you
582 | to choose that version for the Program.
583 |
584 | Later license versions may give you additional or different
585 | permissions. However, no additional obligations are imposed on any
586 | author or copyright holder as a result of your choosing to follow a
587 | later version.
588 |
589 | 15. Disclaimer of Warranty.
590 |
591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
599 |
600 | 16. Limitation of Liability.
601 |
602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
610 | SUCH DAMAGES.
611 |
612 | 17. Interpretation of Sections 15 and 16.
613 |
614 | If the disclaimer of warranty and limitation of liability provided
615 | above cannot be given local legal effect according to their terms,
616 | reviewing courts shall apply local law that most closely approximates
617 | an absolute waiver of all civil liability in connection with the
618 | Program, unless a warranty or assumption of liability accompanies a
619 | copy of the Program in return for a fee.
620 |
621 | END OF TERMS AND CONDITIONS
622 |
623 | How to Apply These Terms to Your New Programs
624 |
625 | If you develop a new program, and you want it to be of the greatest
626 | possible use to the public, the best way to achieve this is to make it
627 | free software which everyone can redistribute and change under these terms.
628 |
629 | To do so, attach the following notices to the program. It is safest
630 | to attach them to the start of each source file to most effectively
631 | state the exclusion of warranty; and each file should have at least
632 | the "copyright" line and a pointer to where the full notice is found.
633 |
634 |     <one line to give the program's name and a brief idea of what it does.>
635 |     Copyright (C) <year>  <name of author>
636 |
637 | This program is free software: you can redistribute it and/or modify
638 | it under the terms of the GNU General Public License as published by
639 | the Free Software Foundation, either version 3 of the License, or
640 | (at your option) any later version.
641 |
642 | This program is distributed in the hope that it will be useful,
643 | but WITHOUT ANY WARRANTY; without even the implied warranty of
644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
645 | GNU General Public License for more details.
646 |
647 | You should have received a copy of the GNU General Public License
648 | along with this program. If not, see <https://www.gnu.org/licenses/>.
649 |
650 | Also add information on how to contact you by electronic and paper mail.
651 |
652 | If the program does terminal interaction, make it output a short
653 | notice like this when it starts in an interactive mode:
654 |
655 |     <program>  Copyright (C) <year>  <name of author>
656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
657 | This is free software, and you are welcome to redistribute it
658 | under certain conditions; type `show c' for details.
659 |
660 | The hypothetical commands `show w' and `show c' should show the appropriate
661 | parts of the General Public License. Of course, your program's commands
662 | might be different; for a GUI interface, you would use an "about box".
663 |
664 | You should also get your employer (if you work as a programmer) or school,
665 | if any, to sign a "copyright disclaimer" for the program, if necessary.
666 | For more information on this, and how to apply and follow the GNU GPL, see
667 | <https://www.gnu.org/licenses/>.
668 |
669 | The GNU General Public License does not permit incorporating your program
670 | into proprietary programs. If your program is a subroutine library, you
671 | may consider it more useful to permit linking proprietary applications with
672 | the library. If this is what you want to do, use the GNU Lesser General
673 | Public License instead of this License. But first, please read
674 | <https://www.gnu.org/licenses/why-not-lgpl.html>.
675 |
--------------------------------------------------------------------------------
/R/README.md:
--------------------------------------------------------------------------------
1 | # This folder contains scripts and guides related to R
2 |
3 | ### Beginners
4 |
5 | For beginners, I've prepared the below zip file containing four scripts for you to run through in order (0,1,2,3). These four scripts contain the major takeaways from day 4.
6 |
7 | * [(zip file) beginner scripts](beginners.zip)
8 |
9 | ### Connecting remotely
10 |
11 | The above-mentioned beginner scripts create a SQLite connection object (which we call `con`). You can also create connection objects for remote or local MySQL or T-SQL servers. The guide below shows you how to do that. The connection objects you create can be used in exactly the same ways as `con` is used in the beginner scripts (see the short example below the link).
12 |
13 | * [General guide for connecting to local or remote server](https://htmlpreview.github.io/?https://github.com/frycast/SQL_course/blob/master/R/connecting-R/databases-in-R.html)
14 |
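For example (a minimal sketch, assuming you have already created a connection object called `con` with any of the methods above), every connection is listed, queried and closed with the same DBI functions:

```r
DBI::dbListTables(con)                    # list the available tables
DBI::dbGetQuery(con, "SELECT 1 AS test")  # run a small test query
DBI::dbDisconnect(con)                    # disconnect when you're done
```
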
15 | ### Other scripts
16 |
17 | For the links that say "R script", you can right-click (or control-click) the link, and then click "save link as".
18 |
19 | * [(R script) Intro tutorial for programming in R](https://github.com/frycast/SQL_course/raw/master/R/intro-to-R.R)
20 | * [(R script) Connecting to a remote MySQL server](https://github.com/frycast/SQL_course/raw/master/R/connecting-R/remote-MySQL.R)
21 | * [A SQLite copy of the Sandpit database](sqlite-R/Sandpit.sqlite)
22 | * [(R script) Tidy data in R](https://github.com/frycast/SQL_course/raw/master/R/tidy-data.R)
23 | * [The 'umbrella' example project directory (includes RMarkdown code for the day 4 slides)](https://github.com/frycast/umbrella)
24 | * [More on using SQLite in R](sqlite-R)
25 |
--------------------------------------------------------------------------------
/R/beginners.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/frycast/SQL_course/13d5cf01fdd82b7cf8de3bb29846b5d595d07ab4/R/beginners.zip
--------------------------------------------------------------------------------
/R/connecting-R/README.md:
--------------------------------------------------------------------------------
1 | # Connecting to R
2 |
3 | This folder contains a guide for connecting to a MySQL or T-SQL server in R.
4 |
5 | * [Click here to view the guide](https://htmlpreview.github.io/?https://github.com/frycast/SQL_course/blob/master/R/connecting-R/databases-in-R.html)
6 |
--------------------------------------------------------------------------------
/R/connecting-R/databases-in-R.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Connecting to SQL databases with R"
3 | author: Daniel Fryer
4 | date: November 11, 2021
5 | output:
6 | prettydoc::html_pretty:
7 | theme: leonids
8 | highlight: github
9 | ---
10 |
11 | ```{r setup, include=FALSE}
12 | knitr::opts_chunk$set(echo = TRUE)
13 | ```
14 |
15 | ## Introduction
16 |
17 | Setting up and connecting to SQL servers can get tricky, with all the authentication methods and security measures, hardware differences, etc.
18 |
19 | This is a general guide, for both MySQL and T-SQL. We will learn to:
20 |
21 | 1. Create and connect to a local server.
22 | 2. Connect to a remote database.
23 |
24 | If you're here to get connected to a remote MySQL database,
25 | then I've prepared an R script to get you started.
26 | [Click here to access the MySQL connection script](./remote-MySQL.R).
27 |
28 | ## 1. Create and connect to a local server
29 |
30 | Before connecting to a local server in R, you should create the server, install a popular SQL code editor, and insert some practice data. For this, I've created the two guides linked below. Please go through one of them before continuing.
31 |
32 | * [For T-SQL installation (easier on Windows) click here](https://github.com/frycast/SQL_course/tree/master/create-database/T-SQL)
33 |
34 | * [For MySQL installation (easier on macOS) click here](https://github.com/frycast/SQL_course/tree/master/create-database/MySQL)
35 |
36 | You'll also need to have R and RStudio set up on your computer. Here is a good tutorial for that:
37 |
38 | * [Click here for the ModernDive R installation tutorial](https://moderndive.netlify.app/1-getting-started.html#installing)
39 |
40 | Once the SQL server and R are both set up, sometimes the rest will go really smoothly, but sometimes it won't. If you run into trouble, you might need to use a search engine and/or browse through forums to find some suggestions. If you're a student taking one of my SQL courses, you can contact me any time for help!
41 |
42 | #### Connect to a local MySQL server
43 |
44 | First, run the code below to install the required R package:
45 |
46 | ```{r, eval=FALSE}
47 | install.packages("RMySQL")
48 | library(RMySQL)
49 | ```
50 |
51 | Then, the following R code should get you connected. Note that the database named 'Sandpit' was created within the above MySQL installation guide.
52 |
53 | ```{r, eval=FALSE}
54 | con <- DBI::dbConnect(
55 | RMySQL::MySQL(),
56 | dbname = "Sandpit",
57 | host = "localhost")
58 | ```
59 |
60 | If the above causes an error that mentions `caching_sha2_password could not be loaded`, then connect to localhost using MySQL Workbench (or Sequel Ace, or whichever SQL editor you have installed), and run the SQL code below. *Warning:* this creates a very insecure user account, so you should never do this on a server that is open to the public and contains sensitive or private data. I'm assuming you're setting up this database on your home computer just to experiment with fake data.
61 |
62 | ```{SQL, eval=FALSE}
63 | CREATE USER 'R'@'localhost' IDENTIFIED WITH mysql_native_password BY 'password';
64 | GRANT ALL PRIVILEGES ON *.* TO 'R'@'localhost' WITH GRANT OPTION;
65 | ```
66 |
67 | Once you've run the above SQL code, you should be able to connect with:
68 |
69 | ```{r, eval=FALSE}
70 | con <- DBI::dbConnect(
71 | RMySQL::MySQL(),
72 | dbname = "Sandpit",
73 | host = "localhost",
74 | user = "R",
75 | password = "password")
76 | ```
77 |
78 | Once the code is executed without error, it means the connection is established. To test it, you can check that the following returns a list of table names.
79 |
80 | ```{r, eval=FALSE}
81 | DBI::dbListTables(con)
82 | ```
83 |
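You can also run a small query directly as an extra check. The chunk below is a minimal sketch: `Notes_Friends` is one of the course practice tables, so substitute a table name from your own Sandpit database if yours differ.

```{r, eval=FALSE}
DBI::dbGetQuery(con, "SELECT 1 AS test")
# or query one of the practice tables, for example:
# DBI::dbGetQuery(con, "SELECT * FROM Notes_Friends")
```
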
84 | Guidelines for using the connection are given in the R chapter of the course notes. Once you're done using the connection, remember to disconnect:
85 |
86 | ```{r, eval=FALSE}
87 | DBI::dbDisconnect(con)
88 | ```
89 |
90 | #### Connect to a local T-SQL server
91 |
92 | First, run the code below to install the required R package:
93 |
94 | ```{r, eval=FALSE}
95 | install.packages(c("odbc","DBI"))
96 | library(odbc)
97 | library(DBI)
98 | ```
99 |
100 | Then, the following R code should get you connected. Note that the database named 'Sandpit' was created within the above T-SQL installation guide.
101 |
102 | ```{r, eval=FALSE}
103 | con <- DBI::dbConnect(odbc::odbc(),
104 | Driver = "ODBC Driver 17 for SQL Server",
105 | Server = "localhost",
106 | Database = "Sandpit",
107 | Trusted_Connection = "yes")
108 | ```
109 |
110 | If the above fails, there may be alternative drivers that can be used in place of `ODBC Driver 17 for SQL Server`. However, the other parameters will change too. A common alternative is:
111 |
112 | ```{r, eval=FALSE}
113 | con <- DBI::dbConnect(odbc::odbc(),
114 | Driver = "SQL Server",
115 | Server = "localhost",
116 | Database = "master",
117 | Trusted_Connection = "True")
118 | ```
119 |
120 | Small details make a difference. For example, the `Trusted_Connection` parameter in the second example is set to `"True"`, while in the first example it is `"yes"`. A large collection of alternative connection strings can be found on [connectionstrings.com](https://www.connectionstrings.com/sql-server/).
121 |
122 | Once the code is executed without error, it means the connection is established. To test it, you can check that the following returns a list of table names.
123 |
124 | ```{r, eval=FALSE}
125 | DBI::dbListTables(con)
126 | ```
127 |
128 | Guidelines for using the connection are given in the R chapter of the course notes. Once you're done using the connection, remember to disconnect:
129 |
130 | ```{r, eval=FALSE}
131 | DBI::dbDisconnect(con)
132 | ```
133 |
134 | ## 2. Connect to a remote server
135 |
136 | Connecting to a remote database means you can avoid the hassle of installing a local MySQL or T-SQL server. You will need some connection details specific to the database you plan to connect to. The types of details you need depend on whether you're connecting to a MySQL or T-SQL server.
137 |
138 | #### Connect to a remote MySQL server
139 |
140 | First, run the code below to install the required R package:
141 |
142 | ```{r, eval=FALSE}
143 | install.packages("RMySQL")
144 | library(RMySQL)
145 | ```
146 |
147 | Then, use the following to connect:
148 |
149 | ```{r, eval=FALSE}
150 | con <- DBI::dbConnect(RMySQL::MySQL(),
151 | host = "write_host_address_here",
152 | port = 0000, # Replace this number with the actual port number
153 | dbname = "write_database_name_here",
154 | user = "write_user_name_here",
155 | password = "write_your_password_here")
156 | ```
157 |
158 | Once the code is executed without error, it means the connection is established. To test it, you can check that the following returns a list of table names.
159 |
160 | ```{r, eval=FALSE}
161 | DBI::dbListTables(con)
162 | ```
163 |
164 | Guidelines for using the connection are given in the R chapter of the course notes. Once you're done using the connection, remember to disconnect:
165 |
166 | ```{r, eval=FALSE}
167 | DBI::dbDisconnect(con)
168 | ```
169 |
170 | #### Connect to a remote T-SQL server
171 |
172 | First, run the code below to install the required R package:
173 |
174 | ```{r, eval=FALSE}
175 | install.packages(c("odbc","DBI"))
176 | library(odbc)
177 | library(DBI)
178 | ```
179 |
180 | Then, use the following to connect:
181 |
182 | ```{r, eval=FALSE}
183 | con <- odbc::dbConnect(odbc::odbc(),
184 | Driver = "SQL Server",
185 | Server = "write_server_address_here",
186 | Database = "write_database_name_here",
187 | UID = "write_user_name_here",
188 | PWD = "write_your_password_here")
189 | ```
190 |
191 | Once the code is executed without error, it means the connection is established. To test it, you can check that the following returns a list of table names.
192 |
193 | ```{r, eval=FALSE}
194 | DBI::dbListTables(con)
195 | ```
196 |
197 | Guidelines for using the connection are given in the R chapter of the course notes. Once you're done using the connection, remember to disconnect:
198 |
199 | ```{r, eval=FALSE}
200 | DBI::dbDisconnect(con)
201 | ```
--------------------------------------------------------------------------------
/R/connecting-R/remote-MySQL.R:
--------------------------------------------------------------------------------
1 | # Install and load RMySQL
2 | install.packages("RMySQL")
3 | library(RMySQL)
4 |
5 | # Choose the database name and password
6 | database <- "Sandpit"
7 | password <- "insert_password_here"
8 |
9 | # Connect to the database
10 | con <- DBI::dbConnect(RMySQL::MySQL(),
11 | host = "db-intro-sql-do-user-9289996-0.b.db.ondigitalocean.com",
12 | port = 25060, # Replace this number with the actual port number
13 | dbname = database,
14 | user = "RSCloud",
15 | password = password)
16 |
17 | # List the tables
18 | DBI::dbListTables(con)
19 |
20 | ##### USE DATABASE HERE #######
21 |
22 | library(dbplyr)
23 | library(dplyr)
24 | d1 <- tbl(con, "Notes_Friends")
25 | d2 <- tbl(con, "Notes_Pets")
26 |
27 | # See:
28 | # https://dbplyr.tidyverse.org/
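# For example (a minimal sketch): peek at the first few rows of each table.
# collect() runs the generated SQL on the server and pulls the results into R.
d1 %>% head(5) %>% collect()
d2 %>% head(5) %>% collect()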
29 |
30 | ##############################
31 |
32 | # Disconnect
33 | DBI::dbDisconnect(con)
34 |
--------------------------------------------------------------------------------
/R/example-R.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/frycast/SQL_course/13d5cf01fdd82b7cf8de3bb29846b5d595d07ab4/R/example-R.zip
--------------------------------------------------------------------------------
/R/examples/bioinformatics/analysis.R:
--------------------------------------------------------------------------------
1 | # This script will give an example analysis:
2 | # * How many times have we seen this variation before?
3 | # * Analyse by patient info (e.g., ethnicity)
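#
# A minimal sketch of one such query (commented out; it assumes the
# variants.sqlite database created by start_here.R, and uses V617F as the
# example variant): count how often the variant has been seen, by ethnicity.
#
# library(RSQLite)
# con <- DBI::dbConnect(RSQLite::SQLite(), "data/variants.sqlite")
# DBI::dbGetQuery(con, "
#   SELECT P.ethnicity, COUNT(*) AS times_seen
#   FROM Patient_Gene_variant PGV
#   JOIN Gene_variant GV ON PGV.gene_variant_id = GV.gene_variant_id
#   JOIN variant V       ON GV.variant_id = V.variant_id
#   JOIN Patient P       ON PGV.patient_id = P.patient_id
#   WHERE V.name = 'V617F'
#   GROUP BY P.ethnicity
# ")
# DBI::dbDisconnect(con)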
--------------------------------------------------------------------------------
/R/examples/bioinformatics/app.R:
--------------------------------------------------------------------------------
1 | # This script will create a Shiny app that
2 | # enables users to work with the database safely.
--------------------------------------------------------------------------------
/R/examples/bioinformatics/automation.R:
--------------------------------------------------------------------------------
1 | # This script will read all csvs in data/new_records and then
2 | # insert the data into the variants database.
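#
# A minimal sketch of the reading step (an assumption about the eventual
# implementation, not the final script):
#
# files <- list.files("data/new_records", pattern = "\\.csv$", full.names = TRUE)
# new_records <- do.call(rbind, lapply(files, read.csv))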
--------------------------------------------------------------------------------
/R/examples/bioinformatics/create_variants_database.sql:
--------------------------------------------------------------------------------
1 | CREATE TABLE variant (
2 | name text NOT NULL,
3 | variant_id integer PRIMARY KEY AUTOINCREMENT,
4 | change_from text NOT NULL,
5 | change_to text NOT NULL,
6 | position integer NOT NULL
7 | );
8 |
9 | -- -- TODO: See email for update:
10 | -- CREATE TABLE variant (
11 | -- name text NOT NULL,
12 | -- variant_id integer PRIMARY KEY AUTOINCREMENT,
13 | -- chromosome text NOT NULL,
14 | -- position int NOT NULL,
15 | -- reference text NOT NULL,
16 | -- alternate text NOT NULL,
17 | -- );
18 |
19 | -- WHERE chromosome = '' AND
20 |
21 | INSERT INTO variant
22 | (NAME, variant_ID, change_from, change_to, position)
23 | VALUES
24 | ('E1038G', 1, 'E', 'G', '1038'),
25 | ('D835Y', 2, 'D', 'Y', '835'),
26 | ('R396C', 3, 'R', 'C', '396'),
27 | ('V617F', 4, 'V', 'F', '617'),
28 | ('K57N', 5, 'K', 'N', '57'),
29 | ('P2514R', 6, 'P', 'R', '2514'),
30 | ('E542K', 7, 'E', 'K', '542'),
31 | ('R130G', 8, 'R', 'G', '130'),
32 | ('R661W', 9, 'R', 'W', '661'),
33 | ('R361H', 10, 'R', 'H', '361'),
34 | ('N496K', 11, 'N', 'K', '496'),
35 | ('N1020Y', 12, 'N', 'Y', '1020'),
36 | ('Y640F', 13, 'Y', 'F', '640'),
37 | ('G1664R', 14, 'G', 'R', '1664'),
38 | ('R537P', 15, 'R', 'P', '537'),
39 | ('R175H', 16, 'R', 'H', '175'),
40 | ('R167W', 17, 'R', 'W', '167'),
41 | ('R228H', 18, 'R', 'H', '228'),
42 | ('R188H', 19, 'R', 'H', '188'),
43 | ('R622W', 20, 'R', 'W', '622'),
44 | ('R623W', 21, 'R', 'W', '622'); -- not referenced
45 |
46 | CREATE TABLE Gene (
47 | name text NOT NULL,
48 | gene_id integer PRIMARY KEY AUTOINCREMENT
49 | );
50 |
51 | INSERT INTO Gene
52 | (NAME, GENE_ID)
53 | VALUES
54 | ('BRCA1', 1),
55 | ('FLT3', 2),
56 | ('GATA2', 3),
57 | ('JAK2', 4),
58 | ('MAP2K1', 5),
59 | ('NOTCH1', 6),
60 | ('PIK3CA', 7),
61 | ('PTEN', 8),
62 | ('RB1', 9),
63 | ('SMAD4', 10),
64 | ('SMC1A', 11),
65 | ('SMC3', 12),
66 | ('STAT3', 13),
67 | ('TET2', 14),
68 | ('TGFBR2', 15),
69 | ('TP53', 16),
70 | ('VHL', 17),
71 | ('XPA', 18),
72 | ('XRCC2', 19),
73 | ('ZFHX3', 20),
74 | ('STAT4', 21), -- not referenced
75 | ('P53', 22), -- not referenced
76 | ('PT E N', 23); -- not referenced
77 |
78 | CREATE TABLE Patient(
79 | patient_id integer PRIMARY KEY AUTOINCREMENT,
80 | cancer_type text,
81 | ethnicity text
82 | );
83 |
84 | INSERT INTO Patient
85 | (PATIENT_ID, CANCER_TYPE, ETHNICITY)
86 | VALUES
87 | (1, 'Breast', 'Asian'),
88 | (2, 'Ovarian', 'Caucasian'),
89 | (3, 'Acute myeloid leukemia', 'African American'),
90 | (4, 'Myelodysplastic syndrome', 'Hispanic'),
91 | (5, 'Myeloproliferative neoplasm', 'Caucasian'),
92 | (6, 'Colorectal', 'African American'),
93 | (7, 'T-cell acute lymphoblastic leukemia', 'Hispanic'),
94 | (8, 'Lung cancer', 'Caucasian'),
95 | (9, 'Prostate cancer', 'Asian'),
96 | (10, 'Skin cancer', 'Caucasian'),
97 | (11, 'Retinoblastoma', 'African American'),
98 | (12, 'Pancreatic cancer', 'Hispanic'),
99 | (13, 'Cornelia de Lange syndrome', 'Caucasian'),
100 | (14, 'Colorectal', 'Asian'),
101 | (15, 'Lymphoma African', 'American'),
102 | (16, 'Myelodysplastic syndrome', 'Hispanic'),
103 | (17, 'Hereditary nonpolyposis colorectal cancer', 'Caucasian'),
104 | (18, 'Breast cancer', 'African American' ),
105 | (19, 'Renal cell carcinoma', 'Hispanic' ),
106 | (20, 'Skin cancer', 'Caucasian' ),
107 | (21, 'Breast cancer', 'Asian' ),
108 | (22, 'Schizophrenia', 'Caucasian' ),
109 | (23, 'Lung Cancer', 'Asian' ),
110 | (24, 'Lung Cancer', 'African' ),
111 | (25, 'Breast Cancer', 'Caucasian' ),
112 | (26, 'Breast Cancer', 'Asian' ),
113 | (27, 'Lung Cancer', 'Caucasian' ),
114 | (28, 'Lung Cancer', 'African' ),
115 | (29, 'Prostate', 'Caucasian' ),
116 | (30, 'Prostate Cancer', 'Asian' ),
117 | (31, 'Lung Cancer', 'Hispanic' ),
118 | (32, 'Lung Cancer', 'Asian' ),
119 | (33, 'Lung', 'African' ),
120 | (34, 'Lung', 'Caucasian' ),
121 | (35, 'Breast Cancer', 'Caucasian' ),
122 | (36, 'Breast Cancer', 'Asian' ),
123 | (37, 'Leukemia', 'Caucasian' ),
124 | (38, 'Colorectal Cancer', 'Asian' ),
125 | (39, 'Colorectal Cancer', 'African' ),
126 | (40, 'Prostate Cancer', 'African' ),
127 | (41, 'Retinoblastoma', 'Caucasian' ),
128 | (42, 'Pancreatic Cancer', 'Hispanic' ),
129 | (43, 'Ovarian Cancer', 'African' ),
130 | (44, 'Ovarian', 'Caucasian' ),
131 | (45, 'Leukemia', 'African' ),
132 | (46, 'Myelodysplastic Syndrome', 'African'),
133 | (47, 'Myelodysplastic Syndrome', 'African'); -- not referenced
134 |
135 | CREATE TABLE Gene_variant (
136 | gene_variant_id integer PRIMARY KEY AUTOINCREMENT,
137 | gene_id text NOT NULL,
138 | variant_id text NOT NULL,
139 | pop_freq numeric,
140 | comment text,
141 | FOREIGN KEY (gene_id) REFERENCES Gene (gene_id),
142 | FOREIGN KEY (variant_id) REFERENCES variant (variant_id),
143 | UNIQUE(gene_id, variant_id)
144 | );
145 |
146 | INSERT INTO Gene_variant
147 | (GENE_variant_ID, GENE_ID, variant_ID, POP_FREQ, COMMENT)
148 | VALUES
149 | (1, 1, 1, 0.01, 'Associated with increased risk of breast and ovarian cancer'),
150 | (2, 2, 2, 0.02, 'Associated with poor prognosis in acute myeloid leukemia patients'),
151 | (3, 3, 3, 0.05, 'Associated with susceptibility to myelodysplastic syndrome'),
152 | (4, 4, 4, 0.02, 'Found in many cases of myeloproliferative neoplasms'),
153 | (5, 5, 5, 0.1, 'Associated with increased risk of colorectal cancer'),
154 | (6, 6, 6, 0.07, 'Found in many cases of T-cell acute lymphoblastic leukemia'),
155 | (7, 7, 7, 0.03, 'Frequently mutated in various cancers, including breast, colon, and lung'),
156 | (8, 8, 8, 0.08, 'Associated with increased risk of multiple types of cancer'),
157 | (9, 9, 9, 0.04, 'Frequently mutated in various types of cancer, including retinoblastoma'),
158 | (10, 10, 10, 0.01, 'Associated with increased risk of pancreatic cancer'),
159 | (11, 11, 11, 0.02, 'Associated with increased risk of Cornelia de Lange syndrome'),
160 | (12, 12, 12, 0.09, 'Associated with increased risk of colorectal cancer'),
161 | (13, 13, 13, 0.05, 'Frequently mutated in various types of cancer, including lymphoma and leukemia'),
162 | (14, 14, 14, 0.03, 'Frequently mutated in myelodysplastic syndromes and acute myeloid leukemia'),
163 | (15, 15, 15, 0.01, 'Associated with increased risk of hereditary nonpolyposis colorectal cancer'),
164 | (16, 16, 16, 0.02, 'Associated with increased risk of many types of cancer, including breast and colon'),
165 | (17, 17, 17, 0.05, 'Associated with increased risk of renal cell carcinoma'),
166 | (18, 18, 18, 0.06, 'Associated with increased risk of skin cancer and xeroderma pigmentosum'),
167 | (19, 19, 19, 0.08, 'Associated with increased risk of breast cancer and ovarian cancer'),
168 | (20, 20, 20, 0.02, 'Associated with increased risk of schizophrenia and bipolar disorder'),
169 | (21, 01, 10, 0.01, 'A study found that individuals with this variant had a higher risk of developing breast and ovarian cancer.'),
170 | (22, 01, 02, 0.03, 'This variant has been found to be associated with increased sensitivity to chemotherapy.'),
171 | (23, 01, 03, 0.02, NULL),
172 | (24, 02, 01, 0.04, 'Individuals with this variant have an increased risk of developing acute myeloid leukemia.'),
173 | (25, 02, 20, 0.06, 'A study found that this variant is associated with a higher response rate to FLT3 inhibitors in patients with acute myeloid leukemia.'),
174 | (26, 03, 01, 0.09, 'This variant has been found to be associated with an increased risk of developing myelodysplastic syndrome.'),
175 | (27, 04, 05, 0.05, 'This variant is commonly found in patients with myeloproliferative neoplasms and is associated with a poorer prognosis.'),
176 | (28, 05, 06, 0.07, 'A study found that individuals with this variant had a higher risk of developing colorectal cancer.'),
177 | (29, 05, 07, 0.02, 'This variant has been found to be associated with a poorer response to chemotherapy in patients with colorectal cancer.'),
178 | (30, 06, 04, 0.01, 'This variant has been found to be associated with a poorer prognosis in patients with T-cell acute lymphoblastic leukemia.'),
179 | (31, 07, 09, 0.08, 'This variant is commonly found in patients with lung cancer and is associated with a poorer prognosis.'),
180 | (32, 07, 03, 0.06, 'This variant has been found to be associated with a higher response rate to targeted therapy in patients with lung cancer.'),
181 | (33, 08, 10, 0.02, 'A study found that individuals with this variant had a higher risk of developing prostate cancer.'),
182 | (34, 09, 11, 0.03, 'This variant is commonly found in patients with retinoblastoma and is associated with a higher risk of developing secondary cancers.'),
183 | (35, 10, 19, 0.04, 'This variant has been found to be associated with a higher risk of developing pancreatic cancer.'),
184 | (36, 11, 10, 0.05, 'This variant is commonly found in patients with Cornelia de Lange syndrome and is associated with a more severe phenotype.'),
185 | (37, 12, 03, 0.07, 'This variant is commonly found in patients with colorectal cancer and is associated with a poorer prognosis.'),
186 | (38, 13, 04, 0.01, 'A study found that individuals with this variant had a higher risk of developing lymphoma.'),
187 | (39, 14, 12, 0.02, 'This variant is commonly found in patients with myelodysplastic syndrome and is associated with a poorer prognosis.');
188 |
189 | CREATE TABLE Patient_Gene_variant(
190 | patient_id integer,
191 | gene_variant_id integer,
192 | vaf numeric,
193 | FOREIGN KEY (gene_variant_id) REFERENCES Gene_variant (gene_variant_id),
194 | FOREIGN KEY (patient_id) REFERENCES Patient (patient_id),
195 | PRIMARY KEY (patient_id, gene_variant_id)
196 | );
197 |
198 | INSERT INTO Patient_Gene_variant
199 | (PATIENT_ID, GENE_variant_ID, vaf)
200 | VALUES
201 | (1, 1 , 0.35),
202 | (2, 1 , 0.48),
203 | (3, 2 , 0.15),
204 | (4, 3 , 0.22),
205 | (5, 4 , 0.67),
206 | (6, 5 , 0.41),
207 | (7, 6 , 0.58),
208 | (8, 7 , 0.72),
209 | (9, 7 , 0.31),
210 | (10, 8 , 0.44),
211 | (11, 9 , 0.27),
212 | (12, 10, 0.39),
213 | (13, 11, 0.18),
214 | (14, 12, 0.51),
215 | (15, 13, 0.62),
216 | (16, 14, 0.29),
217 | (17, 15, 0.76),
218 | (18, 16, 0.47),
219 | (19, 17, 0.53),
220 | (20, 18, 0.61),
221 | (21, 19, 0.36),
222 | (22, 20, 0.12),
223 | (23, 21, 0.63),
224 | (24, 1 , 0.21),
225 | (25, 1 , 0.48),
226 | (26, 22, 0.36),
227 | (27, 22, 0.42),
228 | (28, 22, 0.18),
229 | (29, 23, 0.75),
230 | (30, 24, 0.31),
231 | (31, 25, 0.28),
232 | (32, 2 , 0.59),
233 | (33, 26, 0.14),
234 | (34, 27, 0.67),
235 | (35, 28, 0.51),
236 | (36, 29, 0.42),
237 | (37, 30, 0.23),
238 | (38, 31, 0.57),
239 | (39, 32, 0.41),
240 | (40, 33, 0.62),
241 | (41, 34, 0.28),
242 | (42, 35, 0.39),
243 | (43, 36, 0.18),
244 | (44, 37, 0.35),
245 | (45, 38, 0.49),
246 | (46, 39, 0.27),
247 | (5, 05, 0.62),
248 | (6, 10, 0.29),
249 | (7, 15, 0.76),
250 | (8, 20, 0.47),
251 | (9, 25, 0.53),
252 | (10, 24, 0.61),
253 | (11, 24, 0.36),
254 | (12, 24, 0.12),
255 | (13, 1 , 0.63),
256 | (26, 1 , 0.21),
257 | (27, 1 , 0.48),
258 | (28, 23, 0.12),
259 | (29, 39, 0.63),
260 | (30, 22, 0.21),
261 | (31, 31, 0.48),
262 | (43, 12, 0.36),
263 | (44, 9 , 0.42),
264 | (45, 8 , 0.18),
265 | (6, 6 , 0.75),
266 | (7, 7 , 0.31),
267 | (8, 21, 0.28),
268 | (9, 22, 0.59),
269 | (10, 23, 0.61),
270 | (11, 27, 0.36),
271 | (12, 28, 0.12),
272 | (28, 31, 0.63),
273 | (29, 32, 0.21),
274 | (30, 33, 0.48),
275 | (31, 3 , 0.36),
276 | (43, 8 , 0.42),
277 | (44, 18, 0.18),
278 | (8, 6 , 0.75),
279 | (9, 4 , 0.31),
280 | (10, 32, 0.15),
281 | (11, 33, 0.03),
282 | (12, 34, 0.37),
283 | (28, 27, 0.25),
284 | (31, 29, 0.58),
285 | (43, 9 , 0.21),
286 | (44, 10, 0.32),
287 | (10, 2 , 0.23),
288 | (11, 1 , 0.19),
289 | (12, 2 , 0.18),
290 | (28, 1 , 0.38),
291 | (31, 5 , 0.37),
292 | (11, 15, 0.18),
293 | (12, 15, 0.26),
294 | (28, 15, 0.48),
295 | (11, 16, 0.45),
296 | (12, 16, 0.28),
297 | (28, 16, 0.26),
298 | (11, 17, 0.55),
299 | (12, 17, 0.34),
300 | (28, 17, 0.43),
301 | (12, 39, 0.42);
--------------------------------------------------------------------------------
/R/examples/bioinformatics/data/new_records/new_records.csv:
--------------------------------------------------------------------------------
1 | NHI,variant_code,gene_name,vaf,pop_freq,tumour_type
2 | NTTTTAX111,E1038G,BRCA1,0.33,0.07,osteosarcoma
3 | NTTTTAX112,D835Y,XRCC2,0.74,0.01,meningioma
4 |
--------------------------------------------------------------------------------
/R/examples/bioinformatics/data/patient_variants.csv:
--------------------------------------------------------------------------------
1 | "","patient_id","gene_name","variant_name","comment","vaf","pop_freq","ethnicity","cancer_type"
2 | "1",1,"BRCA1","E1038G","Associated with increased risk of breast and ovarian cancer",0.35,0.01,"Asian","Breast"
3 | "2",2,"BRCA1","E1038G","Associated with increased risk of breast and ovarian cancer",0.48,0.01,"Caucasian","Ovarian"
4 | "3",3,"FLT3","D835Y","Associated with poor prognosis in acute myeloid leukemia patients",0.15,0.02,"African American","Acute myeloid leukemia"
5 | "4",4,"GATA2","R396C","Associated with susceptibility to myelodysplastic syndrome",0.22,0.05,"Hispanic","Myelodysplastic syndrome"
6 | "5",5,"JAK2","V617F","Found in many cases of myeloproliferative neoplasms",0.67,0.02,"Caucasian","Myeloproliferative neoplasm"
7 | "6",6,"MAP2K1","K57N","Associated with increased risk of colorectal cancer",0.41,0.1,"African American","Colorectal"
8 | "7",7,"NOTCH1","P2514R","Found in many cases of T-cell acute lymphoblastic leukemia",0.58,0.07,"Hispanic","T-cell acute lymphoblastic leukemia"
9 | "8",8,"PIK3CA","E542K","Frequently mutated in various cancers, including breast, colon, and lung",0.72,0.03,"Caucasian","Lung cancer"
10 | "9",9,"PIK3CA","E542K","Frequently mutated in various cancers, including breast, colon, and lung",0.31,0.03,"Asian","Prostate cancer"
11 | "10",10,"PTEN","R130G","Associated with increased risk of multiple types of cancer",0.44,0.08,"Caucasian","Skin cancer"
12 | "11",11,"RB1","R661W","Frequently mutated in various types of cancer, including retinoblastoma",0.27,0.04,"African American","Retinoblastoma"
13 | "12",12,"SMAD4","R361H","Associated with increased risk of pancreatic cancer",0.39,0.01,"Hispanic","Pancreatic cancer"
14 | "13",13,"SMC1A","N496K","Associated with increased risk of Cornelia de Lange syndrome",0.18,0.02,"Caucasian","Cornelia de Lange syndrome"
15 | "14",14,"SMC3","N1020Y","Associated with increased risk of colorectal cancer",0.51,0.09,"Asian","Colorectal"
16 | "15",15,"STAT3","Y640F","Frequently mutated in various types of cancer, including lymphoma and leukemia",0.62,0.05,"American","Lymphoma African"
17 | "16",16,"TET2","G1664R","Frequently mutated in myelodysplastic syndromes and acute myeloid leukemia",0.29,0.03,"Hispanic","Myelodysplastic syndrome"
18 | "17",17,"TGFBR2","R537P","Associated with increased risk of hereditary nonpolyposis colorectal cancer",0.76,0.01,"Caucasian","Hereditary nonpolyposis colorectal cancer"
19 | "18",18,"TP53","R175H","Associated with increased risk of many types of cancer, including breast and colon",0.47,0.02,"African American","Breast cancer"
20 | "19",19,"VHL","R167W","Associated with increased risk of renal cell carcinoma",0.53,0.05,"Hispanic","Renal cell carcinoma"
21 | "20",20,"XPA","R228H","Associated with increased risk of skin cancer and xeroderma pigmentosum",0.61,0.06,"Caucasian","Skin cancer"
22 | "21",21,"XRCC2","R188H","Associated with increased risk of breast cancer and ovarian cancer",0.36,0.08,"Asian","Breast cancer"
23 | "22",22,"ZFHX3","R622W","Associated with increased risk of schizophrenia and bipolar disorder",0.12,0.02,"Caucasian","Schizophrenia"
24 | "23",23,"BRCA1","R361H","A study found that individuals with this variant had a higher risk of developing breast and ovarian cancer.",0.63,0.01,"Asian","Lung Cancer"
25 | "24",24,"BRCA1","E1038G","Associated with increased risk of breast and ovarian cancer",0.21,0.01,"African","Lung Cancer"
26 | "25",25,"BRCA1","E1038G","Associated with increased risk of breast and ovarian cancer",0.48,0.01,"Caucasian","Breast Cancer"
27 | "26",26,"BRCA1","D835Y","This variant has been found to be associated with increased sensitivity to chemotherapy.",0.36,0.03,"Asian","Breast Cancer"
28 | "27",27,"BRCA1","D835Y","This variant has been found to be associated with increased sensitivity to chemotherapy.",0.42,0.03,"Caucasian","Lung Cancer"
29 | "28",28,"BRCA1","D835Y","This variant has been found to be associated with increased sensitivity to chemotherapy.",0.18,0.03,"African","Lung Cancer"
30 | "29",29,"BRCA1","R396C",NA,0.75,0.02,"Caucasian","Prostate"
31 | "30",30,"FLT3","E1038G","Individuals with this variant have an increased risk of developing acute myeloid leukemia.",0.31,0.04,"Asian","Prostate Cancer"
32 | "31",31,"FLT3","R622W","A study found that this variant is associated with a higher response rate to FLT3 inhibitors in patients with acute myeloid leukemia.",0.28,0.06,"Hispanic","Lung Cancer"
33 | "32",32,"FLT3","D835Y","Associated with poor prognosis in acute myeloid leukemia patients",0.59,0.02,"Asian","Lung Cancer"
34 | "33",33,"GATA2","E1038G","This variant has been found to be associated with an increased risk of developing myelodysplastic syndrome.",0.14,0.09,"African","Lung"
35 | "34",34,"JAK2","K57N","This variant is commonly found in patients with myeloproliferative neoplasms and is associated with a poorer prognosis.",0.67,0.05,"Caucasian","Lung"
36 | "35",35,"MAP2K1","P2514R","A study found that individuals with this variant had a higher risk of developing colorectal cancer.",0.51,0.07,"Caucasian","Breast Cancer"
37 | "36",36,"MAP2K1","E542K","This variant has been found to be associated with a poorer response to chemotherapy in patients with colorectal cancer.",0.42,0.02,"Asian","Breast Cancer"
38 | "37",37,"NOTCH1","V617F","This variant has been found to be associated with a poorer prognosis in patients with T-cell acute lymphoblastic leukemia.",0.23,0.01,"Caucasian","Leukemia"
39 | "38",38,"PIK3CA","R661W","This variant is commonly found in patients with lung cancer and is associated with a poorer prognosis.",0.57,0.08,"Asian","Colorectal Cancer"
40 | "39",39,"PIK3CA","R396C","This variant has been found to be associated with a higher response rate to targeted therapy in patients with lung cancer.",0.41,0.06,"African","Colorectal Cancer"
41 | "40",40,"PTEN","R361H","A study found that individuals with this variant had a higher risk of developing prostate cancer.",0.62,0.02,"African","Prostate Cancer"
42 | "41",41,"RB1","N496K","This variant is commonly found in patients with retinoblastoma and is associated with a higher risk of developing secondary cancers.",0.28,0.03,"Caucasian","Retinoblastoma"
43 | "42",42,"SMAD4","R188H","This variant has been found to be associated with a higher risk of developing pancreatic cancer.",0.39,0.04,"Hispanic","Pancreatic Cancer"
44 | "43",43,"SMC1A","R361H","This variant is commonly found in patients with Cornelia de Lange syndrome and is associated with a more severe phenotype.",0.18,0.05,"African","Ovarian Cancer"
45 | "44",44,"SMC3","R396C","This variant is commonly found in patients with colorectal cancer and is associated with a poorer prognosis.",0.35,0.07,"Caucasian","Ovarian"
46 | "45",45,"STAT3","V617F","A study found that individuals with this variant had a higher risk of developing lymphoma.",0.49,0.01,"African","Leukemia"
47 | "46",46,"TET2","N1020Y","This variant is commonly found in patients with myelodysplastic syndrome and is associated with a poorer prognosis.",0.27,0.02,"African","Myelodysplastic Syndrome"
48 | "47",5,"MAP2K1","K57N","Associated with increased risk of colorectal cancer",0.62,0.1,"Caucasian","Myeloproliferative neoplasm"
49 | "48",6,"SMAD4","R361H","Associated with increased risk of pancreatic cancer",0.29,0.01,"African American","Colorectal"
50 | "49",7,"TGFBR2","R537P","Associated with increased risk of hereditary nonpolyposis colorectal cancer",0.76,0.01,"Hispanic","T-cell acute lymphoblastic leukemia"
51 | "50",8,"ZFHX3","R622W","Associated with increased risk of schizophrenia and bipolar disorder",0.47,0.02,"Caucasian","Lung cancer"
52 | "51",9,"FLT3","R622W","A study found that this variant is associated with a higher response rate to FLT3 inhibitors in patients with acute myeloid leukemia.",0.53,0.06,"Asian","Prostate cancer"
53 | "52",10,"FLT3","E1038G","Individuals with this variant have an increased risk of developing acute myeloid leukemia.",0.61,0.04,"Caucasian","Skin cancer"
54 | "53",11,"FLT3","E1038G","Individuals with this variant have an increased risk of developing acute myeloid leukemia.",0.36,0.04,"African American","Retinoblastoma"
55 | "54",12,"FLT3","E1038G","Individuals with this variant have an increased risk of developing acute myeloid leukemia.",0.12,0.04,"Hispanic","Pancreatic cancer"
56 | "55",13,"BRCA1","E1038G","Associated with increased risk of breast and ovarian cancer",0.63,0.01,"Caucasian","Cornelia de Lange syndrome"
57 | "56",26,"BRCA1","E1038G","Associated with increased risk of breast and ovarian cancer",0.21,0.01,"Asian","Breast Cancer"
58 | "57",27,"BRCA1","E1038G","Associated with increased risk of breast and ovarian cancer",0.48,0.01,"Caucasian","Lung Cancer"
59 | "58",28,"BRCA1","R396C",NA,0.12,0.02,"African","Lung Cancer"
60 | "59",29,"TET2","N1020Y","This variant is commonly found in patients with myelodysplastic syndrome and is associated with a poorer prognosis.",0.63,0.02,"Caucasian","Prostate"
61 | "60",30,"BRCA1","D835Y","This variant has been found to be associated with increased sensitivity to chemotherapy.",0.21,0.03,"Asian","Prostate Cancer"
62 | "61",31,"PIK3CA","R661W","This variant is commonly found in patients with lung cancer and is associated with a poorer prognosis.",0.48,0.08,"Hispanic","Lung Cancer"
63 | "62",43,"SMC3","N1020Y","Associated with increased risk of colorectal cancer",0.36,0.09,"African","Ovarian Cancer"
64 | "63",44,"RB1","R661W","Frequently mutated in various types of cancer, including retinoblastoma",0.42,0.04,"Caucasian","Ovarian"
65 | "64",45,"PTEN","R130G","Associated with increased risk of multiple types of cancer",0.18,0.08,"African","Leukemia"
66 | "65",6,"NOTCH1","P2514R","Found in many cases of T-cell acute lymphoblastic leukemia",0.75,0.07,"African American","Colorectal"
67 | "66",7,"PIK3CA","E542K","Frequently mutated in various cancers, including breast, colon, and lung",0.31,0.03,"Hispanic","T-cell acute lymphoblastic leukemia"
68 | "67",8,"BRCA1","R361H","A study found that individuals with this variant had a higher risk of developing breast and ovarian cancer.",0.28,0.01,"Caucasian","Lung cancer"
69 | "68",9,"BRCA1","D835Y","This variant has been found to be associated with increased sensitivity to chemotherapy.",0.59,0.03,"Asian","Prostate cancer"
70 | "69",10,"BRCA1","R396C",NA,0.61,0.02,"Caucasian","Skin cancer"
71 | "70",11,"JAK2","K57N","This variant is commonly found in patients with myeloproliferative neoplasms and is associated with a poorer prognosis.",0.36,0.05,"African American","Retinoblastoma"
72 | "71",12,"MAP2K1","P2514R","A study found that individuals with this variant had a higher risk of developing colorectal cancer.",0.12,0.07,"Hispanic","Pancreatic cancer"
73 | "72",28,"PIK3CA","R661W","This variant is commonly found in patients with lung cancer and is associated with a poorer prognosis.",0.63,0.08,"African","Lung Cancer"
74 | "73",29,"PIK3CA","R396C","This variant has been found to be associated with a higher response rate to targeted therapy in patients with lung cancer.",0.21,0.06,"Caucasian","Prostate"
75 | "74",30,"PTEN","R361H","A study found that individuals with this variant had a higher risk of developing prostate cancer.",0.48,0.02,"Asian","Prostate Cancer"
76 | "75",31,"GATA2","R396C","Associated with susceptibility to myelodysplastic syndrome",0.36,0.05,"Hispanic","Lung Cancer"
77 | "76",43,"PTEN","R130G","Associated with increased risk of multiple types of cancer",0.42,0.08,"African","Ovarian Cancer"
78 | "77",44,"XPA","R228H","Associated with increased risk of skin cancer and xeroderma pigmentosum",0.18,0.06,"Caucasian","Ovarian"
79 | "78",8,"NOTCH1","P2514R","Found in many cases of T-cell acute lymphoblastic leukemia",0.75,0.07,"Caucasian","Lung cancer"
80 | "79",9,"JAK2","V617F","Found in many cases of myeloproliferative neoplasms",0.31,0.02,"Asian","Prostate cancer"
81 | "80",10,"PIK3CA","R396C","This variant has been found to be associated with a higher response rate to targeted therapy in patients with lung cancer.",0.15,0.06,"Caucasian","Skin cancer"
82 | "81",11,"PTEN","R361H","A study found that individuals with this variant had a higher risk of developing prostate cancer.",0.03,0.02,"African American","Retinoblastoma"
83 | "82",12,"RB1","N496K","This variant is commonly found in patients with retinoblastoma and is associated with a higher risk of developing secondary cancers.",0.37,0.03,"Hispanic","Pancreatic cancer"
84 | "83",28,"JAK2","K57N","This variant is commonly found in patients with myeloproliferative neoplasms and is associated with a poorer prognosis.",0.25,0.05,"African","Lung Cancer"
85 | "84",31,"MAP2K1","E542K","This variant has been found to be associated with a poorer response to chemotherapy in patients with colorectal cancer.",0.58,0.02,"Hispanic","Lung Cancer"
86 | "85",43,"RB1","R661W","Frequently mutated in various types of cancer, including retinoblastoma",0.21,0.04,"African","Ovarian Cancer"
87 | "86",44,"SMAD4","R361H","Associated with increased risk of pancreatic cancer",0.32,0.01,"Caucasian","Ovarian"
88 | "87",10,"FLT3","D835Y","Associated with poor prognosis in acute myeloid leukemia patients",0.23,0.02,"Caucasian","Skin cancer"
89 | "88",11,"BRCA1","E1038G","Associated with increased risk of breast and ovarian cancer",0.19,0.01,"African American","Retinoblastoma"
90 | "89",12,"FLT3","D835Y","Associated with poor prognosis in acute myeloid leukemia patients",0.18,0.02,"Hispanic","Pancreatic cancer"
91 | "90",28,"BRCA1","E1038G","Associated with increased risk of breast and ovarian cancer",0.38,0.01,"African","Lung Cancer"
92 | "91",31,"MAP2K1","K57N","Associated with increased risk of colorectal cancer",0.37,0.1,"Hispanic","Lung Cancer"
93 | "92",11,"TGFBR2","R537P","Associated with increased risk of hereditary nonpolyposis colorectal cancer",0.18,0.01,"African American","Retinoblastoma"
94 | "93",12,"TGFBR2","R537P","Associated with increased risk of hereditary nonpolyposis colorectal cancer",0.26,0.01,"Hispanic","Pancreatic cancer"
95 | "94",28,"TGFBR2","R537P","Associated with increased risk of hereditary nonpolyposis colorectal cancer",0.48,0.01,"African","Lung Cancer"
96 | "95",11,"TP53","R175H","Associated with increased risk of many types of cancer, including breast and colon",0.45,0.02,"African American","Retinoblastoma"
97 | "96",12,"TP53","R175H","Associated with increased risk of many types of cancer, including breast and colon",0.28,0.02,"Hispanic","Pancreatic cancer"
98 | "97",28,"TP53","R175H","Associated with increased risk of many types of cancer, including breast and colon",0.26,0.02,"African","Lung Cancer"
99 | "98",11,"VHL","R167W","Associated with increased risk of renal cell carcinoma",0.55,0.05,"African American","Retinoblastoma"
100 | "99",12,"VHL","R167W","Associated with increased risk of renal cell carcinoma",0.34,0.05,"Hispanic","Pancreatic cancer"
101 | "100",28,"VHL","R167W","Associated with increased risk of renal cell carcinoma",0.43,0.05,"African","Lung Cancer"
102 | "101",12,"TET2","N1020Y","This variant is commonly found in patients with myelodysplastic syndrome and is associated with a poorer prognosis.",0.42,0.02,"Hispanic","Pancreatic cancer"
103 | "102",47,NA,NA,NA,NA,NA,"African","Myelodysplastic Syndrome"
104 |
--------------------------------------------------------------------------------
/R/examples/bioinformatics/data/variants.sqlite:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/frycast/SQL_course/13d5cf01fdd82b7cf8de3bb29846b5d595d07ab4/R/examples/bioinformatics/data/variants.sqlite
--------------------------------------------------------------------------------
/R/examples/bioinformatics/data/variants_dodgy.sqlite:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/frycast/SQL_course/13d5cf01fdd82b7cf8de3bb29846b5d595d07ab4/R/examples/bioinformatics/data/variants_dodgy.sqlite
--------------------------------------------------------------------------------
/R/examples/bioinformatics/functions.R:
--------------------------------------------------------------------------------
1 | # -----------------------
2 | #
3 | # Author: Daniel Fryer
4 | # Date: 2023-02-23
5 | #
6 | # Helper functions for bioinformatics example
7 | #
8 | # -----------------------
9 |
10 |
11 | # execute_sql_script
12 | #
13 | # This function loads a '.sql' file and then executes
14 | # every statement in the file on the SQL database.
15 | #
16 | # It breaks the script up and executes one statement
17 | # at a time because dbExecute is unable to
18 | # execute multiple statements.
19 | #
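# Example (as used in start_here.R):
#   execute_sql_script(con, "create_variants_database.sql")
#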
20 | execute_sql_script <- function(con, file) {
21 | sql <- readr::read_file(file)
22 | sql <- glue::glue_collapse(sql)
23 | sql <- strsplit(sql, ";")
24 | lapply(
25 | sql[[1]],
26 | function(s) {DBI::dbExecute(con, s)}
27 | )
28 | }
29 |
30 |
31 | # split_variant_name
32 | #
33 | # The first letter of a variant name refers to the amino acid that is normally
34 | # at the position within the gene.
35 | #
36 | # Sandwiched between the first and last letter is a number.
37 | # This number is the position within the gene.
38 | #
39 | # The last letter of a variant name refers to the amino acid resulting from
40 | # the change.
41 | #
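# For example, split_variant_name("G13D") returns a one-row data frame with
# change_from = "G", position = 13, change_to = "D".
#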
42 | split_variant_name <- function(variant_name) {
43 | first_letter <- stringr::str_sub(variant_name, 1, 1)
44 | last_letter <- stringr::str_sub(variant_name, - 1, - 1)
45 | middle_part <- stringr::str_sub(variant_name, 2, -2)
46 | return(
47 | data.frame(change_from = first_letter,
48 | position = as.integer(middle_part),
49 | change_to = last_letter)
50 | )
51 | }
52 |
53 |
54 | # find_gene_variant_by_name
55 | #
56 | # check if a gene_variant is already in the database, searching
57 | # only by gene name and variant name
58 | #
59 | # if it is present, return details from the database;
60 | # otherwise, return an empty table
61 | #
62 | find_gene_variant_by_name <- function(con, gene_name, variant_name) {
63 |
64 | query <- glue::glue("
65 | SELECT G.name AS gene_name,
66 | V.name AS variant_name,
67 | GV.pop_freq, GV.comment,
68 | G.gene_id, V.variant_id, GV.gene_variant_id
69 | FROM gene_variant GV
70 | JOIN gene G ON GV.gene_id = G.gene_id
71 | JOIN variant V ON GV.variant_id = V.variant_id
72 | WHERE G.name = '{gene_name}' AND V.name = '{variant_name}'
73 | ")
74 | return(DBI::dbGetQuery(con, query))
75 | }
76 |
77 |
78 | # find_gene_variant
79 | #
80 | # check if a gene_variant is already in the database, searching
81 | # only by gene_id and variant_id
82 | #
83 | # if it is present, return details from the database;
84 | # otherwise, return an empty table
85 | #
86 | find_gene_variant <- function(con, gene_id, variant_id) {
87 |
88 | query <- glue::glue("
89 | SELECT G.name AS gene_name,
90 | V.name AS variant_name,
91 | GV.pop_freq, GV.comment,
92 | G.gene_id, V.variant_id, GV.gene_variant_id
93 | FROM gene_variant GV
94 | JOIN gene G ON GV.gene_id = G.gene_id
95 | JOIN variant V ON GV.variant_id = V.variant_id
96 | WHERE G.gene_id = {gene_id} AND V.variant_id = {variant_id}
97 | ")
98 | return(DBI::dbGetQuery(con, query))
99 | }
100 |
--------------------------------------------------------------------------------
/R/examples/bioinformatics/start_here.R:
--------------------------------------------------------------------------------
1 | # -----------------------
2 | #
3 | # Author: Daniel Fryer
4 | # Date: 2023-02-23
5 | #
6 | # This script creates two sqlite databases:
7 | #
8 | # 1. variants
9 | # 2. variants_dodgy
10 | #
11 | # Database 1 (variants) contains 5 tables:
12 | # Gene, variant, Gene_variant, Patient and Patient_Gene_variant.
13 | #
14 | # Database 2 (variants_dodgy) contains 1 table:
15 | # patient_variants
16 | #
17 | # In database 1, we use primary/foreign key pairs
18 | # to specify relationships between tables.
19 | #
20 | # In database 2, we avoid primary/foreign keys,
21 | # and just insert everything into one table.
22 | #
23 | # The same dummy data is used in both databases.
24 | #
25 | # Database 1 (variants) is then
26 | # compared to database 2 (variants_dodgy).
27 | #
28 | # The comparison highlights why we prefer
29 | # database 1 over database 2.
30 | #
31 | # CAUTION:
32 | # A lot of the data for this exercise was generated by ChatGPT.
33 | # It is NOT a real dataset.
34 | #
35 | # -----------------------
36 |
37 | # See RSQLite tutorial here:
38 | # https://cran.r-project.org/web/packages/RSQLite/vignettes/RSQLite.html
39 |
40 | install.packages("RSQLite")
41 | library(RSQLite)
42 |
43 | # We will also use these libraries (some are used by functions in functions.R)
44 | library(readr)
45 | library(glue)
46 | library(tibble)
47 | library(dplyr)
48 | library(magrittr)
49 |
50 | # We will use these helper functions too
51 | source("functions.R")
52 |
53 | # -------------------------------------------------------------------------
54 |
55 | # create an empty database and connect to it
56 | con <- DBI::dbConnect(RSQLite::SQLite(), "data/variants.sqlite")
57 |
58 | # execute the script create_variants_database.sql
59 | # NOTE: if the database already exists, this will produce an error.
60 | # you may want to delete data/variants.sqlite first.
61 | execute_sql_script(con, "create_variants_database.sql")
62 |
63 | # list the table names
64 | DBI::dbListTables(con)
65 |
66 | # take a look at the Gene table for example
67 | gene <- dplyr::tbl(con, "Gene")
68 | gene <- gene %>% dplyr::collect()
69 |
70 | # This joins all the tables in the variants database
71 | query <- '
72 | SELECT P.patient_id,
73 | G.name AS gene_name, V.name AS variant_name,
74 | GV.comment,
75 | PGV.vaf, GV.pop_freq,
76 | P.ethnicity, P.cancer_type
77 | FROM Patient_Gene_variant PGV
78 | RIGHT JOIN Gene_variant GV ON PGV.gene_variant_id = GV.gene_variant_id
79 | RIGHT JOIN Gene G ON GV.gene_id = G.gene_id
80 | RIGHT JOIN Variant V ON GV.variant_id = V.variant_id
81 | RIGHT JOIN Patient P ON PGV.patient_id = P.patient_id
82 | '
83 | one_table <- DBI::dbGetQuery(con, query)
84 |
85 | # disconnect from the variants database
86 | DBI::dbDisconnect(con)
87 |
88 | # create an empty variants_dodgy database
89 | con <- DBI::dbConnect(RSQLite::SQLite(), "data/variants_dodgy.sqlite")
90 |
91 | # add table to the database
92 | DBI::dbWriteTable(con, "patient_variants", one_table)
93 |
94 | # disconnect from the variants_dodgy database
95 | DBI::dbDisconnect(con)
96 |
97 | # write the patient_variants table as a csv
98 | write.csv(one_table, "data/patient_variants.csv")
99 |
100 | # Example -----------------------------------------------------------------
101 |
102 | con1 <- DBI::dbConnect(RSQLite::SQLite(), "data/variants_dodgy.sqlite")
103 | con2 <- DBI::dbConnect(RSQLite::SQLite(), "data/variants.sqlite")
104 |
105 | # Variant names (like G13D) always have the pattern <change_from><position><change_to>.
106 | # Find all the variants starting with D. Count the number of times a patient
107 | # has had such a variant.
108 |
109 | # query1 and query2 both achieve the same thing, just on different database
110 | # structures.
111 |
112 | # This one uses variants_dodgy database. It makes use of LIKE
113 | query1 <- "
114 | SELECT COUNT(*) AS num
115 | FROM patient_variants
116 | WHERE variant_name LIKE 'D%'
117 | "
118 |
119 | # This one uses the variants database. It makes use of JOIN and '='
120 | query2 <- "
121 | SELECT COUNT(*) AS num
122 | FROM Patient_Gene_variant PGV
123 | JOIN Gene_Variant GV ON PGV.gene_variant_id = GV.gene_variant_id
124 | JOIN Variant V ON GV.variant_id = V.variant_id
125 | WHERE V.change_from = 'D'
126 | "
127 |
128 | result1 <- DBI::dbGetQuery(con1, query1)
129 | result2 <- DBI::dbGetQuery(con2, query2)
130 |
131 | DBI::dbDisconnect(con1)
132 | DBI::dbDisconnect(con2)
133 |
134 | # Auto-generated primary key (AUTOINCREMENT) -------------------------------
135 |
136 | # The tables are set up so that they automatically
137 | # generate a primary key if we don't insert one explicitly
138 | con <- DBI::dbConnect(RSQLite::SQLite(), "data/variants.sqlite")
139 | p <- data.frame(ethnicity = "TEST")
140 | DBI::dbAppendTable(con, "Patient", p) # success!
141 |
142 | # We can retrieve the patient_id that was generated, and use it to find the patient record
143 | auto_patient_id <- DBI::dbGetQuery(con, "SELECT last_insert_rowid()")$`last_insert_rowid()`
144 | DBI::dbGetQuery(con, glue::glue(
145 | "SELECT * FROM Patient WHERE patient_id = {auto_patient_id}"))
146 |
147 | DBI::dbDisconnect(con)
148 |
149 | # the columns of the new data are:
150 | # NHI, variant_code, gene_name, vaf, pop_freq, tumour_type
151 |
152 | new_records <- read.csv("data/new_records/new_records.csv")
153 |
154 | # no ethnicity (NULL)
155 | new_patients <- data.frame(patient_id = new_records$NHI,
156 | cancer_type = new_records$tumour_type)
157 |
158 | # TODO: check if the patient is in the table
159 | # TODO: if new patient then insert row, retrieve patient_id
160 |
161 | # no variant_id
162 | new_variants <- data.frame(name = new_records$variant_code)
163 | new_variants <- cbind(new_variants, split_variant_name(new_variants$name))
164 |
165 | # TODO: check if the variant is in the table
166 | # TODO: if new variant then insert row, retrieve the generated variant_id
167 |
168 | # no gene_id
169 | new_genes <- data.frame(name = new_records$gene_name)
170 |
171 | # TODO: check if the gene is in the table
172 | # TODO: if new gene then insert row, retrieve the generated gene_id
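# A minimal sketch for the two gene TODOs above (commented out; it assumes an
# open connection `con` to data/variants.sqlite and one new gene at a time):
#
# existing <- DBI::dbGetQuery(con, glue::glue(
#   "SELECT gene_id FROM Gene WHERE name = '{new_genes$name[1]}'"))
# if (nrow(existing) == 0) {
#   DBI::dbAppendTable(con, "Gene", data.frame(name = new_genes$name[1]))
#   gene_id <- DBI::dbGetQuery(con, "SELECT last_insert_rowid()")[[1]]
# } else {
#   gene_id <- existing$gene_id[1]
# }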
173 |
174 | # no gene_variant_id, gene_id or variant_id
175 | new_gene_variants <- data.frame(pop_freq = new_records$pop_freq)
176 |
177 | # TODO: use gene_id and variant_id in find_gene_variant() to check if the gene_variant is in the table
178 | # TODO: if new gene_variant then insert row, retrieve the gene_variant_id
179 |
180 | # no gene_variant_id
181 | new_patient_gene_variants <- data.frame(patient_id = new_records$NHI,
182 | vaf = new_records$vaf)
183 |
184 | # TODO: if it's an existing patient, check if they already have that gene_variant recorded
185 | # TODO: if new patient_gene_variant then insert row
186 |
187 | # Exploring ---------------------------------------------------------------
188 |
189 | # # convert one_table to a tibble and arrange by patient_id
190 | # one_table <- one_table %>%
191 | # tibble::as_tibble() %>%
192 | # dplyr::arrange(patient_id)
193 | #
194 | # # try out the function for splitting variant names
195 | # split_variant_name(one_table$variant_name)
196 |
197 | # Invalid missense variant nomenclature examples for G13D:
198 | # '13D', 'G 13 D', '13>GD', etc
199 | # 'KRAS: c.38G>A','p.Gly13Asp'
200 |
201 | # Invalid gene nomenclature examples for KRAS:
202 | # 'C-K-RAS', 'CFC2', 'K-RAS2A', 'K-RAS2B'
203 |
204 | # vaf:
205 | # the proportion of DNA molecules in a
206 | # sample that carry a specific genetic variant.
207 |
208 | # pop_freq:
209 | # proportion of individuals in a given population
210 | # who carry a specific genetic variant.
211 |
212 |
213 |
214 |
215 |
216 |
217 |
218 |
--------------------------------------------------------------------------------
/R/intro-to-R.R:
--------------------------------------------------------------------------------
1 | message("
2 | This R script aims to get you acquainted with R for the first time.
3 |
4 | Prerequisites are that you:
5 |
6 | * have R and RStudio installed
7 | * know how to execute code in R
8 | * know what a script is, and how to execute chunks of code from a script
9 |
10 | If you are a student in my course, feel free to contact me any time for help!
11 |
12 | Each comment below explains what the code underneath it is doing.
13 |
14 | Read each comment, and then execute the code below the comment (in RStudio).
15 |
16 | ")
17 |
18 | # Step 1: install the Swirl R package
19 | install.packages("swirl")
20 |
21 | # Step 2: load the swirl library
22 | library(swirl)
23 |
24 | # Step 3: run the swirl tutorial
25 | swirl()
26 |
27 | # Done! Now, move your cursor down into the console (bottom left of RStudio)
28 | # and follow the instructions in the console (given by the swirl tutorial).
29 |
30 | # The instructions in the console will let you choose which 'courses' to do.
31 | # Please complete as many courses as you can, but at least complete:
32 | # 1: R Programming: The basics of programming in R.
33 |
34 | # Have fun.
--------------------------------------------------------------------------------
/R/sqlite-R/MyDB.sqlite:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/frycast/SQL_course/13d5cf01fdd82b7cf8de3bb29846b5d595d07ab4/R/sqlite-R/MyDB.sqlite
--------------------------------------------------------------------------------
/R/sqlite-R/README.md:
--------------------------------------------------------------------------------
1 | # Using SQLite in R
2 |
3 | This folder contains an R script and practice database to guide you through using SQLite in R.
4 |
5 | To download the scripts, you can right-click (or control-click) the links below, and then click "save link as".
6 |
7 | * [Click here for the R script](https://github.com/frycast/SQL_course/raw/master/R/sqlite-R/sqlite.R)
8 | * [Click here for the Sandpit practice database file](https://github.com/frycast/SQL_course/raw/master/R/sqlite-R/Sandpit.sqlite)
9 |
--------------------------------------------------------------------------------
/R/sqlite-R/Sandpit.sqlite:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/frycast/SQL_course/13d5cf01fdd82b7cf8de3bb29846b5d595d07ab4/R/sqlite-R/Sandpit.sqlite
--------------------------------------------------------------------------------
/R/sqlite-R/sqlite.R:
--------------------------------------------------------------------------------
1 | # See RSQLite tutorial here:
2 | # https://cran.r-project.org/web/packages/RSQLite/vignettes/RSQLite.html
3 |
4 | install.packages("RSQLite")
5 | library(RSQLite)
6 |
7 | # Connect to the Sandpit database -----------------------------------------
8 |
9 | # If the SQLite Sandpit database is not already in your working directory,
10 | # then first download it from the link below,
11 | # and save it in your working directory.
12 | # https://github.com/frycast/SQL_course/raw/master/R/sqlite-R/Sandpit.sqlite
13 |
14 | # connect to Sandpit
15 | con <- DBI::dbConnect(RSQLite::SQLite(), "Sandpit.sqlite")
16 |
17 | # List all the tables in Sandpit
18 | DBI::dbListTables(con)
19 |
20 | # disconnect
21 | DBI::dbDisconnect(con)
22 |
23 | # Create a new empty database --------------------------------------------
24 |
25 | # create (or connect to) the database
26 | con <- DBI::dbConnect(RSQLite::SQLite(), "MyDB.sqlite")
27 |
28 | # disconnect
29 | DBI::dbDisconnect(con)
30 |
31 | # Save data to a database -----------------------------------------------
32 |
33 | # We'll save the built-in mtcars and iris datasets to a SQLite database
34 | mtcars
35 | iris
36 |
37 | # create (or connect to) the database
38 | con <- DBI::dbConnect(RSQLite::SQLite(), "MyDB.sqlite")
39 |
40 | # Write data to the SQLite database
41 | DBI::dbWriteTable(con, "mtcars", mtcars)
42 | DBI::dbWriteTable(con, "iris", iris)
43 |
44 | # View the tables in our database
45 | DBI::dbListTables(con)
46 |
47 | # Connect to one of the tables
48 | mtcars_con <- dplyr::tbl(con, "mtcars")
49 |
50 | # write queries
51 | library(dbplyr)
52 | library(dplyr)
53 |
54 | # query 1
55 | mtcars_q1 <- mtcars_con %>%
56 | dplyr::select(mpg, cyl, disp)
57 |
58 | # query 2
59 | mtcars_q2 <- mtcars_con %>%
60 | dplyr::filter(gear == 4) %>%
61 | dplyr::select(mpg, cyl)
62 |
63 | # query 3
64 | mtcars_q3 <- mtcars_con %>%
65 | dplyr::filter(qsec > 16) %>%
66 | dplyr::group_by(gear) %>%
67 | dplyr::summarise(
68 | mpg_max = max(mpg, na.rm=T),
69 | cyl_mean = mean(cyl, na.rm=T))
70 |
71 | # query 4
72 | mtcars_q4 <- mtcars_con %>%
73 | dplyr::group_by(gear) %>%
74 | dplyr::filter(qsec > 16) %>%
75 | dplyr::summarise(
76 | mpg_max = max(mpg, na.rm=T),
77 | cyl_mean = mean(cyl, na.rm=T)) %>%
78 | dplyr::filter(cyl_mean <= 5)
79 |
80 | # look at the SQL code that dplyr generated
81 | mtcars_q1 %>% show_query()
82 | mtcars_q2 %>% show_query()
83 | mtcars_q3 %>% show_query()
84 | mtcars_q4 %>% show_query() # this one uses a subquery to avoid using HAVING.
85 |
86 | # execute the sql code and retrieve the full results
87 | mtcars_q1 %>% collect()
88 | mtcars_q2 %>% collect()
89 | mtcars_q3 %>% collect()
90 | mtcars_q4 %>% collect()
91 |
92 | # Or we can write the SQL manually instead!
93 | result <- DBI::dbGetQuery(
94 | con, "
95 | SELECT gear, MAX(mpg) AS mpg_max, AVG(cyl) AS cyl_mean
96 | FROM mtcars
97 | WHERE qsec > 16
98 | GROUP BY gear
99 | HAVING cyl_mean <= 5
100 | ")
101 |
102 | # view the result
103 | result
104 |
105 | # A neat trick is to use string interpolation to edit the queries
106 | # with variables from your R environment:
107 | # See https://stringr.tidyverse.org/reference/str_interp.html
108 | for (next_cyl in c(4,6,8)) {
109 | res <- DBI::dbGetQuery(con, stringr::str_interp("
110 | SELECT *
111 | FROM mtcars
112 | WHERE cyl = ${next_cyl}
113 | "))
114 | cat("\nResults for cyl = ", next_cyl, "\n")
115 | print(res)
116 | }
117 |
118 | # disconnect
119 | DBI::dbDisconnect(con)
120 |
121 | # Side note:
122 | # You may want to browse the database we created, outside of R.
123 | # Assuming the database file ("MyDB.sqlite") is saved on your computer
124 | # you can browse it with DB Browser (a great open-source app).
125 | # Download DB Browser here:
126 | # https://sqlitebrowser.org/dl/
127 |
128 | # After installing DB browser, you may have to choose "open with"
129 | # on the "MyDB.sqlite" file, and then navigate to your programs/apps directory
130 | # to find DB Browser and open it.
131 |
132 | # Batched queries ---------------------------------------------------------
133 |
134 | # create (or connect to) the database
135 | con <- DBI::dbConnect(RSQLite::SQLite(), "MyDB.sqlite")
136 |
137 | # Send a query but don't retrieve the results yet
138 | rs <- DBI::dbSendQuery(con, "
139 | SELECT *
140 | FROM mtcars
141 | ")
142 | rs
143 |
144 | # Retrieve the first 3 results
145 | df <- DBI::dbFetch(rs, n = 3)
146 |
147 | # df is a regular data.frame
148 | df
149 | mean(df$mpg)
150 |
151 | # We can see how many results have been retrieved
152 | rs
153 |
154 | # This will return TRUE if all results have been retrieved
155 | DBI::dbHasCompleted(rs)
156 |
157 | # Retrieve the next 3 results
158 | df <- DBI::dbFetch(rs, n = 3)
159 | df
160 |
161 | # Retrieve the rest of the results, 3 at a time
162 | while (!DBI::dbHasCompleted(rs)) {
163 | df <- DBI::dbFetch(rs, n = 3)
164 |
165 | # << insert processing on df here >>
166 |
167 | print(nrow(df))
168 | }
169 |
170 | # Clear/delete the query
171 | DBI::dbClearResult(rs)
172 |
173 | # disconnect
174 | DBI::dbDisconnect(con)
175 |
176 | # Saving CSV files to a database -----------------------------------------
177 |
178 | # create (or connect to) the database
179 | con <- DBI::dbConnect(RSQLite::SQLite(), "MyDB.sqlite")
180 |
181 | # First let's make some CSVs to practice with (in the working directory)
182 | write.csv(mtcars, "mtcars_example.csv")
183 | write.csv(iris, "iris_example.csv")
184 |
185 | # Find all the filenames in the working directory
186 | all_filenames <- list.files(".")
187 |
188 | # Find all the filenames that end in ".csv"
189 | csv_filenames <- all_filenames %>% stringr::str_subset("\\.csv$")
190 |
191 | # Loop over the CSVs filenames
192 | for (f in csv_filenames) {
193 |
194 | # Read in the CSV
195 | data <- read.csv(f)
196 |
197 | # Remove ".csv" from the name
198 |   table_name <- f %>% stringr::str_remove("\\.csv$")
199 |
200 | # Write the table to our database
201 | RSQLite::dbWriteTable(con, name = table_name, value = data)
202 | }
203 |
204 | # List all the tables in our database
205 | DBI::dbListTables(con)
206 |
207 | # disconnect
208 | DBI::dbDisconnect(con)
209 |
--------------------------------------------------------------------------------
/R/tidy-data.R:
--------------------------------------------------------------------------------
1 | # TODO: Check out https://tidydatatutor.com/
2 |
3 | # To get started, we need the tidyr package
4 | install.packages("tidyr")
5 | library(tidyr)
6 |
7 | # Relig_income data -------------------------------------------------------
8 |
9 | # The package comes with its own datasets to practice on, for example:
10 | relig_income
11 |
12 | # Pivoting the table will make it 'tidy'
13 | pivot_longer(
14 | data=relig_income,
15 | cols=!religion,
16 | names_to = "income",
17 | values_to = "count"
18 | )
19 |
20 | # Non-atomic cell values --------------------------------------------------
21 |
22 | # Create the address data
23 | address_data <- data.frame(
24 | name = c("Raleigh Smith",
25 | "Hoa Pham",
26 | "Ram Singh"),
27 | address = c("109 Tenterfield Rd, North Sydney, NSW, 2060",
28 | "36 Feather St, Kings Beach, QLD, 4551",
29 | "90 Sunnyside Rd, Renmark, SA, 5341")
30 | )
31 |
32 | # Separate the address column
33 | separate(
34 | data = address_data,
35 | col = address,
36 | into = c("street","city","state","postcode"),
37 | sep = ", ")
38 |
39 | # Anscombe's Quartet data -------------------------------------------------
40 |
41 | anscombe_tidy <- pivot_longer(
42 | data = anscombe,
43 | cols = everything(),
44 | names_to = c(".value", "set"),
45 | names_pattern = "(.)(.)"
46 | )
47 |
48 | anscombe_tidy
49 |
50 | ## Figuring out the rest of this example is an advanced exercise
51 | ## (not usually suitable for new R programmers).
52 |
53 | # Plot the data with lines of best fit
54 | library(ggplot2)
55 | anscombe_tidy %>%
56 | ggplot(aes(x = x, y = y)) +
57 | geom_point() +
58 | facet_wrap(~set) +
59 | geom_smooth(method = "lm", se = FALSE) +
60 | theme_minimal()
61 |
62 | # fit a linear regression to each set and obtain results for comparison
63 | library(dplyr)
64 | # (map() comes from purrr, loaded below; there is no separate 'map' package)
65 | library(broom)
66 | library(purrr)
67 | anscombe_tidy %>%
68 | nest(data = !set) %>%
69 | mutate(
70 | model = map(data, ~lm(y~x, data = .)),
71 | tidied = map(model, tidy),
72 | data = map(model, augment)) %>%
73 | unnest(tidied)
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Introduction to SQL
2 |
3 | Textbook and course material for the Introduction to SQL course, taught by [Daniel Fryer](https://danielvfryer.com).
4 |
5 | This repository includes slides for the full 4-day course.
6 |
7 | There is also a 2-day course. If you are attending the 2-day course, we won't cover all of the 4-day material, but you still get access to all of it.
8 |
9 | ## Set up guides
10 |
11 | For this course, you need to choose between T-SQL and MySQL. If you don't know which one you want, then consider this: if you are on Windows, T-SQL is the easiest to set up and use. If you are on macOS, then MySQL is the easiest to set up and use. Please do not leave the setup process until the last minute. Leave some time to iron out difficulties getting set up! Feel free to contact me if you need help. The guides below will help you get set up.
12 |
13 | In my experience, it is best to do this on a personal computer, since some people have experienced difficulty getting set up on a work computer (e.g., if you don't have administrator rights).
14 |
15 | * **Set up your database with T-SQL (easiest for Windows):** [click here](create-database/T-SQL/README.md)
16 | * **Set up your database with MySQL (easiest for macOS):** [click here](create-database/MySQL/README.md)
17 |
18 | ## Textbook and slides
19 |
20 | The textbook and slides are linked below.
21 |
22 | * [Course text (The Necessary SQL, featuring MySQL and T-SQL)](textbook-and-slides/SQL_Course_Textbook.pdf)
23 | * [Day 1 slides](textbook-and-slides/SQL_Course_Slides_Day_1.pdf)
24 | * [Day 2 slides](textbook-and-slides/SQL_Course_Slides_Day_2.pdf)
25 | * [Day 3 slides](textbook-and-slides/SQL_Course_Slides_Day_3.pdf)
26 | * Day 4 slides:
27 | - [Day 4 non-interactive pdf version](textbook-and-slides/SQL_Course_Slides_Day_4.pdf)
28 | - [Day 4 interactive web version](https://rpubs.com/frycast/sql-with-r)
29 |
30 |
31 | ## Beginner R tutorial
32 |
33 | R is covered in the 4-day course, though we may have time for some R material during the 2-day course, depending on the audience. For beginner R programmers, day 4 may move a bit fast. This tutorial (by [SwirlStats](https://swirlstats.com/students.html)) will help you get prepared (or brush up). Right-click (or control-click) the link below, and choose 'save link as'. This will download the R script. Once downloaded, open the script in RStudio.
34 |
35 | * [Beginner R tutorial (R script)](https://github.com/frycast/SQL_course/raw/master/R/intro-to-R.R)
36 |
37 | If this is the first time you've used RStudio, I've created a video for you to guide you through using the above script.
38 |
39 | * [Brief intro to RStudio (video)](https://youtu.be/rdcVS7CrWPw)
40 |
41 | To be fully prepared for day 4, I suggest you complete all lessons from Lesson 1 (Basic Building Blocks), to Lesson 9 (Functions), of the Intro to R course, within the Swirl tutorial. You will see these lessons when you start the Swirl tutorial (also explained in the video linked above). I suspect this will take about 1 hour.
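If you'd like a preview of what the script does, it essentially installs and launches Swirl; a minimal sketch of the key commands is below (the script itself adds explanatory comments around them).

```r
# Install and launch the Swirl tutorial
install.packages("swirl")
library(swirl)
swirl()
```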
42 |
43 | ## IDI Stuff
44 |
45 | If you are working with the New Zealand Integrated Data Infrastructure (4-day course only), you may be interested in the links below.
46 |
47 | * [Day 3 slides include a section on the IDI](textbook-and-slides/SQL_Course_Slides_Day_3.pdf)
48 | * [The 'IDI Stuff' folder includes some useful papers and two data dictionaries](IDI-stuff)
49 | * [Many more data dictionaries on StatsNZ DataInfo+ website](http://datainfoplus.stats.govt.nz/Search?query=idi&search=Search&itemType=4bd6eef6-99df-40e6-9b11-5b8f64e5cb23)
50 | * [A searchable list of variables available in the IDI](https://idi-search.web.app/claims/acc_cla_accident_in_NZ_ind)
51 |
52 |
--------------------------------------------------------------------------------
/create-database/MySQL/MySQL-database.sql:
--------------------------------------------------------------------------------
1 | SET SQL_REQUIRE_PRIMARY_KEY = OFF;
2 |
3 | -- -----------------------------------------------------------------
4 |
5 | -- Practice data for the Intro to SQL Course by Daniel Fryer.
6 |
7 | -- -----------------------------------------------------------------
8 |
9 | -- -----------------------------------------------------------------
10 | -- CREATE IDI_CLEAN DATABASE --------------------------------------
11 | -- -----------------------------------------------------------------
12 |
13 | DROP DATABASE IF EXISTS IDI_Clean;
14 | CREATE DATABASE IDI_Clean;
15 |
16 | USE IDI_Clean;
17 |
18 | CREATE TABLE DIA_Clean_births (
19 | snz_uid int not null UNIQUE,
20 | snz_dia_uid int not null UNIQUE,
21 | parent1_snz_uid int null,
22 | parent1_snz_dia_uid int null,
23 | dia_bir_parent1_sex_snz_code varchar(100) null,
24 | dia_bir_parent1_occupation_text varchar(60) null,
25 | parent2_snz_uid int null,
26 | parent2_snz_dia_uid int null,
27 | dia_bir_parent2_sex_snz_code varchar(100) null,
28 | dia_bir_parent2_occupation_text varchar(60) null,
29 | dia_bir_birth_month_nbr tinyint null,
30 | dia_bir_birth_year_nbr smallint null,
31 | dia_bir_sex_snz_code varchar(100) null,
32 | dia_bir_ethnic_grp1_snz_ind bit not null, -- European
33 | dia_bir_ethnic_grp2_snz_ind bit not null, -- Maori
34 | dia_bir_ethnic_grp3_snz_ind bit not null, -- Pacific
35 | dia_bir_ethnic_grp4_snz_ind bit not null, -- Asian
36 | dia_bir_ethnic_grp5_snz_ind bit not null, -- MELAA
37 | dia_bir_ethnic_grp6_snz_ind bit not null); -- Other
38 |
39 | CREATE TABLE DIA_Clean_deaths (
40 | snz_uid int not null UNIQUE,
41 | snz_dia_uid int not null UNIQUE,
42 | dia_dth_death_month_nbr tinyint null,
43 | dia_dth_death_year_nbr smallint null,
44 | dia_dth_last_seen_month_nbr tinyint null,
45 | dia_dth_last_seen_year_nbr smallint null);
46 |
47 | CREATE TABLE DIA_Clean_marriages (
48 | partnr1_snz_uid int not null UNIQUE,
49 | partnr1_snz_dia_uid int not null UNIQUE,
50 | partnr2_snz_uid int not null UNIQUE,
51 | partnr2_snz_dia_uid int not null UNIQUE,
52 | dia_mar_partnr1_birth_month_nbr tinyint null,
53 | dia_mar_partnr1_birth_year_nbr smallint null,
54 | dia_mar_partnr1_sex_snz_code varchar(100) null,
55 | dia_mar_partnr1_occupation_text varchar(60) null,
56 | dia_mar_partnr2_birth_month_nbr tinyint null,
57 | dia_mar_partnr2_birth_year_nbr smallint null,
58 | dia_mar_partnr2_sex_snz_code varchar(100) null,
59 | dia_mar_partnr2_occupation_text varchar(60) null);
60 |
61 | CREATE TABLE DIA_Clean_civil_unions (
62 | partnr1_snz_uid int not null UNIQUE,
63 | partnr1_snz_dia_uid int not null UNIQUE,
64 | partnr2_snz_uid int not null UNIQUE,
65 | partnr2_snz_dia_uid int not null UNIQUE,
66 | dia_civ_partnr1_birth_month_nbr tinyint null,
67 | dia_civ_partnr1_birth_year_nbr smallint null,
68 | dia_civ_partnr1_sex_snz_code varchar(100) null,
69 | dia_civ_partnr1_occupation_text varchar(60) null,
70 | dia_civ_partnr2_birth_month_nbr tinyint null,
71 | dia_civ_partnr2_birth_year_nbr smallint null,
72 | dia_civ_partnr2_sex_snz_code varchar(100) null,
73 | dia_civ_partnr2_occupation_text varchar(60) null);
74 |
75 | -- Linkage process information.
76 | -- One row for each snz_uid.
77 | -- Includes links between individual identifiers.
78 | -- Most values are NULL.
79 | CREATE TABLE security_concordance (
80 | snz_uid int not null UNIQUE,
81 | snz_dia_uid int,
82 | snz_ird_uid int,
83 | snz_moh_uid int,
84 | snz_cen_uid int,
85 | snz_acc_uid int,
86 | snz_dol_uid int,
87 | snz_spine_uid int);
88 |
89 | INSERT INTO security_concordance VALUES
90 | (10 ,34 , 0 , NULL, NULL ,29 ,NULL,100 ),
91 | (2 ,55 , 1 , NULL, NULL ,23 ,NULL,143 ),
92 | (1 ,32 , 2 , NULL, NULL ,22 ,NULL,412 ),
93 | (4 ,1 , 3 , NULL, NULL ,21 ,NULL,563 ),
94 | (7 ,67 , 4 , NULL, NULL ,26 ,NULL,213 ),
95 | (9 ,NULL , 5 , NULL, NULL ,25 ,NULL,553 ),
96 | (5 ,32 , 6 , NULL, NULL ,24 ,NULL,153 ),
97 | (8 ,43 , 7 , NULL, NULL ,28 ,NULL,562 ),
98 | (6 ,23 , 8 , NULL, NULL ,27 ,NULL,643 ),
99 | (3 ,123 , 9 , NULL, NULL ,20 ,NULL,142 ),
100 | (11 ,NULL , 7 , NULL, NULL ,NULL,NULL,154 ),
101 | (12 ,65 , 5 , NULL, NULL ,NULL,NULL,853 ),
102 | (13 ,NULL , 10 , NULL, NULL ,6 ,NULL,128 ),
103 | (14 ,NULL , 12 , NULL, NULL ,1 ,NULL,732 ),
104 | (15 ,NULL , 43 , NULL, NULL ,NULL,NULL,129 ),
105 | (16 ,765 , 44 , NULL, NULL ,NULL,NULL,923 ),
106 | (17 ,NULL , 34 , NULL, NULL ,7 ,NULL,132 ),
107 | (18 ,76 , 100 , NULL, NULL ,NULL,NULL,731 ),
108 | (19 ,NULL , 101 , NULL, 32 ,3 ,NULL,766 ),
109 | (20 ,NULL , NULL , 1 , NULL ,31 ,NULL,NULL),
110 | (21 ,NULL , NULL , 2 , NULL ,32 ,NULL,NULL),
111 | (22 ,NULL , NULL , 3 , NULL ,33 ,NULL,NULL),
112 | (23 ,NULL , NULL , 4 , NULL ,34 ,NULL,NULL),
113 | (24 ,NULL , NULL , 5 , NULL ,35 ,NULL,NULL),
114 | (25 ,NULL , 15 , NULL, NULL ,36 ,NULL,NULL),
115 | (26 ,NULL , 16 , NULL, NULL ,37 ,NULL,NULL),
116 | (27 ,NULL , 17 , NULL, NULL ,38 ,NULL,NULL),
117 | (28 ,NULL , 18 , NULL, NULL ,39 ,NULL,NULL),
118 | (29 ,NULL , 19 , NULL, NULL ,41 ,NULL,NULL),
119 | (30 ,NULL , 20 , NULL, NULL ,42 ,NULL,NULL),
120 | (31 ,NULL , 21 , NULL, NULL ,43 ,NULL,NULL),
121 | (32 ,NULL , 22 , NULL, NULL ,44 ,NULL,NULL),
122 | (33 ,NULL , 23 , NULL, NULL ,NULL,1 ,NULL),
123 | (34 ,NULL , 42 , NULL, NULL ,NULL,2 ,NULL),
124 | (36 ,NULL , 65 , NULL, NULL ,NULL,3 ,NULL),
125 | (37 ,NULL , 54 , NULL, NULL ,NULL,NULL,NULL),
126 | (38 ,NULL , 32 , NULL, NULL ,NULL,NULL,NULL);
127 |
128 | -- Contains start and end of overseas spells.
129 | -- Researchers may wish to exclude people who
130 | -- were overseas.
131 | CREATE TABLE data_person_overseas_spell (
132 | snz_uid int not null UNIQUE,
133 | pos_applied_date date null,
134 | pos_ceased_date date null);
135 |
136 | -- One row for each snz_uid.
137 | -- The column with 0 or 1 indicates
138 | -- if an individual is in the spine.
139 | CREATE TABLE data_personal_detail (
140 | snz_uid int not null UNIQUE,
141 | snz_in_spine int not null
142 | );
143 |
144 | -- In this table there should be
145 | -- one row for each unique combination
146 | -- of snz_uid and year.
147 | -- A subset of the people in the spine.
148 | CREATE TABLE data_snz_res_pop (
149 | snz_uid int not null UNIQUE,
150 | year int not null
151 | );
152 |
153 | -- I CREATED THESE EMPTY TABLES SO THE TABLE
154 | -- NAMES WOULD MATCH SOME OF THOSE IN THE
155 | -- DATA SCHEMA
156 | CREATE TABLE data_source_ranked_ethnicity (
157 | snz_uid int not null UNIQUE,
158 | dummy1 int,
159 | dummy2 int,
160 | dummy3 int);
161 |
162 | CREATE TABLE data_income_tax_yr_summary (
163 | snz_uid int not null UNIQUE,
164 | dummy1 int,
165 | dummy2 int,
166 | dummy3 int);
167 |
168 | CREATE TABLE data_income_cal_yr_summary (
169 | snz_uid int not null UNIQUE,
170 | dummy1 int,
171 | dummy2 int,
172 | dummy3 int);
173 |
174 | CREATE TABLE data_address_notification (
175 | snz_uid int not null UNIQUE,
176 | dummy1 int,
177 | dummy2 int,
178 | dummy3 int);
179 |
180 | CREATE TABLE ACC_Clean_Medical_Codes (
181 | snz_acc_claim_uid int not null,
182 | acc_med_injury_count_nbr smallint,
183 | acc_med_read_code varchar(6),
184 | acc_med_read_code_text varchar(255),
185 | acc_med_injury_precedence_nbr int not null
186 | );
187 |
188 | INSERT INTO ACC_Clean_Medical_Codes VALUES
189 | (1 ,2, 'a', 'broken leg' ,1 ),
190 | (2 ,3, 'b', 'popped out eyeball',2 ),
191 | (3 ,1, 'a', 'broken leg' ,1 ),
192 | (4 ,2, 'a', 'broken leg' ,1 ),
193 | (5 ,3, 'b', 'popped out eyeball',1 ),
194 | (6 ,4, 'b', 'popped out eyeball',2 ),
195 | (7 ,2, 'a', 'broken leg' ,2 ),
196 | (8 ,2, 'c', 'exploded lung' ,3 ),
197 | (9 ,3, 'c', 'exploded lung' ,3 ),
198 | (10,3, 'c', 'exploded lung' ,3 );
199 |
200 | CREATE TABLE ACC_Clean_Serious_Injury (
201 | snz_uid int not null,
202 | snz_acc_uid int not null,
203 | snz_employee_ird_uid int null,
204 | snz_employer_ird_uid int null,
205 | acc_cla_accident_date date null,
206 | acc_cla_claim_costs_to_date_ex_gst_amt decimal(13,2),
207 | acc_cla_ethnic_grp1_snz_uid bit not null,
208 | acc_cla_ethnic_grp2_snz_uid bit not null,
209 | acc_cla_ethnic_grp3_snz_uid bit not null,
210 | acc_cla_ethnic_grp4_snz_uid bit not null,
211 | acc_cla_ethnic_grp5_snz_uid bit not null,
212 | acc_cla_ethnic_grp6_snz_uid bit not null,
213 | snz_acc_claim_uid int not null,
214 | acc_cla_meshblock_code varchar(7) null
215 | );
216 |
217 | INSERT INTO ACC_Clean_Serious_Injury VALUES
218 | (10, 29 ,0 ,42 , '20160901', 15000 ,1 ,0 ,0 ,1 ,0 ,1 ,1 , 'MZ321' ),
219 | (2 , 23 ,1 ,32 , '20160912', 12000 ,1 ,0 ,0 ,0 ,0 ,0 ,2 , 'KL653' ),
220 | (1 , 22 ,2 ,32 , '20160913', 130000 ,0 ,1 ,0 ,0 ,0 ,0 ,3 , 'DF24' ),
221 | (4 , 21 ,3 ,54 , '20160923', 132000 ,0 ,1 ,0 ,0 ,0 ,0 ,4 , 'EW321' ),
222 | (7 , 26 ,4 ,65 , '20160902', 23000 ,0 ,1 ,0 ,0 ,0 ,0 ,5 , 'EW321' ),
223 | (9 , 25 ,5 ,65 , '20160921', 32000 ,0 ,0 ,0 ,1 ,0 ,0 ,6 , 'KL432' ),
224 | (5 , 24 ,6 ,23 , '20160918', 500 ,1 ,0 ,0 ,0 ,0 ,0 ,7 , 'EW234' ),
225 | (8 , 28 ,7 ,42 , '20160916', 120 ,0 ,0 ,0 ,0 ,1 ,0 ,8 , 'FD432' ),
226 | (6 , 27 ,8 ,42 , '20160918', 130 ,0 ,1 ,0 ,0 ,0 ,0 ,9 , 'HFD432'),
227 | (3 , 20 ,9 ,42 , '20160919', 45000 ,1 ,1 ,0 ,0 ,0 ,0 ,10 , 'FGV432'),
228 | (20, 31 ,NULL,42 , '20170601', 20000 ,1 ,0 ,0 ,0 ,0 ,0 ,11 , 'EW321' ),
229 | (21, 32 ,NULL,23 , '20170602', 20000 ,1 ,0 ,0 ,0 ,0 ,0 ,12 , 'EW321' ),
230 | (22, 33 ,NULL,65 , '20170603', 20000 ,1 ,0 ,0 ,0 ,0 ,0 ,13 , 'EW234' ),
231 | (23, 34 ,NULL,65 , '20170604', 20000 ,0 ,1 ,0 ,0 ,0 ,0 ,14 , 'EW234' ),
232 | (24, 35 ,NULL,54 , '20170605', 20000 ,0 ,1 ,0 ,0 ,0 ,0 ,15 , 'FD432' ),
233 | (25, 36 ,15 ,32 , '20171206', 20000 ,0 ,1 ,0 ,0 ,0 ,0 ,16 , 'FD432' ),
234 | (26, 37 ,16 ,32 , '20170207', 30000 ,0 ,1 ,0 ,0 ,0 ,0 ,17 , 'FD432' ),
235 | (27, 38 ,17 ,42 , '20170608', 30000 ,0 ,1 ,0 ,0 ,0 ,0 ,18 , 'FD432' ),
236 | (28, 39 ,18 ,65 , '20180309', 30000 ,0 ,0 ,1 ,0 ,0 ,0 ,19 , 'FD432' ),
237 | (29, 41 ,19 ,54 , '20181110', 30000 ,0 ,0 ,1 ,0 ,0 ,0 ,20 , 'FD432' ),
238 | (30, 42 ,20 ,32 , '20180711', 45000 ,0 ,0 ,1 ,0 ,0 ,0 ,21 , 'DF24' ),
239 | (31, 43 ,21 ,32 , '20180612', 45000 ,0 ,0 ,0 ,1 ,0 ,0 ,22 , 'DF24' ),
240 | (32, 44 ,22 ,42 , '20180513', 45000 ,0 ,0 ,0 ,1 ,0 ,0 ,23 , 'MZ321' ),
241 | (33, 45 ,23 ,32 , '20180614', 45000 ,0 ,0 ,0 ,0 ,1 ,0 ,24 , 'MZ321' );
242 |
243 | -- INSERT INTO DIA_Clean_civil_unions VALUES (10, 34, 6 , 23 , 1, 1975, 1, NULL, 1, 1976, 1, NULL);
244 | -- INSERT INTO DIA_Clean_civil_unions VALUES (2, 55, 3 , 123 , 2, 1966, 0, NULL, 6, 1969, 1, NULL);
245 | -- INSERT INTO DIA_Clean_civil_unions VALUES (1, 32, 12, 65 , 5, 1977, 0, NULL, 4, 1973, 1, NULL);
246 | -- INSERT INTO DIA_Clean_civil_unions VALUES (4, 1, 16, 765 , 5, 1988, 1, NULL, 4, 1989, 0, NULL);
247 | -- INSERT INTO DIA_Clean_civil_unions VALUES (7, 67, 18, 76 , 9, 1999, 0, NULL, 12, 1995, 0, NULL);
248 |
249 | -- ---------------------------------------------------------------
250 | -- CREATE IDI_Metadata DATABASE ---------------------------------
251 | -- ---------------------------------------------------------------
252 | DROP DATABASE IF EXISTS IDI_Metadata;
253 | CREATE DATABASE IDI_Metadata;
254 |
255 | USE IDI_Metadata;
256 |
257 | CREATE TABLE clean_read_CLASSIFICATIONS_acc_ethnicity_code (
258 | ethnic_grp int not null, -- a number from 1 to 6
259 | description varchar(100) not null
260 | );
261 |
262 | CREATE TABLE clean_read_CLASSIFICATIONS_post_codes (
263 | post_code int not null,
264 | description varchar(100)
265 | );
266 |
267 | INSERT INTO clean_read_CLASSIFICATIONS_acc_ethnicity_code
268 | (ethnic_grp, description)
269 | VALUES
270 | (1, 'European'),
271 | (2, 'Maori'),
272 | (3, 'Pacific Peoples'),
273 | (4, 'Asian'),
274 | (5, 'Middle Eastern/Latin American/African'),
275 | (6, 'Other ethnicity');
276 |
277 | CREATE TABLE clean_read_CLASSIFICATIONS_cor_ethnicity_code (
278 | ethnic_grp int not null, -- a number from 1 to 6
279 | description varchar(100) not null
280 | );
281 |
282 | INSERT INTO clean_read_CLASSIFICATIONS_cor_ethnicity_code
283 | (ethnic_grp, description)
284 | VALUES
285 | (2, 'European'),
286 | (1, 'Maori/Pacific Peoples'),
287 | (3, 'Asian'),
288 | (4, 'Other ethnicity');
289 |
290 | -- ---------------------------------------------------------------
291 | -- CREATE OLDER IDI_Clean REFRESH DATABASE ----------------------
292 | -- ---------------------------------------------------------------
293 |
294 | -- INSERT INTO security_concordance (
295 | -- snz_uid ,
296 | -- snz_dia_uid ,
297 | -- snz_ird_uid ,
298 | -- snz_moh_uid ,
299 | -- snz_cen_uid ,
300 | -- snz_acc_uid ,
301 | -- snz_dol_uid ,
302 | -- snz_in_spine)
303 | -- VALUES
304 | -- (13, 34 , NULL , NULL, NULL ,NULL,NULL, 1),
305 | -- (21 ,55 , NULL , NULL, NULL ,2 ,NULL,1),
306 | -- (34 ,32 , NULL , NULL, NULL ,NULL,NULL,1),
307 | -- (24 ,1 , NULL , NULL, NULL ,4 ,NULL,1),
308 | -- (55 ,67 , NULL , NULL, NULL ,5 ,NULL,1),
309 | -- (7 ,NULL , 3 , NULL, NULL ,NULL,NULL,0),
310 | -- (17 ,32 , 1 , NULL, NULL ,NULL,NULL,0),
311 | -- (28 ,43 , 2 , NULL, NULL ,8 ,NULL,1),
312 | -- (59 ,23 , 4 , NULL, NULL ,9 ,NULL,1),
313 | -- (1237,123 , 6 , NULL, NULL ,10 ,NULL,1),
314 | -- (121,NULL , 7 , NULL, NULL ,NULL,NULL,1),
315 | -- (345,65 , 5 , NULL, NULL ,NULL,NULL,0),
316 | -- (765, NULL , 10 , NULL, NULL ,6 ,NULL,1),
317 | -- (1432, NULL , 12 , NULL, NULL ,1 ,NULL,1),
318 | -- (873, NULL , 43 , NULL, NULL ,NULL,NULL,1),
319 | -- (3,765 , 44 , NULL, NULL ,NULL,NULL,0),
320 | -- (5,NULL , 34 , NULL, NULL ,7 ,NULL,1),
321 | -- (78,76 , 100 , NULL, NULL ,NULL,NULL,0),
322 | -- (1,NULL , 101 , NULL, 32 ,3 ,NULL,0);
323 | --
324 |
325 | -- ---------------------------------------------------------------
326 | -- CREATE Sandpit DATABASE ---------------------------------------
327 | -- ---------------------------------------------------------------
328 |
329 | DROP DATABASE IF EXISTS Sandpit;
330 | CREATE DATABASE Sandpit;
331 |
332 | USE Sandpit;
333 |
334 |
335 | CREATE TABLE Ape_Colours (
336 | ColourID int not null,
337 | ColourName varchar(20) not null,
338 | Comments varchar(100), -- I find this Colour strange etc.
339 | PRIMARY KEY (ColourID)
340 | );
341 |
342 | CREATE TABLE Ape_Friends (
343 | FriendID int not null,
344 | FirstName varchar(20),
345 | LastName varchar(20),
346 | FavColourID int,
347 | FOREIGN KEY (FavColourID) REFERENCES Ape_Colours (ColourID),
348 | PRIMARY KEY (FriendID)
349 | );
350 |
351 | CREATE TABLE Ape_BananaTree (
352 | TreeID int not null,
353 | Height decimal(5,2),
354 | YearPlanted int,
355 | MonthPlanted int,
356 | Width decimal(5,2),
357 | PRIMARY KEY (TreeID)
358 | );
359 |
360 | CREATE TABLE Ape_EatingFrom (
361 | FriendID int not null,
362 | TreeID int not null,
363 | FOREIGN KEY (FriendID) REFERENCES Ape_Friends (FriendID),
364 | FOREIGN KEY (TreeID) REFERENCES Ape_BananaTree (TreeID)
365 | );
366 |
367 | CREATE TABLE Ape_Banana (
368 | BananaID int not null,
369 | TasteRank int, -- from 1 to 5
370 | DatePicked date not null,
371 | DateEaten date not null,
372 | Ripe bit,
373 | TreeID int not null,
374 | Comments varchar(100),
375 | FOREIGN KEY (TreeID) REFERENCES Ape_BananaTree (TreeID),
376 | PRIMARY KEY (BananaID)
377 | );
378 |
379 |
380 | INSERT INTO Ape_Colours VALUES
381 | (1 ,'blue' , 'similar to sky' ),
382 | (2 ,'green' , 'bad tasting bananas'),
383 | (3 ,'yellow' , 'my favourite because banana'),
384 | (4 ,'orange' , 'no connection to orangutan'),
385 | (5 ,'red' , 'generally a bad sign'),
386 | (6 ,'purple' , 'never seen this before'),
387 | (7 ,'brown' , 'many things are brown'),
388 | (8 ,'magenta' , 'one of the primary subtractive colours'),
389 | (9 ,'pink' , 'very manly'),
390 | (10,'lime' , 'almost green'),
391 | (11,'turquoise', 'not to be confused with tortoise');
392 |
393 |
394 | INSERT INTO Ape_Friends VALUES
395 | (1 , 'Caesar' , 'Serkis', 3 ),
396 | (2 , 'Harambe' , 'Porter', 1 ),
397 | (3 , 'Aldo' , 'Atkins', 3 ),
398 | (4 , 'Cornelius', 'Porter', 1 ),
399 | (5 , 'Zira' , 'Porter', 4 ),
400 | (6 , 'Ishmael' , 'Serkis', 3 ),
401 | (7 , 'Monsieur' , 'Mallah', 3 ),
402 | (8 , 'Titano' , 'Atkins', 6 ),
403 | (9 , 'King' , 'Kong' , 3 ),
404 | (10, 'Bobo' , 'Kong' , 8 ),
405 | (11, 'Myster' , 'Ious' , NULL);
406 |
407 |
408 | INSERT INTO Ape_BananaTree VALUES
409 | (1, 5.5, 2018, 08, 31),
410 | (2, 4.3, 2018, 08, 27),
411 | (3, 4.7, 2018, 08, 36),
412 | (4, 3.8, 2018, 08, 20),
413 | (5, 6.2, 2018, 08, 40),
414 | (6, 6.4, 2016, 07, 23),
415 | (7, 5.4, 2016, 07, 32),
416 | (8, 5.2, 2016, 07, 31),
417 | (9, 4.8, 2016, 07, 19),
418 | (10, 4.5, 2015, 09, 28),
419 | (11, 5.8, 2015, 09, 35),
420 | (12, 7.5, 2015, 09, 45),
421 | (13, 6.4, 2015, 09, 30),
422 | (14, 5.0, 2015, 09, 24),
423 | (15, 5.1, 2014, 10, 34),
424 | (16, 4.2, 2014, 10, 23),
425 | (17, 5.4, 2014, 08, 39),
426 | (18, 5.2, 2014, 08, 28);
427 |
428 |
429 | INSERT INTO Ape_EatingFrom VALUES
430 | (3 ,1 ),
431 | (2 ,1 ),
432 | (1 ,1 ),
433 | (6 ,3 ),
434 | (10,5 ),
435 | (4 ,5 ),
436 | (9 ,5 ),
437 | (8 ,8 ),
438 | (7 ,8 ),
439 | (10,8 ),
440 | (5 ,8 ),
441 | (2 ,8 ),
442 | (3 ,8 ),
443 | (4 ,8 ),
444 | (6 ,2 ),
445 | (6 ,2 ),
446 | (6 ,2 ),
447 | (8 ,2 ),
448 | (9 ,1 ),
449 | (2 ,1 ),
450 | (1 ,1 ),
451 | (6 ,2 ),
452 | (6 ,2 ),
453 | (10,8 ),
454 | (2 ,18),
455 | (6 ,15),
456 | (7 ,11),
457 | (2 ,14),
458 | (2 ,1 );
459 |
460 |
461 | INSERT INTO Ape_Banana VALUES
462 | (1 , 2, '20181003', '20181004', 0, 1 , NULL),
463 | (2 , 4, '20181003', '20181004', 1, 2 , NULL),
464 | (3 , 4, '20181003', '20181004', 1, 2 , NULL),
465 | (4 , 5, '20181003', '20181006', 1, 1 , NULL),
466 | (5 , 5, '20181003', '20181006', 1, 2 , 'best banana ever'),
467 | (6 , 3, '20181003', '20181004', 1, 2 , NULL),
468 | (7 , 2, '20181002', '20181004', 0, 3 , NULL),
469 | (8 , 5, '20181002', '20181005', 1, 3 , 'smooth and delectable'),
470 | (9 , 3, '20181002', '20181003', 1, 4 , NULL),
471 | (10, 3, '20181002', '20181003', 1, 5 , NULL),
472 | (11, 2, '20181002', '20181003', 0, 5 , NULL),
473 | (12, 5, '20181002', '20181005', 1, 4 , NULL),
474 | (13, 1, '20181002', '20181002', 0, 9 , NULL),
475 | (14, 1, '20181001', '20181001', 0, 16, NULL),
476 | (15, 1, '20181001', '20181001', 0, 16, NULL),
477 | (16, 5, '20181001', '20181004', 1, 5 , 'a culinary delight'),
478 | (17, 5, '20181001', '20181004', 1, 6 , NULL),
479 | (18, 5, '20181001', '20181004', 1, 7 , NULL),
480 | (19, 5, '20181001', '20181004', 1, 8 , NULL),
481 | (20, 5, '20181001', '20181004', 1, 9 , 'soft with nutty undertones'),
482 | (21, 4, '20181001', '20181003', 1, 18, NULL),
483 | (22, 4, '20180930', '20181001', 1, 14, NULL),
484 | (23, 4, '20180930', '20181001', 1, 17, NULL),
485 | (24, 4, '20180930', '20181001', 1, 12, NULL),
486 | (25, 2, '20180930', '20181030', 1, 11, NULL),
487 | (26, 3, '20180930', '20181030', 0, 5 , 'good mid-range banana'),
488 | (27, 4, '20180930', '20181003', 1, 2 , NULL),
489 | (28, 5, '20180930', '20181003', 1, 10, NULL),
490 | (29, 5, '20180929', '20181003', 1, 11, NULL),
491 | (30, 1, '20180929', '20181001', 0, 4 , 'tasted like ape-shit'),
492 | (31, 1, '20180929', '20180929', 0, 7 , NULL),
493 | (32, 2, '20180929', '20181002', 0, 8 , NULL),
494 | (33, 5, '20180929', '20181002', 1, 12, NULL),
495 | (34, 4, '20180929', '20180930', 1, 2 , NULL),
496 | (35, 2, '20180929', '20180930', 0, 2 , NULL),
497 | (36, 3, '20180929', '20180930', 0, 18, NULL),
498 | (37, 3, '20180929', '20180930', 0, 13, NULL),
499 | (38, 4, '20180928', '20180929', 1, 15, NULL),
500 | (39, 5, '20180928', '20181001', 1, 13, NULL),
501 | (40, 2, '20180928', '20180930', 1, 12, NULL),
502 | (41, 1, '20180928', '20180928', 0, 15, NULL),
503 | (42, 1, '20180928', '20180928', 1, 12, 'had ants in it'),
504 | (43, 1, '20180928', '20180928', 0, 7 , NULL),
505 | (44, 4, '20180927', '20180929', 0, 7 , NULL),
506 | (45, 5, '20180927', '20180930', 1, 14, NULL),
507 | (46, 5, '20180927', '20180930', 1, 15, NULL),
508 | (47, 5, '20180927', '20180930', 1, 16, NULL),
509 | (48, 5, '20180927', '20180930', 1, 17, NULL),
510 | (49, 5, '20180927', '20180930', 1, 18, NULL),
511 | (50, 3, '20180927', '20180928', 1, 3 , NULL);
512 |
513 |
514 | -- - NOTES SCHEMA --
515 |
516 | CREATE TABLE Notes_Friends (
517 | FriendID int not null,
518 | FirstName varchar(20),
519 | LastName varchar(20),
520 | FavColour varchar(20),
521 | PRIMARY KEY (FriendID)
522 | );
523 |
524 | CREATE TABLE Notes_Pets (
525 | PetID int not null,
526 | PetName varchar(20),
527 | PetDOB date,
528 | FriendID int not null,
529 | FOREIGN KEY (FriendID) REFERENCES Notes_Friends (FriendID),
530 | PRIMARY KEY (PetID)
531 | );
532 |
533 | CREATE TABLE Notes_Scratched (
534 | ScratcherID int not null,
535 | ScratchDate date,
536 | ScratchTime time,
537 | ScratcheeID int not null,
538 | FOREIGN KEY (ScratcherID) REFERENCES Notes_Friends (FriendID),
539 | FOREIGN KEY (ScratcheeID) REFERENCES Notes_Friends (FriendID),
540 | PRIMARY KEY (ScratcherID, ScratcheeID, ScratchDate, ScratchTime)
541 | );
542 |
543 | CREATE TABLE Notes_PlayCount (
544 | PetID int not null,
545 | PlayCounter int,
546 | FriendID int not null,
547 | FOREIGN KEY (PetID) REFERENCES Notes_Pets (PetID),
548 | FOREIGN KEY (FriendID) REFERENCES Notes_Friends (FriendID),
549 | PRIMARY KEY (PetID, FriendID)
550 | );
551 |
552 | CREATE TABLE Notes_Passports (
553 | PptNo varchar(5) not null,
554 | PptCountry varchar(20),
555 | PptExpiry date,
556 | FriendID int,
557 | FOREIGN KEY (FriendID) REFERENCES Notes_Friends (FriendID),
558 | PRIMARY KEY (PptNo)
559 | );
560 |
561 | CREATE TABLE Notes_Table1 (
562 | A int not null,
563 | B varchar(20),
564 | C varchar(20),
565 | PRIMARY KEY (A)
566 | );
567 |
568 | CREATE TABLE Notes_Table2 (
569 | D varchar(20),
570 | E int not null,
571 | A int not null,
572 | FOREIGN KEY (A) REFERENCES Notes_Table1 (A),
573 | PRIMARY KEY (E)
574 | );
575 |
576 | CREATE TABLE Notes_Letters (
577 | A char,
578 | B char,
579 | Num int not null,
580 | PRIMARY KEY (Num)
581 | );
582 |
583 | CREATE TABLE Notes_RandomPeople (
584 | PersonName varchar(20),
585 | Gender char(2),
586 | Age int
587 | );
588 |
589 | CREATE TABLE Notes_Houses (
590 | house_ID varchar(5) not null,
591 | house_owner varchar(50),
592 | house_address varchar(200),
593 | post_code varchar(4),
594 | house_price decimal(10,2),
595 | PRIMARY KEY (house_ID)
596 | );
597 |
598 | CREATE TABLE Notes_Suburbs (
599 | post_code varchar(5) not null,
600 | suburb_name varchar(100) not null,
601 | vaccination_rate decimal(3,2),
602 | PRIMARY KEY (post_code, suburb_name)
603 | );
604 |
605 | CREATE TABLE Notes_Numbers (
606 | Num int,
607 | NumString varchar(10)
608 | );
609 |
610 | CREATE TABLE Notes_EduStudy (
611 | Id varchar(6) not null,
612 | Income varchar(8),
613 | Education int,
614 | PRIMARY KEY (Id)
615 | );
616 |
617 | CREATE TABLE Notes_Orders (
618 | OrderID int not null,
619 | Item varchar(30),
620 | Price decimal(5,2),
621 | OrderDT datetime,
622 | PRIMARY KEY (OrderID)
623 | );
624 |
625 | INSERT INTO Notes_Orders VALUES
626 | (1, 'Boiled leaves' , 2.99 , '2021-12-31 15:13:00'),
627 | (2, 'Bow wow' , 15 , '2021-12-31 15:34:00'),
628 | (3, 'Cackleberry stew', 32.55, '2022-01-01 09:32:00'),
629 | (4, 'Mug of murk' , 4.40 , '2022-01-01 10:16:00');
630 |
631 | INSERT INTO Notes_EduStudy VALUES
632 | ('EI13', 'low', 5),
633 | ('EI122', 'low', 1),
634 | ('EI281', 'low-mid', 4),
635 | ('EI3332', 'middle', 3),
636 | ('EI4751', 'high-mid', 3),
637 | ('EI12', 'high', 2);
638 |
639 | INSERT INTO Notes_Numbers VALUES
640 | (111, '111'),
641 | (31, '31'),
642 | (32, '32'),
643 | (211, '211');
644 |
645 | INSERT INTO Notes_Houses VALUES
646 | ('H0001', 'Millard Claassen' , '7235 East Van Dyke St' , '3128', 300000),
647 | ('H0002', 'Jamie Pew' , '8914 South Sunnyslope Dr', '3128', 150000),
648 | ('H0003', 'Bethel Viviano' , '87 South West Halifax St', '3142', 400000),
649 | ('H0004', 'Brandi Hovis' , '676 Ocean St' , '3142', 360000),
650 | ('H0005', 'Mei Colby' , '62 West Park Ave' , '3556', 220000),
651 | ('H0006', 'Marilu Munz' , '62 Elm Road' , '3083', 120000),
652 | ('H0007', 'Rhiannon Carwile' , '7005 Anderson Ave' , '3779', 500000),
653 | ('H0008', 'Joycelyn Hamburger' , '7410 Park Drive' , '3778', 550000),
654 | ('H0009', 'Leopoldo Flaherty' , '3 Dogwood Dr' , '3083', 1200000),
655 | ('H0010', 'Xavier Farrer' , '767 Rockville Street' , '3083', 100000),
656 | ('H0011', 'Waldo Wingboard' , '8712 Thorne Street' , NULL, 640000),
657 | ('H0012', 'Jimmy Jenkins' , '32 Rosey Cres' , NULL, 70000);
658 |
659 | -- There is no primary / foreign key pair for Houses and Suburbs.
660 | -- The primary key of Suburbs is not what you might expect (it is not just post_code).
661 | -- Post code 3128 connects to 2 suburbs.
662 | -- Some houses have a NULL post code.
663 | -- Post code 3142 has no corresponding suburb.
664 | -- The data type for post_code in Suburbs is varchar(5); one suburb has post code '33128' (see the commented-out example join below the INSERT).
665 |
666 | INSERT INTO Notes_Suburbs VALUES
667 | ('3128' , 'Erebor' , 0.8),
668 | ('33128', 'Erberor' , 0.8),
669 | ('3128' , 'Fangorn' , 0.2),
670 | ('3779' , 'Durin' , 0.4),
671 | ('3556' , 'Gondor' , 0.65),
672 | ('3083' , 'Isengaard', 0.35);
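-- A commented-out illustration (not part of the practice data) of the quirks
-- described above: a LEFT JOIN on post_code keeps the houses with NULL or
-- unmatched post codes, and duplicates the houses in post code 3128 because
-- that post code maps to two suburbs.
-- SELECT h.house_ID, h.post_code, s.suburb_name
-- FROM Notes_Houses AS h
-- LEFT JOIN Notes_Suburbs AS s
-- ON h.post_code = s.post_code;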
673 |
674 | INSERT INTO Notes_Friends VALUES
675 | (1, 'X', 'A', 'red'),
676 | (2, 'Y', 'B', 'blue'),
677 | (3, 'Z', 'C', NULL);
678 |
679 | INSERT INTO Notes_Pets VALUES
680 | (1, 'Chikin', '20160924', 2),
681 | (2, 'Cauchy', '20120301', 3),
682 | (3, 'Gauss', '20120301', 3);
683 |
684 | INSERT INTO Notes_Scratched VALUES
685 | (1, '20180905', '12:00', 2),
686 | (1, '20180905', '12:30', 3),
687 | (2, '20180906', '11:00', 1),
688 | (3, '20180907', '10:00', 1),
689 | (2, '20180801', '16:15', 1),
690 | (2, '20180801', '13:00', 3),
691 | (1, '20170214', '04:30', 2),
692 | (3, '20200128', '18:00', 1);
693 |
694 | INSERT INTO Notes_PlayCount VALUES
695 | (1, 3, 1),
696 | (1, 5, 2),
697 | (3, 4, 2);
698 |
699 | INSERT INTO Notes_Passports VALUES
700 | ('E1321', 'Australia', '20210312', NULL),
701 | ('LA123', 'New Zealand', '20320901', 2),
702 | ('S9876', 'Monaco', '20280619', 3);
703 |
704 | INSERT INTO Notes_Table1 (
705 | A, B, C
706 | ) VALUES
707 | (1, 'Ignorance', 'is'),
708 | (2, 'War', 'is'),
709 | (3, 'Freedom', 'is'),
710 | (4, 'Friendship', 'is');
711 |
712 | INSERT INTO Notes_Table2 (
713 | D, E, A
714 | ) VALUES
715 | ('slavery.', 3, 1),
716 | ('weakness.', 4, 2),
717 | ('strength.', 1, 3),
718 | ('peace.', 2, 4);
719 |
720 | INSERT INTO Notes_Letters VALUES
721 | ('a', 'b', 1),
722 | ('a', 'c', 2),
723 | ('a', 'b', 3),
724 | ('a', 'c', 4);
725 |
726 | INSERT INTO Notes_RandomPeople VALUES
727 | ('Beyonce', 'F', 37),
728 | ('Laura Marling', 'F', 28),
729 | ('Darren Hayes', 'M', 46),
730 | ('Bret McKenzie', 'M', 42),
731 | ('Jack Monroe', 'NB', 30);
732 |
733 | -- ---------------------------------------------------------------
734 | -- CREATE Analytics DATABASE -------------------------------------
735 | -- ---------------------------------------------------------------
736 |
737 | DROP DATABASE IF EXISTS Analytics;
738 |
739 | CREATE DATABASE Analytics;
740 |
741 | USE Analytics;
742 |
743 | CREATE TABLE Membership (
744 | memberID INT not null,
745 | memberName VARCHAR(100),
746 | phone VARCHAR(20),
747 | joinDate DATE,
748 | PRIMARY KEY (memberID)
749 | );
750 |
751 | INSERT INTO Membership
752 | VALUES
753 | (12231, 'Denali Dune', '+61 03 97229917', '2021-06-21'),
754 | (12688, 'Reilly Bierman', '+61 03 9269 1200', '2021-05-01'),
755 | (12233, 'Shiloh Henry', '+61 03 9479 6000', '2021-05-13'),
756 | (12565, 'Tristan Gaumond', '+61 03 9905 4000', '2021-05-04'),
757 | (12223, 'Rene Brassard', '+61 03 9903 2000', '2021-06-30'),
758 | (12668, 'Tanner Hubert', '+61 03 9035 5511', '2021-07-29');
759 |
760 | CREATE TABLE SausageSizzle (
761 | saleId INT NOT NULL,
762 | saleDate DATE,
763 | product VARCHAR(20),
764 | quantity INT,
765 | friendId INT,
766 | PRIMARY KEY (saleId)
767 | );
768 |
769 | INSERT INTO SausageSizzle VALUES
770 | (1 , '1999-12-31', 'pork' , 1, NULL),
771 | (2 , '1999-12-31', 'veggie', 3, NULL),
772 | (3 , '1999-12-31', 'pork' , 2, 1 ),
773 | (4 , '2000-01-01', 'veggie', 4, NULL),
774 | (5 , '2000-01-01', 'veggie', 2, 2 ),
775 | (6 , '2000-01-01', 'pork' , 1, NULL),
776 | (7 , '2000-01-01', 'veggie', 1, NULL),
777 | (8 , '2000-01-01', 'pork' , 1, NULL),
778 | (9 , '2000-01-02', 'veggie', 1, 2 ),
779 | (10, '2000-01-02', 'pork' , 2, NULL),
780 | (11, '2000-01-02', 'veggie', 3, NULL),
781 | (12, '2000-01-02', 'pork' , 4, NULL),
782 | (13, '2000-01-02', 'veggie', 2, 3 ),
783 | (14, '2000-01-03', 'veggie', 1, NULL),
784 | (15, '2000-01-03', 'pork' , 6, NULL),
785 | (16, '2000-01-03', 'veggie', 1, 1 ),
786 | (17, '2000-01-04', 'pork', 1, 1 ),
787 | (18, '2000-01-05', 'veggie', 5, 1 );
788 |
789 | CREATE VIEW SausageSizzleSummary AS
790 | SELECT Product, SaleDate, SUM(Quantity) AS Sales
791 | FROM SausageSizzle
792 | GROUP BY Product, SaleDate;
793 |
794 |
--------------------------------------------------------------------------------
/create-database/MySQL/README.md:
--------------------------------------------------------------------------------
1 | # MySQL create databases guide
2 |
3 | This is a guide to installing a MySQL server on Windows or Mac, so you can start using MySQL.
4 |
5 | This will guide you through **installing the server**, **installing the editor**, and then **creating the databases**.
6 |
7 | ## Windows
8 |
9 | ### Windows: installing the server
10 | 1. Go to [the 'MySQL installer' download page](https://dev.mysql.com/downloads/installer/).
11 | 2. On that page, there are two options. Download the **bottom** one (`mysql-installer-community`). On the next page, you do not have to log in, just click 'no thanks...'.
12 | 3. Run the installer that you just downloaded.
13 | 4. The 'Choosing a Setup Type' menu appears. Choose 'Developer Default' and press 'Next'. If an issue is then displayed related to Visual Studio, press 'Back', choose 'Custom' setup type, choose 'Next', then remove 'MySQL for Visual Studio' from the products to be installed. Then click 'Next'.
14 | 5. The 'Installation' overview menu appears. Click 'Execute'.
15 | 6. Wait for installation to complete. Then click 'Next'.
16 | 7. The 'Product Configuration' menu appears. Click 'Next'.
17 | 8. The 'Type and Networking' menu appears. Click 'Next'.
18 | 9. The 'Authentication Method' menu appears. Click 'Next'.
19 | 10. The 'Accounts and Roles' menu appears. Choose a secure MySQL Root Password, and write it down. You do not need to create any other user accounts. Click 'Next'.
20 | 11. The 'Windows Service' menu appears. Do not change the defaults. Click 'Next'.
21 | 12. When the configuration is done, click 'Finish'.
22 | 13. Product configuration continues. Take a deep breath. Click 'Next'.
23 | 14. The 'Connect To Server' menu appears. Input the root password that you chose in step 10. Click 'check' and then click 'Next'.
24 | 15. The 'Apply Configuration' menu appears. Click 'Execute'.
25 | 16. When the configuration is done, click 'Finish'.
26 | 17. Product configuration continues. Take another deep breath. Click 'Next'.
27 | 18. The 'MySQL Router Configuration' menu appears. Click 'Finish'.
28 | 19. The 'Product Configuration' menu appears again. Click 'Next'.
29 | 20. Installation is complete. Click 'Finish'.
30 |
31 | ### Windows: installing the editor
32 |
33 | The above process also installed MySQL Workbench. This is the MySQL editor we will use.
34 |
35 | 21. Open MySQL Workbench (e.g., press the 'Windows' key on your keyboard, and type 'MySQL Workbench', then press Enter).
36 | 22. Next to the word 'MySQL Connections', there is a plus symbol. Click it to add a new connection.
37 | 23. Under 'Connection Name' type 'MyLocal'.
38 | 24. Under 'Hostname' type 'localhost'.
39 | 25. Under 'Username' type 'root'.
40 | 26. Click 'OK'.
41 | 27. The 'MyLocal' connection box has been created. Click on it, then enter the password that you chose in step 10. If you like, you can check the box that says 'Save password in vault'. Click 'OK'.
42 | 28. MySQL Workbench is now connected.
43 |
44 | ### Windows: creating the databases
45 |
46 | 29. **Right click** [this link to the MySQL database script](https://raw.githubusercontent.com/frycast/SQL_course/master/create-database/MySQL/MySQL-database.sql) and click '**save link as**'. Save the file somewhere you can find it. The filename should be 'MySQL-database.sql'.
47 | 30. In MySQL Workbench click 'File >> Open SQL Script' (or press CTRL+SHIFT+O).
48 | 31. Find the file 'MySQL-database.sql' and open it.
49 | 32. In MySQL Workbench press the small lightning bolt symbol to run the script (or press CTRL+SHIFT+ENTER).
50 | 33. The databases have now been created.
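If you want to confirm that the script ran, one quick check (a sketch; the database and table names come from the script you just ran) is to execute:

```sql
SHOW DATABASES;                    -- the four course databases (IDI_Clean, IDI_Metadata, Sandpit, Analytics) should be listed
SELECT * FROM Sandpit.Ape_Colours; -- should return 11 rows
```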
51 |
52 | ## Mac
53 |
54 | ### Mac: installing the server
55 |
56 | 1. Go to [the MySQL download page](https://dev.mysql.com/downloads/mysql/).
57 | 2. On that page, there are many options. The top two have 'DMG Archive' in the name. If you are on an M1 Mac (Apple silicon Macs, released since late 2020), then choose the one that says '(ARM, 64-bit), DMG Archive'. If you are on an older Intel Mac (not an M1), then choose the one that says '(x86, 64-bit), DMG Archive'.
58 | 3. Mount the DMG. Then a box opens with a pkg file. Run the pkg file.
59 | 4. The installation menu begins. Click 'Continue'.
60 | 5. The next menu opens. Click 'Install'.
61 | 6. The 'Configure MySQL Server' menu appears. Click 'Next'.
62 | 7. Enter a password for the MySQL root user, and write it down. Click 'Finish'.
63 | 8. When the installation completes, click 'Close'.
64 |
65 | ### Mac: installing the editor
66 |
67 | 9. Go to the app store and search for 'SQL Ace'. Install it (free).
68 | 10. Once installed, open SQL Ace. A connection menu appears.
69 | 11. Under 'host' type 'localhost'.
70 | 12. Under 'Password' type the password you chose in step 7.
71 | 13. Click 'Connect'. If the connection fails, you may need to restart your mac first.
72 | 14. SQL Ace is now connected.
73 |
74 | ### Mac: creating the databases
75 |
76 | 15. Control-click [this link to the MySQL database script](https://raw.githubusercontent.com/frycast/SQL_course/master/create-database/MySQL/MySQL-database.sql) and click '**save link as**'. Save the file somewhere you can find it. The filename should be 'MySQL-database.sql'.
77 | 16. In SQL Ace click 'File >> Open' (or press Command+O).
78 | 17. Find the file 'MySQL-database.sql' and open it.
79 | 18. In SQL Ace, find the drop-down arrow next to 'Run Current', and press it. A drop-down menu opens. Press 'Run All Queries' (alternatively, just press Option+Command+R).
80 |
--------------------------------------------------------------------------------
/create-database/SQL-data-definition-examples.sql:
--------------------------------------------------------------------------------
1 | -- preparation
2 | DROP TABLE IF EXISTS MySchema.MyFriends;
3 | DROP TABLE IF EXISTS MySchema.MyFriendsNames;
4 | DROP VIEW IF EXISTS MySchema.MyFriendsNames_view;
5 | DROP SCHEMA IF EXISTS MySchema;
6 | DELETE FROM Notes.Friends WHERE Notes.Friends.friendID > 995;
7 | GO
8 |
9 | -- slide 1 CREATE SCHEMA to store your tables
10 | CREATE SCHEMA MySchema;
11 | GO
12 |
13 | -- slide 2 CREATE VIEW to store a query like a table
14 | CREATE OR ALTER VIEW MySchema.MyFriendsNames_view AS
15 | SELECT firstName, lastName
16 | FROM Notes.Friends;
17 | GO
18 |
19 | -- see result
20 | SELECT *
21 | FROM MySchema.MyFriendsNames_view;
22 | GO
23 |
24 | -- slide 3 SELECT INTO to store result in a table
25 | SELECT friendID, firstName, lastName
26 | INTO MySchema.MyFriends
27 | FROM Notes.Friends;
28 | GO
29 |
30 | -- slide 4 ALTER to add columns to a stored table
31 | ALTER TABLE MySchema.MyFriends
32 | ADD initials varchar(4);
33 | GO
34 |
35 | -- see result
36 | SELECT *
37 | FROM MySchema.MyFriends;
38 | GO
39 |
40 | -- slide 5 UPDATE to change the entries in a table
41 | UPDATE MySchema.MyFriends
42 | SET initials = CONCAT(
43 | SUBSTRING(firstName, 1, 1),
44 | SUBSTRING(lastName, 1, 1)
45 | )
46 | WHERE firstName IS NOT NULL
47 | AND lastName IS NOT NULL;
48 | GO
49 |
50 | -- see result
51 | SELECT *
52 | FROM MySchema.MyFriends;
53 | GO
54 |
55 | -- slide 6 note we could instead just alter the view
56 | CREATE OR ALTER VIEW MySchema.MyFriendsNames_view AS
57 | SELECT friendID, firstName, lastName,
58 | (CONCAT(SUBSTRING(firstName, 1, 1),
59 | SUBSTRING(lastName, 1, 1))
60 | ) AS initials
61 | FROM Notes.Friends;
62 | GO
63 |
64 | -- see result
65 | SELECT *
66 | FROM MySchema.MyFriendsNames_view;
67 |
68 | -- slide 7 INSERT INTO to create a whole record
69 | INSERT INTO Notes.Friends
70 | (friendID, firstName, lastName, favColour)
71 | VALUES
72 | (997, NULL, NULL, NULL),
73 | (998, '', '', ''),
74 | (999, 'NA', 'NA', 'NA');
75 | GO
76 |
77 | -- see result
78 | SELECT *
79 | FROM MySchema.MyFriendsNames_view;
80 |
81 | -- slide 8 But our table doesn't change
82 | SELECT *
83 | FROM MySchema.MyFriends;
84 |
85 | -- update stored table
86 | INSERT INTO MySchema.MyFriends
87 | (friendID, firstName, lastName)
88 | SELECT friendID, firstName, lastName
89 | FROM Notes.Friends
90 | WHERE friendID > 995
91 | GO
92 |
93 | -- see result
94 | SELECT *
95 | FROM MySchema.MyFriends;
96 |
97 | -- slide 9 the CASE clause
98 | UPDATE MySchema.MyFriends
99 | SET initials = (CASE
100 | WHEN (firstName IS NULL or lastName IS NULL) THEN 'none'
101 | WHEN (firstName = '' and lastName = '') THEN 'none'
102 | WHEN (firstName = 'NA' and lastName = 'NA') THEN 'none'
103 | ELSE (CONCAT(SUBSTRING(firstName, 1, 1),
104 | SUBSTRING(lastName, 1, 1)))
105 | END);
106 | GO
107 |
108 | -- see result
109 | SELECT *
110 | FROM MySchema.MyFriends;
111 |
112 | -- cleaning up
113 | DROP TABLE IF EXISTS MySchema.MyFriends;
114 | DROP TABLE IF EXISTS MySchema.MyFriendsNames;
115 | DROP VIEW IF EXISTS MySchema.MyFriendsNames_view;
116 | DROP SCHEMA IF EXISTS MySchema;
117 | DELETE FROM Notes.Friends WHERE Notes.Friends.friendID > 995;
118 | GO
119 |
120 |
121 |
122 |
123 |
--------------------------------------------------------------------------------
/create-database/SQLFiddle/README.md:
--------------------------------------------------------------------------------
1 | ## Should you use SQL Fiddle?
2 |
3 | Many of the exercises in this course can be completed through a browser using [sqlfiddle.com](http://sqlfiddle.com/).
4 |
5 | The drawbacks are that you can't work with multiple schemas and databases at the same time, and you need to build each schema as you use it.
6 |
7 | The advantage is that you can use SQL Fiddle from any browser, without any installation steps.
8 |
9 | ## Using SQL Fiddle
10 |
11 | Navigate to [sqlfiddle.com](http://sqlfiddle.com/), select MS SQL Server 2017 from the drop-down menu at the top left of the page, and then build a schema by copying the schema code (linked below) into the schema panel then pressing 'build schema'.
12 |
13 | Here are links to the schema code for the exercises:
14 |
15 | * [Notes schema](https://github.com/frycast/SQL_course/raw/master/create-database/SQLFiddle/T-SQL-notes-schema.sql)
16 | * [Ape schema](https://github.com/frycast/SQL_course/raw/master/create-database/SQLFiddle/T-SQL-ape-schema.sql)
17 | * [Analytics schema](https://github.com/frycast/SQL_course/raw/master/create-database/SQLFiddle/T-SQL-analytics-schema.sql)
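Once a schema is built, you can run queries against it in the panel on the right. For example, this small sketch should work against the Notes schema (it uses the `Friends` table defined in that schema file):

```sql
SELECT FirstName, LastName, FavColour
FROM Friends
WHERE FavColour IS NOT NULL;
```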
18 |
19 |
--------------------------------------------------------------------------------
/create-database/SQLFiddle/T-SQL-analytics-schema.sql:
--------------------------------------------------------------------------------
1 | CREATE TABLE Membership (
2 | memberID INT not null,
3 | memberName VARCHAR(100),
4 | phone VARCHAR(20),
5 | joinDate DATE,
6 | PRIMARY KEY (memberID)
7 | );
8 |
9 |
10 | INSERT INTO Membership
11 | VALUES
12 | (12231, 'Denali Dune', '+61 03 97229917', '2021-06-21'),
13 | (12688, 'Reilly Bierman', '+61 03 9269 1200', '2021-05-01'),
14 | (12233, 'Shiloh Henry', '+61 03 9479 6000', '2021-05-13'),
15 | (12565, 'Tristan Gaumond', '+61 03 9905 4000', '2021-05-04'),
16 | (12223, 'Rene Brassard', '+61 03 9903 2000', '2021-06-30'),
17 | (12668, 'Tanner Hubert', '+61 03 9035 5511', '2021-07-29');
18 |
19 |
20 | CREATE TABLE SausageSizzle (
21 | saleId INT NOT NULL,
22 | saleDate DATE,
23 | product VARCHAR(20),
24 | quantity INT,
25 | friendId INT,
26 | PRIMARY KEY (saleId)
27 | );
28 |
29 |
30 | INSERT INTO SausageSizzle VALUES
31 | (1 , '1999-12-31', 'pork' , 1, NULL),
32 | (2 , '1999-12-31', 'veggie', 3, NULL),
33 | (3 , '1999-12-31', 'pork' , 2, 1 ),
34 | (4 , '2000-01-01', 'veggie', 4, NULL),
35 | (5 , '2000-01-01', 'veggie', 2, 2 ),
36 | (6 , '2000-01-01', 'pork' , 1, NULL),
37 | (7 , '2000-01-01', 'veggie', 1, NULL),
38 | (8 , '2000-01-01', 'pork' , 1, NULL),
39 | (9 , '2000-01-02', 'veggie', 1, 2 ),
40 | (10, '2000-01-02', 'pork' , 2, NULL),
41 | (11, '2000-01-02', 'veggie', 3, NULL),
42 | (12, '2000-01-02', 'pork' , 4, NULL),
43 | (13, '2000-01-02', 'veggie', 2, 3 ),
44 | (14, '2000-01-03', 'veggie', 1, NULL),
45 | (15, '2000-01-03', 'pork' , 6, NULL),
46 | (16, '2000-01-03', 'veggie', 1, 1 ),
47 | (17, '2000-01-04', 'pork', 1, 1 ),
48 | (18, '2000-01-05', 'veggie', 5, 1 );
49 |
50 |
51 | CREATE VIEW SausageSizzleSummary AS
52 | SELECT Product, SaleDate, SUM(Quantity) AS Sales
53 | FROM SausageSizzle
54 | GROUP BY Product, SaleDate;
55 |
--------------------------------------------------------------------------------
/create-database/SQLFiddle/T-SQL-ape-schema.sql:
--------------------------------------------------------------------------------
1 | --- APE SCHEMA --
2 | CREATE TABLE Colours (
3 | ColourID int not null,
4 | ColourName varchar(20) not null,
5 | Comments varchar(100), -- I find this Colour strange etc.
6 | PRIMARY KEY (ColourID)
7 | );
8 | GO
9 | CREATE TABLE Friends (
10 | FriendID int not null,
11 | FirstName varchar(20),
12 | LastName varchar(20),
13 | FavColourID int,
14 | FOREIGN KEY (FavColourID) REFERENCES Colours (ColourID),
15 | PRIMARY KEY (FriendID)
16 | );
17 | GO
18 | CREATE TABLE BananaTree (
19 | TreeID int not null,
20 | Height decimal(5,2),
21 | YearPlanted int,
22 | MonthPlanted int,
23 | Width decimal(5,2),
24 | PRIMARY KEY (TreeID)
25 | );
26 | GO
27 | CREATE TABLE EatingFrom (
28 | FriendID int not null,
29 | TreeID int not null,
30 | FOREIGN KEY (FriendID) REFERENCES Friends (FriendID),
31 | FOREIGN KEY (TreeID) REFERENCES BananaTree (TreeID)
32 | );
33 | GO
34 | CREATE TABLE Banana (
35 | BananaID int not null,
36 | TasteRank int, -- from 1 to 5
37 | DatePicked date not null,
38 | DateEaten date not null,
39 | Ripe bit,
40 | TreeID int not null,
41 | Comments varchar(100),
42 | FOREIGN KEY (TreeID) REFERENCES BananaTree (TreeID),
43 | PRIMARY KEY (BananaID)
44 | );
45 | GO
46 |
47 | INSERT INTO Colours VALUES
48 | (1 ,'blue' , 'similar to sky' ),
49 | (2 ,'green' , 'bad tasting bananas'),
50 | (3 ,'yellow' , 'my favourite because banana'),
51 | (4 ,'orange' , 'no connection to orangutan'),
52 | (5 ,'red' , 'generally a bad sign'),
53 | (6 ,'purple' , 'never seen this before'),
54 | (7 ,'brown' , 'many things are brown'),
55 | (8 ,'magenta' , 'one of the primary subtractive colours'),
56 | (9 ,'pink' , 'very manly'),
57 | (10,'lime' , 'almost green'),
58 | (11,'turquoise', 'not to be confused with tortoise');
59 | GO
60 |
61 | INSERT INTO Friends VALUES
62 | (1 , 'Caesar' , 'Serkis', 3 ),
63 | (2 , 'Harambe' , 'Porter', 1 ),
64 | (3 , 'Aldo' , 'Atkins', 3 ),
65 | (4 , 'Cornelius', 'Porter', 1 ),
66 | (5 , 'Zira' , 'Porter', 4 ),
67 | (6 , 'Ishmael' , 'Serkis', 3 ),
68 | (7 , 'Monsieur' , 'Mallah', 3 ),
69 | (8 , 'Titano' , 'Atkins', 6 ),
70 | (9 , 'King' , 'Kong' , 3 ),
71 | (10, 'Bobo' , 'Kong' , 8 ),
72 | (11, 'Myster' , 'Ious' , NULL);
73 | GO
74 |
75 | INSERT INTO BananaTree VALUES
76 | (1, 5.5, 2018, 08, 31),
77 | (2, 4.3, 2018, 08, 27),
78 | (3, 4.7, 2018, 08, 36),
79 | (4, 3.8, 2018, 08, 20),
80 | (5, 6.2, 2018, 08, 40),
81 | (6, 6.4, 2016, 07, 23),
82 | (7, 5.4, 2016, 07, 32),
83 | (8, 5.2, 2016, 07, 31),
84 | (9, 4.8, 2016, 07, 19),
85 | (10, 4.5, 2015, 09, 28),
86 | (11, 5.8, 2015, 09, 35),
87 | (12, 7.5, 2015, 09, 45),
88 | (13, 6.4, 2015, 09, 30),
89 | (14, 5.0, 2015, 09, 24),
90 | (15, 5.1, 2014, 10, 34),
91 | (16, 4.2, 2014, 10, 23),
92 | (17, 5.4, 2014, 08, 39),
93 | (18, 5.2, 2014, 08, 28);
94 | GO
95 |
96 | INSERT INTO EatingFrom VALUES
97 | (3 ,1 ),
98 | (2 ,1 ),
99 | (1 ,1 ),
100 | (6 ,3 ),
101 | (10,5 ),
102 | (4 ,5 ),
103 | (9 ,5 ),
104 | (8 ,8 ),
105 | (7 ,8 ),
106 | (10,8 ),
107 | (5 ,8 ),
108 | (2 ,8 ),
109 | (3 ,8 ),
110 | (4 ,8 ),
111 | (6 ,2 ),
112 | (6 ,2 ),
113 | (6 ,2 ),
114 | (8 ,2 ),
115 | (9 ,1 ),
116 | (2 ,1 ),
117 | (1 ,1 ),
118 | (6 ,2 ),
119 | (6 ,2 ),
120 | (10,8 ),
121 | (2 ,18),
122 | (6 ,15),
123 | (7 ,11),
124 | (2 ,14),
125 | (2 ,1 );
126 | GO
127 |
128 | INSERT INTO Banana VALUES
129 | (1 , 2, '20181003', '20181004', 0, 1 , NULL),
130 | (2 , 4, '20181003', '20181004', 1, 2 , NULL),
131 | (3 , 4, '20181003', '20181004', 1, 2 , NULL),
132 | (4 , 5, '20181003', '20181006', 1, 1 , NULL),
133 | (5 , 5, '20181003', '20181006', 1, 2 , 'best banana ever'),
134 | (6 , 3, '20181003', '20181004', 1, 2 , NULL),
135 | (7 , 2, '20181002', '20181004', 0, 3 , NULL),
136 | (8 , 5, '20181002', '20181005', 1, 3 , 'smooth and delectable'),
137 | (9 , 3, '20181002', '20181003', 1, 4 , NULL),
138 | (10, 3, '20181002', '20181003', 1, 5 , NULL),
139 | (11, 2, '20181002', '20181003', 0, 5 , NULL),
140 | (12, 5, '20181002', '20181005', 1, 4 , NULL),
141 | (13, 1, '20181002', '20181002', 0, 9 , NULL),
142 | (14, 1, '20181001', '20181001', 0, 16, NULL),
143 | (15, 1, '20181001', '20181001', 0, 16, NULL),
144 | (16, 5, '20181001', '20181004', 1, 5 , 'a culinary delight'),
145 | (17, 5, '20181001', '20181004', 1, 6 , NULL),
146 | (18, 5, '20181001', '20181004', 1, 7 , NULL),
147 | (19, 5, '20181001', '20181004', 1, 8 , NULL),
148 | (20, 5, '20181001', '20181004', 1, 9 , 'soft with nutty undertones'),
149 | (21, 4, '20181001', '20181003', 1, 18, NULL),
150 | (22, 4, '20180930', '20181001', 1, 14, NULL),
151 | (23, 4, '20180930', '20181001', 1, 17, NULL),
152 | (24, 4, '20180930', '20181001', 1, 12, NULL),
153 | (25, 2, '20180930', '20181030', 1, 11, NULL),
154 | (26, 3, '20180930', '20181030', 0, 5 , 'good mid-range banana'),
155 | (27, 4, '20180930', '20181003', 1, 2 , NULL),
156 | (28, 5, '20180930', '20181003', 1, 10, NULL),
157 | (29, 5, '20180929', '20181003', 1, 11, NULL),
158 | (30, 1, '20180929', '20181001', 0, 4 , 'tasted like ape-shit'),
159 | (31, 1, '20180929', '20180929', 0, 7 , NULL),
160 | (32, 2, '20180929', '20181002', 0, 8 , NULL),
161 | (33, 5, '20180929', '20181002', 1, 12, NULL),
162 | (34, 4, '20180929', '20180930', 1, 2 , NULL),
163 | (35, 2, '20180929', '20180930', 0, 2 , NULL),
164 | (36, 3, '20180929', '20180930', 0, 18, NULL),
165 | (37, 3, '20180929', '20180930', 0, 13, NULL),
166 | (38, 4, '20180928', '20180929', 1, 15, NULL),
167 | (39, 5, '20180928', '20181001', 1, 13, NULL),
168 | (40, 2, '20180928', '20180930', 1, 12, NULL),
169 | (41, 1, '20180928', '20180928', 0, 15, NULL),
170 | (42, 1, '20180928', '20180928', 1, 12, 'had ants in it'),
171 | (43, 1, '20180928', '20180928', 0, 7 , NULL),
172 | (44, 4, '20180927', '20180929', 0, 7 , NULL),
173 | (45, 5, '20180927', '20180930', 1, 14, NULL),
174 | (46, 5, '20180927', '20180930', 1, 15, NULL),
175 | (47, 5, '20180927', '20180930', 1, 16, NULL),
176 | (48, 5, '20180927', '20180930', 1, 17, NULL),
177 | (49, 5, '20180927', '20180930', 1, 18, NULL),
178 | (50, 3, '20180927', '20180928', 1, 3 , NULL);
179 | GO
--------------------------------------------------------------------------------
/create-database/SQLFiddle/T-SQL-notes-schema.sql:
--------------------------------------------------------------------------------
1 | CREATE TABLE Friends (
2 | FriendID int not null,
3 | FirstName varchar(20),
4 | LastName varchar(20),
5 | FavColour varchar(20),
6 | PRIMARY KEY (FriendID)
7 | );
8 | GO
9 | CREATE TABLE Pets (
10 | PetID int not null,
11 | PetName varchar(20),
12 | PetDOB date,
13 | FriendID int not null,
14 | FOREIGN KEY (FriendID) REFERENCES Friends (FriendID),
15 | PRIMARY KEY (PetID)
16 | );
17 | GO
18 | CREATE TABLE Scratched (
19 | ScratcherID int not null,
20 | ScratchDate date,
21 | ScratchTime time,
22 | ScratcheeID int not null,
23 | FOREIGN KEY (ScratcherID) REFERENCES Friends (FriendID),
24 | FOREIGN KEY (ScratcheeID) REFERENCES Friends (FriendID),
25 | PRIMARY KEY (ScratcherID, ScratcheeID, ScratchDate, ScratchTime)
26 | );
27 | GO
28 | CREATE TABLE PlayCount (
29 | PetID int not null,
30 | PlayCounter int,
31 | FriendID int not null,
32 | FOREIGN KEY (PetID) REFERENCES Pets (PetID),
33 | FOREIGN KEY (FriendID) REFERENCES Friends (FriendID),
34 | PRIMARY KEY (PetID, FriendID)
35 | );
36 | GO
37 | CREATE TABLE Passports (
38 | PptNo varchar(5) not null,
39 | PptCountry varchar(20),
40 | PptExpiry date,
41 | FriendID int,
42 | FOREIGN KEY (FriendID) REFERENCES Friends (FriendID),
43 | PRIMARY KEY (PptNo)
44 | );
45 | GO
46 | CREATE TABLE Table1 (
47 | A int not null,
48 | B varchar(20),
49 | C varchar(20),
50 | PRIMARY KEY (A)
51 | );
52 | GO
53 | CREATE TABLE Table2 (
54 | D varchar(20),
55 | E int not null,
56 | A int not null,
57 | FOREIGN KEY (A) REFERENCES Table1 (A),
58 | PRIMARY KEY (E)
59 | );
60 | GO
61 | CREATE TABLE Letters (
62 | A char,
63 | B char,
64 | Num int not null,
65 | PRIMARY KEY (Num)
66 | );
67 | GO
68 | CREATE TABLE RandomPeople (
69 | PersonName varchar(20),
70 | Gender char(2),
71 | Age int
72 | );
73 | GO
74 | CREATE TABLE Houses (
75 | house_ID varchar(5) not null,
76 | house_owner varchar(50),
77 | house_address varchar(200),
78 | post_code varchar(4),
79 | house_price decimal(10,2),
80 | PRIMARY KEY (house_ID)
81 | );
82 | GO
83 | CREATE TABLE Suburbs (
84 | post_code varchar(5) not null,
85 | suburb_name varchar(100) not null,
86 | vaccination_rate decimal(3,2),
87 | PRIMARY KEY (post_code, suburb_name)
88 | );
89 | GO
90 |
91 | CREATE TABLE Numbers (
92 | Num int,
93 | NumString varchar(10)
94 | );
95 | GO
96 |
97 | CREATE TABLE EduStudy (
98 | Id varchar(6) not null,
99 | Income varchar(8),
100 | Education int,
101 | PRIMARY KEY (Id)
102 | );
103 | GO
104 |
105 | CREATE TABLE Orders (
106 | OrderID int not null,
107 | Item varchar(30),
108 | Price decimal(5,2),
109 | OrderDT datetime,
110 | PRIMARY KEY (OrderID)
111 | );
112 | GO
113 |
114 | INSERT INTO Orders VALUES
115 | (1, 'Boiled leaves' , 2.99 , '2021-12-31 15:13:00'),
116 | (2, 'Bow wow' , 15 , '2021-12-31 15:34:00'),
117 | (3, 'Cackleberry stew', 32.55, '2022-01-01 09:32:00'),
118 | (4, 'Mug of murk' , 4.40 , '2022-01-01 10:16:00');
119 | GO
120 |
121 | INSERT INTO EduStudy VALUES
122 | ('EI13', 'low', 5),
123 | ('EI122', 'low', 1),
124 | ('EI281', 'low-mid', 4),
125 | ('EI3332', 'middle', 3),
126 | ('EI4751', 'high-mid', 3),
127 | ('EI12', 'high', 2);
128 | GO
129 |
130 | INSERT INTO Numbers VALUES
131 | (111, '111'),
132 | (31, '31'),
133 | (32, '32'),
134 | (211, '211');
135 | GO
136 |
137 |
138 | INSERT INTO Houses VALUES
139 | ('H0001', 'Millard Claassen' , '7235 East Van Dyke St' , '3128', 300000),
140 | ('H0002', 'Jamie Pew' , '8914 South Sunnyslope Dr', '3128', 150000),
141 | ('H0003', 'Bethel Viviano' , '87 South West Halifax St', '3142', 400000),
142 | ('H0004', 'Brandi Hovis' , '676 Ocean St' , '3142', 360000),
143 | ('H0005', 'Mei Colby' , '62 West Park Ave' , '3556', 220000),
144 | ('H0006', 'Marilu Munz' , '62 Elm Road' , '3083', 120000),
145 | ('H0007', 'Rhiannon Carwile' , '7005 Anderson Ave' , '3779', 500000),
146 | ('H0008', 'Joycelyn Hamburger' , '7410 Park Drive' , '3778', 550000),
147 | ('H0009', 'Leopoldo Flaherty' , '3 Dogwood Dr' , '3083', 1200000),
148 | ('H0010', 'Xavier Farrer' , '767 Rockville Street' , '3083', 100000),
149 | ('H0011', 'Waldo Wingboard' , '8712 Thorne Street' , NULL, 640000),
150 | ('H0012', 'Jimmy Jenkins' , '32 Rosey Cres' , NULL, 70000);
151 | GO
152 |
153 | -- There is no primary / foreign key pair for Houses and Suburbs.
154 | -- The primary key of Suburbs is not what you might expect (it is not post_code alone).
155 | -- post code 3128 connects to 2 suburbs
156 | -- some houses have a NULL post_code
157 | -- post code 3142 has no corresponding suburb
158 | -- the data type for post_code in Suburbs is varchar(5); one suburb has postcode '33128'
159 |
160 | INSERT INTO Suburbs VALUES
161 | ('3128' , 'Erebor' , 0.8),
162 | ('33128', 'Erberor' , 0.8),
163 | ('3128' , 'Fangorn' , 0.2),
164 | ('3779' , 'Durin' , 0.4),
165 | ('3556' , 'Gondor' , 0.65),
166 | ('3083' , 'Isengaard', 0.35);
167 | GO
168 |
169 | INSERT INTO Friends VALUES
170 | (1, 'X', 'A', 'red'),
171 | (2, 'Y', 'B', 'blue'),
172 | (3, 'Z', 'C', NULL);
173 | GO
174 | INSERT INTO Pets VALUES
175 | (1, 'Chikin', '20160924', 2),
176 | (2, 'Cauchy', '20120301', 3),
177 | (3, 'Gauss', '20120301', 3);
178 | GO
179 | INSERT INTO Scratched VALUES
180 | (1, '20180905', '12:00', 2),
181 | (1, '20180905', '12:30', 3),
182 | (2, '20180906', '11:00', 1),
183 | (3, '20180907', '10:00', 1),
184 | (2, '20180801', '16:15', 1),
185 | (2, '20180801', '13:00', 3),
186 | (1, '20170214', '04:30', 2),
187 | (3, '20200128', '18:00', 1);
188 | GO
189 | INSERT INTO PlayCount VALUES
190 | (1, 3, 1),
191 | (1, 5, 2),
192 | (3, 4, 2);
193 | GO
194 | INSERT INTO Passports VALUES
195 | ('E1321', 'Australia', '20210312', NULL),
196 | ('LA123', 'New Zealand', '20320901', 2),
197 | ('S9876', 'Monaco', '20280619', 3);
198 |
199 | GO
200 | INSERT INTO Table1 (
201 | A, B, C
202 | ) VALUES
203 | (1, 'Ignorance', 'is'),
204 | (2, 'War', 'is'),
205 | (3, 'Freedom', 'is'),
206 | (4, 'Friendship', 'is');
207 | GO
208 |
209 | INSERT INTO Table2 (
210 | D, E, A
211 | ) VALUES
212 | ('slavery.', 3, 1),
213 | ('weakness.', 4, 2),
214 | ('strength.', 1, 3),
215 | ('peace.', 2, 4);
216 | GO
217 |
218 | INSERT INTO Letters VALUES
219 | ('a', 'b', 1),
220 | ('a', 'c', 2),
221 | ('a', 'b', 3),
222 | ('a', 'c', 4);
223 | GO
224 |
225 | INSERT INTO RandomPeople VALUES
226 | ('Beyonce', 'F', 37),
227 | ('Laura Marling', 'F', 28),
228 | ('Darren Hayes', 'M', 46),
229 | ('Bret McKenzie', 'M', 42),
230 | ('Jack Monroe', 'NB', 30);
--------------------------------------------------------------------------------
/create-database/T-SQL/.Rhistory:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/frycast/SQL_course/13d5cf01fdd82b7cf8de3bb29846b5d595d07ab4/create-database/T-SQL/.Rhistory
--------------------------------------------------------------------------------
/create-database/T-SQL/README.md:
--------------------------------------------------------------------------------
1 | # T-SQL create database guide
2 |
3 | This is a guide to installing a T-SQL server on Windows or Mac, so you can start using T-SQL.
4 |
5 | This will guide you through **installing the server and editor** and then **creating the databases**. The editor we will use is Azure Data Studio.
6 |
7 | ## Windows
8 |
9 | ### Windows: installing the server and editor
10 |
11 | 1. Navigate to [Azure Data Studio](https://docs.microsoft.com/en-us/sql/azure-data-studio/download-azure-data-studio) and download the 'User Installer' for Windows.
12 | 2. Run the installer, clicking through to accept all the default options.
13 | 3. Launch Azure Data Studio and click 'Deploy a Server'.
14 | 4. A window opens asking you to select deployment options. Choose 'SQL Server on Windows' and click 'Select'.
15 | 5. The deployment prerequisites window opens. Click 'Select'. This will cause SQL Server 2019 Developer Edition to download and begin installing.
16 | 6. In the SQL Server installer window, choose installation type 'Basic'.
17 | 7. Read and accept the terms and conditions.
18 | 8. The install location window opens. Leave the default install location. Click 'install'.
19 | 9. Once installation completes, click 'Close'.
20 | 10. In Azure Data Studio click 'Create a connection'.
21 | 11. The 'Connection Details' panel opens. Make sure the Connection type is 'Microsoft SQL Server' and the Authentication type is 'Windows Authentication'. For Server, type 'localhost'. Leave the username and password blank. Click 'Connect'.
22 | 12. Azure Data Studio is now connected to your SQL Server.
23 |
24 | ### Windows: creating the databases
25 |
26 | 13. **Right click** [this link to the T-SQL database script](https://raw.githubusercontent.com/frycast/SQL_course/master/create-database/T-SQL/T-SQL-database.sql) and click '**save link as**'. Save the file somewhere you can find it. The filename should be 'T-SQL-database.sql'.
27 | 14. Find 'T-SQL-database.sql' on your computer and click and drag it into the Azure Data Studio window.
28 | 15. Click the green play button to execute the script, or press F5.
29 | 16. The databases have now been created.
30 |
31 | ## Mac
32 |
33 | ### Mac: installing the server and editor
34 |
35 | 1. Navigate to [Azure Data Studio](https://docs.microsoft.com/en-us/sql/azure-data-studio/download-azure-data-studio) and download the '.zip file' for macOS.
36 | 2. Extract 'Azure Data Studio' (app file) from the zip file. This should happen automatically after your download.
37 | 3. Drag 'Azure Data Studio' (app file) to the Applications folder.
38 | 4. The remainder of the process involves installing Docker and using it to pull a SQL Server container. Follow [the guide here](https://database.guide/how-to-install-sql-server-on-a-mac/).
39 | 5. Once you have connected Azure Data Studio to a local SQL Server 2019 instance, you are done.
40 |
41 | ### Mac: creating the databases
42 |
43 | 6. **Control-click** [this link to the T-SQL database script](https://raw.githubusercontent.com/frycast/SQL_course/master/create-database/T-SQL/T-SQL-database.sql) and click '**save link as**'. Save the file somewhere you can find it. The filename should be 'T-SQL-database.sql'.
44 | 7. Find 'T-SQL-database.sql' on your computer and click and drag it into the Azure Data Studio window.
45 | 8. Click the green play button to execute the script, or press F5.
46 | 9. The databases have now been created. You can optionally verify this as shown below.
47 |
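## Checking the databases (optional)

Whichever platform you used, you can optionally confirm that the script ran by asking the server to list its databases. The four course databases (`IDI_Clean`, `IDI_Metadata`, `Sandpit` and `Analytics`) should appear in the results, alongside the built-in system databases.

```sql
SELECT name FROM sys.databases;
```
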
--------------------------------------------------------------------------------
/create-database/T-SQL/T-SQL-database.sql:
--------------------------------------------------------------------------------
1 | -------------------------------------------------------------------
2 |
3 | -- Practice data for the Intro to SQL Course by Daniel Fryer.
4 |
5 | -------------------------------------------------------------------
6 |
7 | -------------------------------------------------------------------
8 | -- CREATE IDI_CLEAN DATABASE -------------------------------------
9 | -------------------------------------------------------------------
10 | USE master;
11 | GO
12 |
13 | DROP DATABASE IF EXISTS IDI_Clean;
14 | GO
15 |
16 | CREATE DATABASE IDI_Clean;
17 | GO -- GO is a T-SQL batch terminator
18 |
19 | USE IDI_Clean;
20 | GO
21 |
22 | -- DIA (Department of Internal Affairs)
23 | CREATE SCHEMA DIA_clean;
24 | GO
25 |
26 | CREATE TABLE DIA_Clean.births (
27 | snz_uid int not null UNIQUE,
28 | snz_dia_uid int not null UNIQUE,
29 | parent1_snz_uid int null,
30 | parent1_snz_dia_uid int null,
31 | dia_bir_parent1_sex_snz_code varchar(100) null,
32 | dia_bir_parent1_occupation_text varchar(60) null,
33 | parent2_snz_uid int null,
34 | parent2_snz_dia_uid int null,
35 | dia_bir_parent2_sex_snz_code varchar(100) null,
36 | dia_bir_parent2_occupation_text varchar(60) null,
37 | dia_bir_birth_month_nbr tinyint null,
38 | dia_bir_birth_year_nbr smallint null,
39 | dia_bir_sex_snz_code varchar(100) null,
40 | dia_bir_ethnic_grp1_snz_ind bit not null, -- European
41 | dia_bir_ethnic_grp2_snz_ind bit not null, -- Maori
42 | dia_bir_ethnic_grp3_snz_ind bit not null, -- Pacific
43 | dia_bir_ethnic_grp4_snz_ind bit not null, -- Asian
44 | dia_bir_ethnic_grp5_snz_ind bit not null, -- MELAA
45 | dia_bir_ethnic_grp6_snz_ind bit not null); -- Other
46 | GO
47 | CREATE TABLE DIA_Clean.deaths (
48 | snz_uid int not null UNIQUE,
49 | snz_dia_uid int not null UNIQUE,
50 | dia_dth_death_month_nbr tinyint null,
51 | dia_dth_death_year_nbr smallint null,
52 | dia_dth_last_seen_month_nbr tinyint null,
53 | dia_dth_last_seen_year_nbr smallint null);
54 | GO
55 | CREATE TABLE DIA_Clean.marriages (
56 | partnr1_snz_uid int not null UNIQUE,
57 | partnr1_snz_dia_uid int not null UNIQUE,
58 | partnr2_snz_uid int not null UNIQUE,
59 | partnr2_snz_dia_uid int not null UNIQUE,
60 | dia_mar_partnr1_birth_month_nbr tinyint null,
61 | dia_mar_partnr1_birth_year_nbr smallint null,
62 | dia_mar_partnr1_sex_snz_code varchar(100) null,
63 | dia_mar_partnr1_occupation_text varchar(60) null,
64 | dia_mar_partnr2_birth_month_nbr tinyint null,
65 | dia_mar_partnr2_birth_year_nbr smallint null,
66 | dia_mar_partnr2_sex_snz_code varchar(100) null,
67 | dia_mar_partnr2_occupation_text varchar(60) null);
68 | GO
69 | CREATE TABLE DIA_Clean.civil_unions (
70 | partnr1_snz_uid int not null UNIQUE,
71 | partnr1_snz_dia_uid int not null UNIQUE,
72 | partnr2_snz_uid int not null UNIQUE,
73 | partnr2_snz_dia_uid int not null UNIQUE,
74 | dia_civ_partnr1_birth_month_nbr tinyint null,
75 | dia_civ_partnr1_birth_year_nbr smallint null,
76 | dia_civ_partnr1_sex_snz_code varchar(100) null,
77 | dia_civ_partnr1_occupation_text varchar(60) null,
78 | dia_civ_partnr2_birth_month_nbr tinyint null,
79 | dia_civ_partnr2_birth_year_nbr smallint null,
80 | dia_civ_partnr2_sex_snz_code varchar(100) null,
81 | dia_civ_partnr2_occupation_text varchar(60) null);
82 | GO
83 |
84 | CREATE SCHEMA [security];
85 | GO
86 | GO
87 |
88 | CREATE TABLE security.concordance (
89 | snz_uid int not null UNIQUE,
90 | snz_dia_uid int,
91 | snz_ird_uid int,
92 | snz_moh_uid int,
93 | snz_cen_uid int,
94 | snz_acc_uid int,
95 | snz_dol_uid int,
96 | snz_in_spine bit not null);
97 | GO
98 |
99 | CREATE SCHEMA [data];
100 | GO
101 |
102 | -- Researchers may wish to exclude people who
103 | -- were overseas
104 | CREATE TABLE [data].person_overseas_spell (
105 | snz_uid int not null UNIQUE,
106 | pos_applied_date date null,
107 | pos_ceased_date date null);
108 | GO
109 |
110 | -- The column with 0 or 1 indicates
111 | -- if an individual is in the spine
112 | CREATE TABLE [data].personal_detail (
113 | snz_uid int not null UNIQUE,
114 | snz_in_spine int not null
115 | );
116 | GO
117 |
118 | -- In this table there should be
119 | -- one row for each unique combination
120 | -- of snz_uid and year
121 | CREATE TABLE [data].snz_res_pop (
122 | snz_uid int not null,
123 | year int not null,
124 | UNIQUE (snz_uid, year));
125 | GO
126 |
127 | -- I CREATED THESE EMPTY TABLES SO THE TABLE
128 | -- NAMES WOULD MATCH SOME OF THOSE IN THE
129 | -- DATA SCHEMA
130 | CREATE TABLE [data].source_ranked_ethnicity (
131 | snz_uid int not null UNIQUE,
132 | dummy1 int,
133 | dummy2 int,
134 | dummy3 int);
135 | GO
136 | CREATE TABLE [data].income_tax_yr_summary (
137 | snz_uid int not null UNIQUE,
138 | dummy1 int,
139 | dummy2 int,
140 | dummy3 int);
141 | GO
142 | CREATE TABLE [data].income_cal_yr_summary (
143 | snz_uid int not null UNIQUE,
144 | dummy1 int,
145 | dummy2 int,
146 | dummy3 int);
147 | GO
148 | CREATE TABLE [data].address_notification (
149 | snz_uid int not null UNIQUE,
150 | dummy1 int,
151 | dummy2 int,
152 | dummy3 int);
153 | GO
154 |
155 | CREATE SCHEMA ACC_Clean;
156 | GO
157 |
158 | CREATE TABLE ACC_Clean.Medical_Codes (
159 | snz_acc_claim_uid int not null,
160 | acc_med_injury_count_nbr smallint,
161 | acc_med_read_code varchar(6),
162 | acc_med_read_code_text varchar(255),
163 | acc_med_injury_precedence_nbr int not null
164 | );
165 | GO
166 |
167 | CREATE TABLE ACC_Clean.Serious_Injury (
168 | snz_uid int not null,
169 | snz_acc_uid int not null,
170 | snz_employee_ird_uid int null,
171 | snz_employer_ird_uid int null,
172 | acc_cla_accident_date date null,
173 | acc_cla_claim_costs_to_date_ex_gst_amt decimal(13,2),
174 | acc_cla_ethnic_grp1_snz_uid bit not null,
175 | acc_cla_ethnic_grp2_snz_uid bit not null,
176 | acc_cla_ethnic_grp3_snz_uid bit not null,
177 | acc_cla_ethnic_grp4_snz_uid bit not null,
178 | acc_cla_ethnic_grp5_snz_uid bit not null,
179 | acc_cla_ethnic_grp6_snz_uid bit not null,
180 | snz_acc_claim_uid int not null,
181 | acc_cla_meshblock_code varchar(7) null
182 | );
183 | GO
184 |
185 | INSERT INTO ACC_Clean.Medical_Codes
186 | ( snz_acc_claim_uid,
187 | acc_med_injury_count_nbr,
188 | acc_med_read_code,
189 | acc_med_read_code_text,
190 | acc_med_injury_precedence_nbr)
191 | VALUES
192 | (1 ,2, 'a', 'broken leg' ,1 ),
193 | (2 ,3, 'b', 'popped out eyeball',2 ),
194 | (3 ,1, 'a', 'broken leg' ,1 ),
195 | (4 ,2, 'a', 'broken leg' ,1 ),
196 | (5 ,3, 'b', 'popped out eyeball',1 ),
197 | (6 ,4, 'b', 'popped out eyeball',2 ),
198 | (7 ,2, 'a', 'broken leg' ,2 ),
199 | (8 ,2, 'c', 'exploded lung' ,3 ),
200 | (9 ,3, 'c', 'exploded lung' ,3 ),
201 | (10,3, 'c', 'exploded lung' ,3 );
202 | GO
203 |
204 | INSERT INTO ACC_Clean.Serious_Injury
205 | ( snz_uid ,
206 | snz_acc_uid ,
207 | snz_employee_ird_uid ,
208 | snz_employer_ird_uid ,
209 | acc_cla_accident_date ,
210 | acc_cla_claim_costs_to_date_ex_gst_amt ,
211 | acc_cla_ethnic_grp1_snz_uid ,
212 | acc_cla_ethnic_grp2_snz_uid ,
213 | acc_cla_ethnic_grp3_snz_uid ,
214 | acc_cla_ethnic_grp4_snz_uid ,
215 | acc_cla_ethnic_grp5_snz_uid ,
216 | acc_cla_ethnic_grp6_snz_uid ,
217 | snz_acc_claim_uid ,
218 | acc_cla_meshblock_code )
219 | VALUES
220 | (10, 29 ,12 ,42 , '20160901', 15000 ,1 ,0 ,0 ,1 ,0 ,1 ,1 , 'MZ321' ),
221 | (2 , 23 ,14 ,32 , '20160912', 12000 ,1 ,0 ,0 ,0 ,0 ,0 ,2 , 'KL653' ),
222 | (1 , 22 ,17 ,32 , '20160913', 130000 ,0 ,1 ,0 ,0 ,0 ,0 ,3 , 'DF24' ),
223 | (4 , 21 ,18 ,54 , '20160923', 132000 ,0 ,1 ,0 ,0 ,0 ,0 ,4 , 'EW321' ),
224 | (7 , 26 ,12 ,65 , '20160902', 23000 ,0 ,1 ,0 ,0 ,0 ,0 ,5 , 'EW321' ),
225 | (9 , 25 ,19 ,65 , '20160921', 32000 ,0 ,0 ,0 ,1 ,0 ,0 ,6 , 'KL432' ),
226 | (5 , 24 ,19 ,23 , '20160918', 500 ,1 ,0 ,0 ,0 ,0 ,0 ,7 , 'EW234' ),
227 | (8 , 28 ,15 ,42 , '20160916', 120 ,0 ,0 ,0 ,0 ,1 ,0 ,8 , 'FD432' ),
228 | (6 , 27 ,14 ,42 , '20160918', 130 ,0 ,1 ,0 ,0 ,0 ,0 ,9 , 'HFD432'),
229 | (3 , 20 ,12 ,42 , '20160919', 45000 ,1 ,1 ,0 ,0 ,0 ,0 ,10 , 'FGV432');
230 | GO
231 |
232 | INSERT INTO security.concordance (
233 | snz_uid ,
234 | snz_dia_uid ,
235 | snz_ird_uid ,
236 | snz_moh_uid ,
237 | snz_cen_uid ,
238 | snz_acc_uid ,
239 | snz_dol_uid ,
240 | snz_in_spine)
241 | VALUES
242 | (10 ,34 , NULL , NULL, NULL ,29 ,NULL, 1),
243 | (2 ,55 , NULL , NULL, NULL ,23 ,NULL,1),
244 | (1 ,32 , NULL , NULL, NULL ,22 ,NULL,1),
245 | (4 ,1 , NULL , NULL, NULL ,21 ,NULL,1),
246 | (7 ,67 , NULL , NULL, NULL ,26 ,NULL,1),
247 | (9 ,NULL , 3 , NULL, NULL ,25 ,NULL,0),
248 | (5 ,32 , 1 , NULL, NULL ,24 ,NULL,0),
249 | (8 ,43 , 2 , NULL, NULL ,28 ,NULL,1),
250 | (6 ,23 , 4 , NULL, NULL ,27 ,NULL,1),
251 | (3 ,123 , 6 , NULL, NULL ,20 ,NULL,1),
252 | (11 ,NULL , 7 , NULL, NULL ,NULL,NULL,1),
253 | (12 ,65 , 5 , NULL, NULL ,NULL,NULL,0),
254 | (13 ,NULL , 10 , NULL, NULL ,6 ,NULL,1),
255 | (14 ,NULL , 12 , NULL, NULL ,1 ,NULL,1),
256 | (15 ,NULL , 43 , NULL, NULL ,NULL,NULL,1),
257 | (16 ,765 , 44 , NULL, NULL ,NULL,NULL,0),
258 | (17 ,NULL , 34 , NULL, NULL ,7 ,NULL,1),
259 | (18 ,76 , 100 , NULL, NULL ,NULL,NULL,0),
260 | (19 ,NULL , 101 , NULL, 32 ,3 ,NULL,0);
261 | GO
262 |
263 | --CREATE TABLE dia_clean.births (
264 | -- snz_uid int not null UNIQUE,
265 | -- snz_dia_uid int not null UNIQUE,
266 | -- parent1_snz_uid int null,
267 | -- parent1_snz_dia_uid int null,
268 | -- dia_bir_parent1_sex_snz_code varchar(100) null,
269 | -- dia_bir_parent1_occupation_text varchar(60) null,
270 | -- parent2_snz_uid int null,
271 | -- parent2_snz_dia_uid int null,
272 | -- dia_bir_parent2_sex_snz_code varchar(100) null,
273 | -- dia_bir_parent2_occupation_text varchar(60) null,
274 | -- dia_bir_birth_month_nbr tinyint null,
275 | -- dia_bir_birth_year_nbr smallint null,
276 | -- dia_bir_sex_snz_code varchar(100) null,
277 | -- dia_bir_ethnic_grp1_snz_ind bit not null, -- European
278 | -- dia_bir_ethnic_grp2_snz_ind bit not null, -- Maori
279 | -- dia_bir_ethnic_grp3_snz_ind bit not null, -- Pacific
280 | -- dia_bir_ethnic_grp4_snz_ind bit not null, -- Asian
281 | -- dia_bir_ethnic_grp5_snz_ind bit not null, -- MELAA
282 | -- dia_bir_ethnic_grp6_snz_ind bit not null); -- Other
283 | --GO
284 | --CREATE TABLE dia_clean.deaths (
285 | -- snz_uid int not null UNIQUE,
286 | -- snz_dia_uid int not null UNIQUE,
287 | -- dia_dth_death_month_nbr tinyint null,
288 | -- dia_dth_death_year_nbr smallint null,
289 | -- dia_dth_last_seen_month_nbr tinyint null,
290 | -- dia_dth_last_seen_year_nbr smallint null,);
291 | --GO
292 | --CREATE TABLE dia_clean.marriages (
293 | -- partnr1_snz_uid int not null UNIQUE,
294 | -- partnr1_snz_dia_uid int not null UNIQUE,
295 | -- partnr2_snz_uid int not null UNIQUE,
296 | -- partnr2_snz_dia_uid int not null UNIQUE,
297 | -- dia_mar_partnr1_birth_month_nbr tinyint null,
298 | -- dia_mar_partnr1_birth_year_nbr smallint null,
299 | -- dia_mar_partnr1_sex_snz_code varchar(100) null,
300 | -- dia_mar_partnr1_occupation_text varchar(60) null,
301 | -- dia_mar_partnr2_birth_month_nbr tinyint null,
302 | -- dia_mar_partnr2_birth_year_nbr smallint null,
303 | -- dia_mar_partnr2_sex_snz_code varchar(100) null,
304 | -- dia_mar_partnr2_occupation_text varchar(60) null);
305 | --GO
306 | --CREATE TABLE dia_clean.civil_unions (
307 | -- partnr1_snz_uid int not null UNIQUE,
308 | -- partnr1_snz_dia_uid int not null UNIQUE,
309 | -- partnr2_snz_uid int not null UNIQUE,
310 | -- partnr2_snz_dia_uid int not null UNIQUE,
311 | -- dia_civ_partnr1_birth_month_nbr tinyint null,
312 | -- dia_civ_partnr1_birth_year_nbr smallint null,
313 | -- dia_civ_partnr1_sex_snz_code varchar(100) null,
314 | -- dia_civ_partnr1_occupation_text varchar(60) null,
315 | -- dia_civ_partnr2_birth_month_nbr tinyint null,
316 | -- dia_civ_partnr2_birth_year_nbr smallint null,
317 | -- dia_civ_partnr2_sex_snz_code varchar(100) null,
318 | -- dia_civ_partnr2_occupation_text varchar(60) null);
319 | --GO
320 |
321 | -- INSERT INTO DIA_Clean.civil_unions VALUES (10, 34, 6 , 23 , 1, 1975, 1, NULL, 1, 1976, 1, NULL);
322 | -- INSERT INTO DIA_Clean.civil_unions VALUES (2, 55, 3 , 123 , 2, 1966, 0, NULL, 6, 1969, 1, NULL);
323 | -- INSERT INTO DIA_Clean.civil_unions VALUES (1, 32, 12, 65 , 5, 1977, 0, NULL, 4, 1973, 1, NULL);
324 | -- INSERT INTO DIA_Clean.civil_unions VALUES (4, 1, 16, 765 , 5, 1988, 1, NULL, 4, 1989, 0, NULL);
325 | -- INSERT INTO DIA_Clean.civil_unions VALUES (7, 67, 18, 76 , 9, 1999, 0, NULL, 12, 1995, 0, NULL);
326 | -- GO
327 |
328 | -----------------------------------------------------------------
329 | -- CREATE IDI_Metadata DATABASE ---------------------------------
330 | -----------------------------------------------------------------
331 | USE master;
332 | GO
333 |
334 | DROP DATABASE IF EXISTS IDI_Metadata;
335 | GO
336 |
337 | CREATE DATABASE IDI_Metadata;
338 | GO
339 |
340 | USE IDI_Metadata;
341 | GO
342 |
343 | CREATE SCHEMA clean_read_CLASSIFICATIONS;
344 | GO
345 |
346 | -- I'm not sure what the table names or
347 | -- variable names are in here so these
348 | -- are just made up to simulate the functionality
349 | CREATE TABLE clean_read_CLASSIFICATIONS.ethnicity (
350 | ethnic_grp int not null, --a number from 1 to 6
351 | description varchar(100) not null
352 | );
353 | GO
354 |
355 | CREATE TABLE clean_read_CLASSIFICATIONS.post_codes (
356 | post_code int not null,
357 | description varchar(100)
358 | );
359 | GO
360 |
361 | INSERT INTO clean_read_CLASSIFICATIONS.ethnicity
362 | (ethnic_grp, description)
363 | VALUES
364 | (1, 'European'),
365 | (2, 'Maori'),
366 | (3, 'Pacific Peoples'),
367 | (4, 'Asian'),
368 | (5, 'Middle Eastern/Latin American/African'),
369 | (6, 'Other ethnicity');
370 |
371 | -----------------------------------------------------------------
372 | -- CREATE OLDER IDI_Clean REFRESH DATABASE ----------------------
373 | -----------------------------------------------------------------
374 |
375 | --INSERT INTO security.concordance (
376 | -- snz_uid ,
377 | -- snz_dia_uid ,
378 | -- snz_ird_uid ,
379 | -- snz_moh_uid ,
380 | -- snz_cen_uid ,
381 | -- snz_acc_uid ,
382 | -- snz_dol_uid ,
383 | -- snz_in_spine)
384 | --VALUES
385 | --(13, 34 , NULL , NULL, NULL ,NULL,NULL, 1),
386 | --(21 ,55 , NULL , NULL, NULL ,2 ,NULL,1),
387 | --(34 ,32 , NULL , NULL, NULL ,NULL,NULL,1),
388 | --(24 ,1 , NULL , NULL, NULL ,4 ,NULL,1),
389 | --(55 ,67 , NULL , NULL, NULL ,5 ,NULL,1),
390 | --(7 ,NULL , 3 , NULL, NULL ,NULL,NULL,0),
391 | --(17 ,32 , 1 , NULL, NULL ,NULL,NULL,0),
392 | --(28 ,43 , 2 , NULL, NULL ,8 ,NULL,1),
393 | --(59 ,23 , 4 , NULL, NULL ,9 ,NULL,1),
394 | --(1237,123 , 6 , NULL, NULL ,10 ,NULL,1),
395 | --(121,NULL , 7 , NULL, NULL ,NULL,NULL,1),
396 | --(345,65 , 5 , NULL, NULL ,NULL,NULL,0),
397 | --(765, NULL , 10 , NULL, NULL ,6 ,NULL,1),
398 | --(1432, NULL , 12 , NULL, NULL ,1 ,NULL,1),
399 | --(873, NULL , 43 , NULL, NULL ,NULL,NULL,1),
400 | --(3,765 , 44 , NULL, NULL ,NULL,NULL,0),
401 | --(5,NULL , 34 , NULL, NULL ,7 ,NULL,1),
402 | --(78,76 , 100 , NULL, NULL ,NULL,NULL,0),
403 | --(1,NULL , 101 , NULL, 32 ,3 ,NULL,0);
404 | --GO
405 |
406 | -----------------------------------------------------------------
407 | -- CREATE Sandpit DATABASE --------------------------------------
408 | -----------------------------------------------------------------
409 | USE master;
410 | GO
411 |
412 | DROP DATABASE IF EXISTS Sandpit;
413 | GO
414 |
415 | CREATE DATABASE Sandpit;
416 | GO
417 |
418 | USE Sandpit;
419 | GO
420 |
421 | --- APE SCHEMA --
422 | CREATE SCHEMA Ape;
423 | GO
424 | CREATE TABLE Ape.Colours (
425 | ColourID int not null,
426 | ColourName varchar(20) not null,
427 | Comments varchar(100), -- I find this Colour strange etc.
428 | PRIMARY KEY (ColourID)
429 | );
430 | GO
431 | CREATE TABLE Ape.Friends (
432 | FriendID int not null,
433 | FirstName varchar(20),
434 | LastName varchar(20),
435 | FavColourID int,
436 | FOREIGN KEY (FavColourID) REFERENCES Ape.Colours (ColourID),
437 | PRIMARY KEY (FriendID)
438 | );
439 | GO
440 | CREATE TABLE Ape.BananaTree (
441 | TreeID int not null,
442 | Height decimal(5,2),
443 | YearPlanted int,
444 | MonthPlanted int,
445 | Width decimal(5,2),
446 | PRIMARY KEY (TreeID)
447 | );
448 | GO
449 | CREATE TABLE Ape.EatingFrom (
450 | FriendID int not null,
451 | TreeID int not null,
452 | FOREIGN KEY (FriendID) REFERENCES Ape.Friends (FriendID),
453 | FOREIGN KEY (TreeID) REFERENCES Ape.BananaTree (TreeID)
454 | );
455 | GO
456 | CREATE TABLE Ape.Banana (
457 | BananaID int not null,
458 | TasteRank int, -- from 1 to 5
459 | DatePicked date not null,
460 | DateEaten date not null,
461 | Ripe bit,
462 | TreeID int not null,
463 | Comments varchar(100),
464 | FOREIGN KEY (TreeID) REFERENCES Ape.BananaTree (TreeID),
465 | PRIMARY KEY (BananaID)
466 | );
467 | GO
468 |
469 | INSERT INTO Ape.Colours VALUES
470 | (1 ,'blue' , 'similar to sky' ),
471 | (2 ,'green' , 'bad tasting bananas'),
472 | (3 ,'yellow' , 'my favourite because banana'),
473 | (4 ,'orange' , 'no connection to orangutan'),
474 | (5 ,'red' , 'generally a bad sign'),
475 | (6 ,'purple' , 'never seen this before'),
476 | (7 ,'brown' , 'many things are brown'),
477 | (8 ,'magenta' , 'one of the primary subtractive colours'),
478 | (9 ,'pink' , 'very manly'),
479 | (10,'lime' , 'almost green'),
480 | (11,'turquoise', 'not to be confused with tortoise');
481 | GO
482 |
483 | INSERT INTO Ape.Friends VALUES
484 | (1 , 'Caesar' , 'Serkis', 3 ),
485 | (2 , 'Harambe' , 'Porter', 1 ),
486 | (3 , 'Aldo' , 'Atkins', 3 ),
487 | (4 , 'Cornelius', 'Porter', 1 ),
488 | (5 , 'Zira' , 'Porter', 4 ),
489 | (6 , 'Ishmael' , 'Serkis', 3 ),
490 | (7 , 'Monsieur' , 'Mallah', 3 ),
491 | (8 , 'Titano' , 'Atkins', 6 ),
492 | (9 , 'King' , 'Kong' , 3 ),
493 | (10, 'Bobo' , 'Kong' , 8 ),
494 | (11, 'Myster' , 'Ious' , NULL);
495 | GO
496 |
497 | INSERT INTO Ape.BananaTree VALUES
498 | (1, 5.5, 2018, 08, 31),
499 | (2, 4.3, 2018, 08, 27),
500 | (3, 4.7, 2018, 08, 36),
501 | (4, 3.8, 2018, 08, 20),
502 | (5, 6.2, 2018, 08, 40),
503 | (6, 6.4, 2016, 07, 23),
504 | (7, 5.4, 2016, 07, 32),
505 | (8, 5.2, 2016, 07, 31),
506 | (9, 4.8, 2016, 07, 19),
507 | (10, 4.5, 2015, 09, 28),
508 | (11, 5.8, 2015, 09, 35),
509 | (12, 7.5, 2015, 09, 45),
510 | (13, 6.4, 2015, 09, 30),
511 | (14, 5.0, 2015, 09, 24),
512 | (15, 5.1, 2014, 10, 34),
513 | (16, 4.2, 2014, 10, 23),
514 | (17, 5.4, 2014, 08, 39),
515 | (18, 5.2, 2014, 08, 28);
516 | GO
517 |
518 | INSERT INTO Ape.EatingFrom VALUES
519 | (3 ,1 ),
520 | (2 ,1 ),
521 | (1 ,1 ),
522 | (6 ,3 ),
523 | (10,5 ),
524 | (4 ,5 ),
525 | (9 ,5 ),
526 | (8 ,8 ),
527 | (7 ,8 ),
528 | (10,8 ),
529 | (5 ,8 ),
530 | (2 ,8 ),
531 | (3 ,8 ),
532 | (4 ,8 ),
533 | (6 ,2 ),
534 | (6 ,2 ),
535 | (6 ,2 ),
536 | (8 ,2 ),
537 | (9 ,1 ),
538 | (2 ,1 ),
539 | (1 ,1 ),
540 | (6 ,2 ),
541 | (6 ,2 ),
542 | (10,8 ),
543 | (2 ,18),
544 | (6 ,15),
545 | (7 ,11),
546 | (2 ,14),
547 | (2 ,1 );
548 | GO
549 |
550 | INSERT INTO Ape.Banana VALUES
551 | (1 , 2, '20181003', '20181004', 0, 1 , NULL),
552 | (2 , 4, '20181003', '20181004', 1, 2 , NULL),
553 | (3 , 4, '20181003', '20181004', 1, 2 , NULL),
554 | (4 , 5, '20181003', '20181006', 1, 1 , NULL),
555 | (5 , 5, '20181003', '20181006', 1, 2 , 'best banana ever'),
556 | (6 , 3, '20181003', '20181004', 1, 2 , NULL),
557 | (7 , 2, '20181002', '20181004', 0, 3 , NULL),
558 | (8 , 5, '20181002', '20181005', 1, 3 , 'smooth and delectable'),
559 | (9 , 3, '20181002', '20181003', 1, 4 , NULL),
560 | (10, 3, '20181002', '20181003', 1, 5 , NULL),
561 | (11, 2, '20181002', '20181003', 0, 5 , NULL),
562 | (12, 5, '20181002', '20181005', 1, 4 , NULL),
563 | (13, 1, '20181002', '20181002', 0, 9 , NULL),
564 | (14, 1, '20181001', '20181001', 0, 16, NULL),
565 | (15, 1, '20181001', '20181001', 0, 16, NULL),
566 | (16, 5, '20181001', '20181004', 1, 5 , 'a culinary delight'),
567 | (17, 5, '20181001', '20181004', 1, 6 , NULL),
568 | (18, 5, '20181001', '20181004', 1, 7 , NULL),
569 | (19, 5, '20181001', '20181004', 1, 8 , NULL),
570 | (20, 5, '20181001', '20181004', 1, 9 , 'soft with nutty undertones'),
571 | (21, 4, '20181001', '20181003', 1, 18, NULL),
572 | (22, 4, '20180930', '20181001', 1, 14, NULL),
573 | (23, 4, '20180930', '20181001', 1, 17, NULL),
574 | (24, 4, '20180930', '20181001', 1, 12, NULL),
575 | (25, 2, '20180930', '20181030', 1, 11, NULL),
576 | (26, 3, '20180930', '20181030', 0, 5 , 'good mid-range banana'),
577 | (27, 4, '20180930', '20181003', 1, 2 , NULL),
578 | (28, 5, '20180930', '20181003', 1, 10, NULL),
579 | (29, 5, '20180929', '20181003', 1, 11, NULL),
580 | (30, 1, '20180929', '20181001', 0, 4 , 'tasted like ape-shit'),
581 | (31, 1, '20180929', '20180929', 0, 7 , NULL),
582 | (32, 2, '20180929', '20181002', 0, 8 , NULL),
583 | (33, 5, '20180929', '20181002', 1, 12, NULL),
584 | (34, 4, '20180929', '20180930', 1, 2 , NULL),
585 | (35, 2, '20180929', '20180930', 0, 2 , NULL),
586 | (36, 3, '20180929', '20180930', 0, 18, NULL),
587 | (37, 3, '20180929', '20180930', 0, 13, NULL),
588 | (38, 4, '20180928', '20180929', 1, 15, NULL),
589 | (39, 5, '20180928', '20181001', 1, 13, NULL),
590 | (40, 2, '20180928', '20180930', 1, 12, NULL),
591 | (41, 1, '20180928', '20180928', 0, 15, NULL),
592 | (42, 1, '20180928', '20180928', 1, 12, 'had ants in it'),
593 | (43, 1, '20180928', '20180928', 0, 7 , NULL),
594 | (44, 4, '20180927', '20180929', 0, 7 , NULL),
595 | (45, 5, '20180927', '20180930', 1, 14, NULL),
596 | (46, 5, '20180927', '20180930', 1, 15, NULL),
597 | (47, 5, '20180927', '20180930', 1, 16, NULL),
598 | (48, 5, '20180927', '20180930', 1, 17, NULL),
599 | (49, 5, '20180927', '20180930', 1, 18, NULL),
600 | (50, 3, '20180927', '20180928', 1, 3 , NULL);
601 | GO
602 |
603 | --- NOTES SCHEMA --
604 | CREATE SCHEMA Notes;
605 | GO
606 | CREATE TABLE Notes.Friends (
607 | FriendID int not null,
608 | FirstName varchar(20),
609 | LastName varchar(20),
610 | FavColour varchar(20),
611 | PRIMARY KEY (FriendID)
612 | );
613 | GO
614 | CREATE TABLE Notes.Pets (
615 | PetID int not null,
616 | PetName varchar(20),
617 | PetDOB date,
618 | FriendID int not null,
619 | FOREIGN KEY (FriendID) REFERENCES Notes.Friends (FriendID),
620 | PRIMARY KEY (PetID)
621 | );
622 | GO
623 | CREATE TABLE Notes.Scratched (
624 | ScratcherID int not null,
625 | ScratchDate date,
626 | ScratchTime time,
627 | ScratcheeID int not null,
628 | FOREIGN KEY (ScratcherID) REFERENCES Notes.Friends (FriendID),
629 | FOREIGN KEY (ScratcheeID) REFERENCES Notes.Friends (FriendID),
630 | PRIMARY KEY (ScratcherID, ScratcheeID, ScratchDate, ScratchTime)
631 | );
632 | GO
633 | CREATE TABLE Notes.PlayCount (
634 | PetID int not null,
635 | PlayCounter int,
636 | FriendID int not null,
637 | FOREIGN KEY (PetID) REFERENCES Notes.Pets (PetID),
638 | FOREIGN KEY (FriendID) REFERENCES Notes.Friends (FriendID),
639 | PRIMARY KEY (PetID, FriendID)
640 | );
641 | GO
642 | CREATE TABLE Notes.Passports (
643 | PptNo varchar(5) not null,
644 | PptCountry varchar(20),
645 | PptExpiry date,
646 | FriendID int,
647 | FOREIGN KEY (FriendID) REFERENCES Notes.Friends (FriendID),
648 | PRIMARY KEY (PptNo)
649 | );
650 | GO
651 | CREATE TABLE Notes.Table1 (
652 | A int not null,
653 | B varchar(20),
654 | C varchar(20),
655 | PRIMARY KEY (A)
656 | );
657 | GO
658 | CREATE TABLE Notes.Table2 (
659 | D varchar(20),
660 | E int not null,
661 | A int not null,
662 | FOREIGN KEY (A) REFERENCES Notes.Table1 (A),
663 | PRIMARY KEY (E)
664 | );
665 | GO
666 | CREATE TABLE Notes.Letters (
667 | A char,
668 | B char,
669 | Num int not null,
670 | PRIMARY KEY (Num)
671 | );
672 | GO
673 | CREATE TABLE Notes.RandomPeople (
674 | PersonName varchar(20),
675 | Gender char(2),
676 | Age int
677 | );
678 | GO
679 | CREATE TABLE Notes.Houses (
680 | house_ID varchar(5) not null,
681 | house_owner varchar(50),
682 | house_address varchar(200),
683 | post_code varchar(4),
684 | house_price decimal(10,2),
685 | PRIMARY KEY (house_ID)
686 | );
687 | GO
688 | CREATE TABLE Notes.Suburbs (
689 | post_code varchar(5) not null,
690 | suburb_name varchar(100) not null,
691 | vaccination_rate decimal(3,2),
692 | PRIMARY KEY (post_code, suburb_name)
693 | );
694 | GO
695 |
696 | CREATE TABLE Notes.Numbers (
697 | Num int,
698 | NumString varchar(10)
699 | );
700 | GO
701 |
702 | CREATE TABLE Notes.EduStudy (
703 | Id varchar(6) not null,
704 | Income varchar(8),
705 | Education int,
706 | PRIMARY KEY (Id)
707 | );
708 | GO
709 |
710 | CREATE TABLE Notes.Orders (
711 | OrderID int not null,
712 | Item varchar(30),
713 | Price decimal(5,2),
714 | OrderDT datetime,
715 | PRIMARY KEY (OrderID)
716 | );
717 | GO
718 |
719 | INSERT INTO Notes.Orders VALUES
720 | (1, 'Boiled leaves' , 2.99 , '2021-12-31 15:13:00'),
721 | (2, 'Bow wow' , 15 , '2021-12-31 15:34:00'),
722 | (3, 'Cackleberry stew', 32.55, '2022-01-01 09:32:00'),
723 | (4, 'Mug of murk' , 4.40 , '2022-01-01 10:16:00');
724 | GO
725 |
726 | INSERT INTO Notes.EduStudy VALUES
727 | ('EI13', 'low', 5),
728 | ('EI122', 'low', 1),
729 | ('EI281', 'low-mid', 4),
730 | ('EI3332', 'middle', 3),
731 | ('EI4751', 'high-mid', 3),
732 | ('EI12', 'high', 2);
733 | GO
734 |
735 | INSERT INTO Notes.Numbers VALUES
736 | (111, '111'),
737 | (31, '31'),
738 | (32, '32'),
739 | (211, '211');
740 | GO
741 |
742 |
743 | INSERT INTO Notes.Houses VALUES
744 | ('H0001', 'Millard Claassen' , '7235 East Van Dyke St' , '3128', 300000),
745 | ('H0002', 'Jamie Pew' , '8914 South Sunnyslope Dr', '3128', 150000),
746 | ('H0003', 'Bethel Viviano' , '87 South West Halifax St', '3142', 400000),
747 | ('H0004', 'Brandi Hovis' , '676 Ocean St' , '3142', 360000),
748 | ('H0005', 'Mei Colby' , '62 West Park Ave' , '3556', 220000),
749 | ('H0006', 'Marilu Munz' , '62 Elm Road' , '3083', 120000),
750 | ('H0007', 'Rhiannon Carwile' , '7005 Anderson Ave' , '3779', 500000),
751 | ('H0008', 'Joycelyn Hamburger' , '7410 Park Drive' , '3778', 550000),
752 | ('H0009', 'Leopoldo Flaherty' , '3 Dogwood Dr' , '3083', 1200000),
753 | ('H0010', 'Xavier Farrer' , '767 Rockville Street' , '3083', 100000),
754 | ('H0011', 'Waldo Wingboard' , '8712 Thorne Street' , NULL, 640000),
755 | ('H0012', 'Jimmy Jenkins' , '32 Rosey Cres' , NULL, 70000);
756 | GO
757 |
758 | -- There is no primary / foreign key pair for Houses and Suburbs.
759 | -- The primary key of Suburbs is not what you might expect (it is not post_code alone).
760 | -- post code 3128 connects to 2 suburbs
761 | -- some houses have a NULL post_code
762 | -- post code 3142 has no corresponding suburb
763 | -- the data type for post_code in Suburbs is varchar(5); one suburb has postcode '33128'
764 |
765 | INSERT INTO Notes.Suburbs VALUES
766 | ('3128' , 'Erebor' , 0.8),
767 | ('33128', 'Erberor' , 0.8),
768 | ('3128' , 'Fangorn' , 0.2),
769 | ('3779' , 'Durin' , 0.4),
770 | ('3556' , 'Gondor' , 0.65),
771 | ('3083' , 'Isengaard', 0.35);
772 | GO
773 |
774 | INSERT INTO Notes.Friends VALUES
775 | (1, 'X', 'A', 'red'),
776 | (2, 'Y', 'B', 'blue'),
777 | (3, 'Z', 'C', NULL);
778 | GO
779 | INSERT INTO Notes.Pets VALUES
780 | (1, 'Chikin', '20160924', 2),
781 | (2, 'Cauchy', '20120301', 3),
782 | (3, 'Gauss', '20120301', 3);
783 | GO
784 | INSERT INTO Notes.Scratched VALUES
785 | (1, '20180905', '12:00', 2),
786 | (1, '20180905', '12:30', 3),
787 | (2, '20180906', '11:00', 1),
788 | (3, '20180907', '10:00', 1),
789 | (2, '20180801', '16:15', 1),
790 | (2, '20180801', '13:00', 3),
791 | (1, '20170214', '04:30', 2),
792 | (3, '20200128', '18:00', 1);
793 | GO
794 | INSERT INTO Notes.PlayCount VALUES
795 | (1, 3, 1),
796 | (1, 5, 2),
797 | (3, 4, 2);
798 | GO
799 | INSERT INTO Notes.Passports VALUES
800 | ('E1321', 'Australia', '20210312', NULL),
801 | ('LA123', 'New Zealand', '20320901', 2),
802 | ('S9876', 'Monaco', '20280619', 3);
803 |
804 | GO
805 | INSERT INTO Notes.Table1 (
806 | A, B, C
807 | ) VALUES
808 | (1, 'Ignorance', 'is'),
809 | (2, 'War', 'is'),
810 | (3, 'Freedom', 'is'),
811 | (4, 'Friendship', 'is');
812 | GO
813 |
814 | INSERT INTO Notes.Table2 (
815 | D, E, A
816 | ) VALUES
817 | ('slavery.', 3, 1),
818 | ('weakness.', 4, 2),
819 | ('strength.', 1, 3),
820 | ('peace.', 2, 4);
821 | GO
822 |
823 | INSERT INTO Notes.Letters VALUES
824 | ('a', 'b', 1),
825 | ('a', 'c', 2),
826 | ('a', 'b', 3),
827 | ('a', 'c', 4);
828 | GO
829 |
830 | INSERT INTO Notes.RandomPeople VALUES
831 | ('Beyonce', 'F', 37),
832 | ('Laura Marling', 'F', 28),
833 | ('Darren Hayes', 'M', 46),
834 | ('Bret McKenzie', 'M', 42),
835 | ('Jack Monroe', 'NB', 30);
836 |
837 |
838 | -- ---------------------------------------------------------------
839 | -- CREATE Analytics DATABASE -------------------------------------
840 | -- ---------------------------------------------------------------
841 |
842 | USE master;
843 | GO
844 |
845 | DROP DATABASE IF EXISTS Analytics;
846 | GO
847 |
848 | CREATE DATABASE Analytics;
849 | GO
850 |
851 | USE Analytics;
852 | GO
853 |
854 | CREATE TABLE Membership (
855 | memberID INT not null,
856 | memberName VARCHAR(100),
857 | phone VARCHAR(20),
858 | joinDate DATE,
859 | PRIMARY KEY (memberID)
860 | );
861 | GO
862 |
863 | INSERT INTO Membership
864 | VALUES
865 | (12231, 'Denali Dune', '+61 03 97229917', '2021-06-21'),
866 | (12688, 'Reilly Bierman', '+61 03 9269 1200', '2021-05-01'),
867 | (12233, 'Shiloh Henry', '+61 03 9479 6000', '2021-05-13'),
868 | (12565, 'Tristan Gaumond', '+61 03 9905 4000', '2021-05-04'),
869 | (12223, 'Rene Brassard', '+61 03 9903 2000', '2021-06-30'),
870 | (12668, 'Tanner Hubert', '+61 03 9035 5511', '2021-07-29');
871 | GO
872 |
873 | CREATE TABLE SausageSizzle (
874 | saleId INT NOT NULL,
875 | saleDate DATE,
876 | product VARCHAR(20),
877 | quantity INT,
878 | friendId INT,
879 | PRIMARY KEY (saleId)
880 | );
881 | GO
882 |
883 | INSERT INTO SausageSizzle VALUES
884 | (1 , '1999-12-31', 'pork' , 1, NULL),
885 | (2 , '1999-12-31', 'veggie', 3, NULL),
886 | (3 , '1999-12-31', 'pork' , 2, 1 ),
887 | (4 , '2000-01-01', 'veggie', 4, NULL),
888 | (5 , '2000-01-01', 'veggie', 2, 2 ),
889 | (6 , '2000-01-01', 'pork' , 1, NULL),
890 | (7 , '2000-01-01', 'veggie', 1, NULL),
891 | (8 , '2000-01-01', 'pork' , 1, NULL),
892 | (9 , '2000-01-02', 'veggie', 1, 2 ),
893 | (10, '2000-01-02', 'pork' , 2, NULL),
894 | (11, '2000-01-02', 'veggie', 3, NULL),
895 | (12, '2000-01-02', 'pork' , 4, NULL),
896 | (13, '2000-01-02', 'veggie', 2, 3 ),
897 | (14, '2000-01-03', 'veggie', 1, NULL),
898 | (15, '2000-01-03', 'pork' , 6, NULL),
899 | (16, '2000-01-03', 'veggie', 1, 1 ),
900 | (17, '2000-01-04', 'pork', 1, 1 ),
901 | (18, '2000-01-05', 'veggie', 5, 1 );
902 | GO
903 |
904 | CREATE VIEW SausageSizzleSummary AS
905 | SELECT Product, SaleDate, SUM(Quantity) AS Sales
906 | FROM SausageSizzle
907 | GROUP BY Product, SaleDate;
908 | GO
--------------------------------------------------------------------------------
/other-languages.md:
--------------------------------------------------------------------------------
1 | ## Connecting from Other Languages
2 |
3 | This document aims to provide simple examples of ODBC connectivity with batch processing in a number of common statistical programming languages.
4 |
5 | A Data Source Name (DSN) is usually set up by your system administrator. If not, you can usually specify the connection settings yourself.
6 |
7 | ## R
8 |
9 | Here we establish a connection using a DSN provided by the system administrator.
10 |
11 | ```R
12 | install.packages("odbc")
13 | library(odbc)
14 |
15 | con <- odbc::dbConnect(odbc::odbc(), "insert_DSN_here")
16 | ```
17 |
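If you are not sure whether a DSN has already been set up on your machine, the `odbc` package can list the data sources and drivers it can see. This is an optional check before connecting:

```R
library(odbc)

odbc::odbcListDataSources() # DSNs configured on this machine
odbc::odbcListDrivers()     # ODBC drivers installed on this machine
```
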
18 | If you have no DSN, then you'll need to specify the connection settings. Example:
19 |
20 | ```R
21 | install.packages("odbc")
22 | library(odbc)
23 |
24 | con <- odbc::dbConnect(odbc::odbc(),
25 | Driver = "SQL Server",
26 | Server = "write_server_address_here",
27 | Database = "write_database_name_here",
28 | UID = "write_user_name_here",
29 | PWD = "write_your_password_here")
30 | ```
31 |
32 | Here is a more specific example of the above. Connecting to the master database from the course:
33 |
34 | ```R
35 | con <- odbc::dbConnect(odbc::odbc(),
36 | Driver = "SQL Server",
37 | Server = "nzssn.database.windows.net",
38 | Database = "master",
39 | UID = "Daniel",
40 | PWD = "insert_your_password")
41 | ```
42 |
43 | If you are still unsuccessful, you may find more help on the RStudio website ([click here](https://db.rstudio.com/databases/microsoft-sql-server/)).
44 |
45 | Now let's send a basic query to the server and get the result. We will need to explicitly specify `PlayPen` as the database, since we are currently connected to `master`. Example:
46 |
47 | ```R
48 | myquery <- "SELECT *
49 | FROM PlayPen.Ape.Banana;"
50 | banana <- DBI::dbGetQuery(con, statement = myquery)
51 | ```
52 |
53 | Now the result of the above query is stored in the variable named `banana`.
54 |
55 | Let's take a closer look at this variable `banana`:
56 |
57 | ```R
58 | class(banana)
59 | names(banana)
60 | head(banana)
61 | ```
62 |
63 | We can also loop through a table (e.g., a very large table) and fetch only a few rows at a time. Example:
64 |
65 | ```R
66 | myquery2 <- "SELECT *
67 | FROM PlayPen.Ape.EatingFrom;"
68 | results <- DBI::dbSendQuery(con, statement = myquery2)
69 | results
70 | ```
71 |
72 | We have sent the query, and now it's time to fetch rows from the result. Here we fetch the first 5 rows:
73 |
74 | ```R
75 | eating <- DBI::dbFetch(results, n = 5)
76 | eating
77 | ```
78 |
79 | Now we fetch the next 5 rows and append them to the end of the `data.frame` named `eating`:
80 |
81 | ```R
82 | next5 <- DBI::dbFetch(results, n = 5)
83 | eating <- rbind(eating, next5)
84 | eating
85 | ```
86 |
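With a large table you may not know how many batches are left. As a small addition to the example above, you can keep fetching until the result set is exhausted, then release the result and close the connection:

```R
# continue from above: 'results', 'eating' and 'con' already exist
while (!DBI::dbHasCompleted(results)) {
  chunk  <- DBI::dbFetch(results, n = 5) # fetch the next batch of 5 rows
  eating <- rbind(eating, chunk)         # append it to the data.frame
}

DBI::dbClearResult(results) # release the result set
DBI::dbDisconnect(con)      # close the connection when you are finished
```
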
87 | For more info, the [RStudio guide is useful (click here)](https://db.rstudio.com/getting-started/connect-to-database).
88 |
89 | ## STATA
90 |
91 | **Disclaimer:** *I am unable to execute this code since I don't have a copy of STATA, and I've never used STATA before. This is just my best effort at including STATA for completeness. If someone could please verify that this works (or not) and email me, that would be great.*
92 |
93 | ```stata
94 | odbc load, exec(
95 | "SELECT TOP(100) *
96 | FROM Notes.Friends;
97 | ")
98 | dsn("ask_someone_for_the_DSN") lowercase sqlshow clear;
99 | ```
100 |
101 | ```stata
102 | odbc load, exec(
103 | "SELECT *
104 | FROM Notes.Friends;")
105 | dsn("ask_someone_for_the_DSN") lowercase sqlshow clear;
106 | ```
107 |
108 |
109 |
110 | ## SAS
111 |
112 | Use [implicit or explicit SQL passthrough](https://documentation.sas.com/?cdcId=pgmsascdc&cdcVersion=9.4_3.5&docsetId=spdsug&docsetTarget=n095mdd1wof6ogn1neyglan77ghc.htm&locale=en#!) to execute SQL code on the server (rather than locally).
113 |
114 | Example:
115 |
116 | ```SAS
117 | proc sql;
118 | connect to odbc (dsn="ask_someone_for_the_DSN");
119 | create table My_SAS_table_of_friends as
120 | select * from connection to odbc
121 | /* -------- Start of SQL code -------*/
122 | (SELECT *
123 | FROM Notes.Friends);
124 | /* ----------------------------------*/
125 | disconnect from odbc;
126 | quit;
127 | ```
128 |
129 | Here's another example, using tables and columns in the IDI:
130 |
131 | ```SAS
132 | proc sql;
133 | connect to odbc(dsn="ask_someone_for_the_DSN");
134 | create table work.census_address as
135 | select * from connection to odbc
136 | /* -------- Start of SQL code -------*/
137 | (SELECT snz_uid
138 | ,snz_cen_uid
139 | ,address_type_code
140 | ,meshblock_code
141 | ,au_code
142 | ,geocode_source_code
143 | FROM cen_clean.census_address
144 | WHERE address_type_code='UR'
145 | ORDER BY meshblock_code, snz_uid);
146 | /* ----------------------------------*/
147 | disconnect from odbc;
148 | quit;
149 |
150 | ```
151 |
152 | For more IDI SAS examples, [see the VHIN website](https://vhin.co.nz/guides/shared-code/).
--------------------------------------------------------------------------------
/scripts-from-notes/README.md:
--------------------------------------------------------------------------------
1 | # Scripts from notes
2 |
3 | This folder contains some of the longer scripts from the course notes, making them easier to practice and experiment with.
--------------------------------------------------------------------------------
/scripts-from-notes/bayes-vs-frequentist.sql:
--------------------------------------------------------------------------------
1 | -- This script is not intended to be run in one go.
2 | -- It is a code dump of a number of related examples from the notes.
3 | -- It is here for you to experiment with as you follow the notes.
4 |
5 | CREATE DATABASE StackExchange;
6 | GO -- only use GO in T-SQL, remove GO for MySQL
7 |
8 | USE StackExchange;
9 | GO -- only use GO in T-SQL, remove GO for MySQL
10 |
11 | CREATE TABLE Posts (
12 | Id INT NOT NULL,
13 | CreationDate DATETIME,
14 | Score INT,
15 | ViewCount INT,
16 | Body VARCHAR(100),
17 | PRIMARY KEY (Id)
18 | );
19 |
20 | INSERT INTO Posts
21 | VALUES
22 | (1, '2020-01-01',1,200,'dummy text'),
23 | (2, '2020-02-01',1,200,'dummy frequentist'),
24 | (3, '2020-03-01',1,200,'dummy text'),
25 | (4, '2020-03-01',1,200,'dummy bayesian');
26 |
27 | -- ----------------------------------
28 | -- FASTBALL TESTING / WRITING THE QUERY
29 | -- ----------------------------------
30 |
31 | -- step 1
32 | SELECT MONTH(CreationDate) AS CreationMonth,
33 | CASE WHEN Body LIKE '%frequentist%' THEN 'F'
34 | WHEN Body LIKE '%bayesian%' THEN 'B'
35 | END AS Category
36 | FROM Posts;
37 |
38 |
39 | -- ----------------------------------
40 |
41 | -- step 2
42 | INSERT INTO Posts VALUES
43 | (5, '2021-01-01',1,200,'dummy frequentist');
44 |
45 | SELECT MONTH(CreationDate) AS CreationMonth,
46 | CASE WHEN Body LIKE '%frequentist%' THEN 'F'
47 | WHEN Body LIKE '%bayesian%' THEN 'B'
48 | END AS Category
49 | FROM Posts
50 | WHERE YEAR(CreationDate) <= 2020
51 | AND Body LIKE '%bayesian%' OR Body LIKE '%frequentist%';
52 |
53 | -- ----------------------------------
54 |
55 | -- step 3
56 | INSERT INTO Posts VALUES
57 | (6, '2020-03-01',2,200,'dummy bayesian');
58 |
59 | SELECT * FROM Posts;
60 |
61 | SELECT AVG(Score) AvgScore,
62 | MONTH(CreationDate) AS CreationMonth,
63 | CASE WHEN Body LIKE '%frequentist%' THEN 'F'
64 | WHEN Body LIKE '%bayesian%' THEN 'B'
65 | END AS Category
66 | FROM Posts
67 | WHERE YEAR(CreationDate) <= 2020
68 | AND (Body LIKE '%bayesian%' OR Body LIKE '%frequentist%')
69 | GROUP BY MONTH(CreationDate),
70 | CASE WHEN Body LIKE '%frequentist%' THEN 'F'
71 | WHEN Body LIKE '%bayesian%' THEN 'B'
72 | END;
73 |
74 | -- ----------------------------------
75 |
76 | -- step 4
77 | SELECT AVG(CAST(Score AS DECIMAL)) AvgScore,
78 | AVG(CAST(ViewCount AS DECIMAL)) AS AvgViews,
79 | COUNT(*) AS NumPosts,
80 | MONTH(CreationDate) AS CreationMonth,
81 | CASE WHEN Body LIKE '%frequentist%' THEN 'F'
82 | WHEN Body LIKE '%bayesian%' THEN 'B'
83 | END AS Category
84 | FROM Posts
85 | WHERE YEAR(CreationDate) <= 2020
86 | AND (Body LIKE '%bayesian%' OR Body LIKE '%frequentist%')
87 | GROUP BY MONTH(CreationDate),
88 | CASE WHEN Body LIKE '%frequentist%' THEN 'F'
89 | WHEN Body LIKE '%bayesian%' THEN 'B'
90 | END;
91 |
92 |
93 | -- ----------------------------------
94 | -- REDUCING REPETITION
95 | -- ----------------------------------
96 |
97 | -- cleaning, step 1
98 | WITH PostCats AS (
99 | SELECT MONTH(CreationDate) AS CreationMonth,
100 | CASE WHEN Body LIKE '%frequentist%' THEN 'F'
101 | WHEN Body LIKE '%bayesian%' THEN 'B'
102 | END AS Category
103 | FROM Posts
104 | WHERE YEAR(CreationDate) <= 2020
105 | )
106 | SELECT *
107 | FROM PostCats;
108 |
109 | -- -----------------------------------
110 |
111 | -- cleaning, step 2
112 | WITH PostCats AS (
113 | SELECT MONTH(CreationDate) AS CreationMonth,
114 | CASE WHEN Body LIKE '%frequentist%' THEN 'F'
115 | WHEN Body LIKE '%bayesian%' THEN 'B'
116 | END AS Category
117 | FROM Posts
118 | WHERE YEAR(CreationDate) <= 2020
119 | )
120 | SELECT CreationMonth, Category
121 | FROM PostCats
122 | WHERE Category IS NOT NULL
123 | GROUP BY CreationMonth, Category;
124 |
125 | -- -----------------------------------
126 |
127 | -- cleaning, step 3
128 | WITH PostCats AS (
129 | SELECT MONTH(CreationDate) AS CreationMonth,
130 | CASE WHEN Body LIKE '%frequentist%' THEN 'F'
131 | WHEN Body LIKE '%bayesian%' THEN 'B'
132 | END AS Category,
133 | CAST(Score AS DECIMAL) AS Score,
134 | CAST(ViewCount AS DECIMAL) AS ViewCount
135 | FROM Posts
136 | WHERE YEAR(CreationDate) <= 2020
137 | )
138 | SELECT CreationMonth, Category,
139 | AVG(Score) AS AvgScore,
140 | AVG(ViewCount) AS AvgViews,
141 | COUNT(*) AS NumPosts
142 | FROM PostCats
143 | WHERE Category IS NOT NULL
144 | GROUP BY CreationMonth, Category;
145 |
146 | -- here is a link to the query, hosted on Stack Exchange:
147 | -- https://data.stackexchange.com/stats/query/1509266/score-and-views-for-bayesian-vs-frequentist
148 |
149 |
150 | -- ----------------------------------
151 | -- VALIDITY TESTING
152 | -- ----------------------------------
153 |
154 | -- example 1
155 |
156 | WITH Posts (Id, CreationDate, Score, ViewCount, Body) AS (
157 | SELECT 1,'2019-01-01',1,200,'dummy frequentist'
158 | UNION ALL
159 | SELECT 2,'2020-01-01',1,200,'dummy frequentist'
160 | UNION ALL
161 | SELECT 3,'2021-01-01',1,200,'dummy frequentist'
162 | UNION ALL
163 | SELECT 4,NULL,1,200,'dummy frequentist'
164 | ),
165 | PostCats AS (
166 | SELECT MONTH(CreationDate) AS CreationMonth,
167 | CASE WHEN Body LIKE '%frequentist%' THEN 'F'
168 | WHEN Body LIKE '%bayesian%' THEN 'B'
169 | END AS Category,
170 | CAST(Score AS DECIMAL) AS Score,
171 | CAST(ViewCount AS DECIMAL) AS ViewCount
172 | FROM Posts
173 | WHERE YEAR(CreationDate) <= 2020
174 | )
175 | SELECT CreationMonth, Category,
176 | AVG(Score) AS AvgScore,
177 | AVG(ViewCount) AS AvgViews,
178 | COUNT(*) AS NumPosts
179 | FROM PostCats
180 | WHERE Category IS NOT NULL
181 | GROUP BY CreationMonth, Category;
182 |
183 | -- example 2
184 |
185 | WITH Posts (Id, CreationDate, Score, ViewCount, Body) AS (
186 | SELECT 1,'2020-01-01',1,200,'dummy FREQUENTIST'
187 | UNION ALL
188 | SELECT 2,'2020-01-01',1,200,'dummy FREQUENTIST dummy'
189 | UNION ALL
190 | SELECT 3,'2020-01-01',1,200,'FREQUENTIST dummy'
191 | UNION ALL
192 | SELECT 4,'2020-01-01',1,200,NULL
193 | ),
194 | PostCats AS (
195 | SELECT MONTH(CreationDate) AS CreationMonth,
196 | CASE WHEN Body LIKE '%frequentist%' THEN 'F'
197 | WHEN Body LIKE '%bayesian%' THEN 'B'
198 | END AS Category,
199 | CAST(Score AS DECIMAL) AS Score,
200 | CAST(ViewCount AS DECIMAL) AS ViewCount
201 | FROM Posts
202 | WHERE YEAR(CreationDate) <= 2020
203 | )
204 | SELECT CreationMonth, Category,
205 | AVG(Score) AS AvgScore,
206 | AVG(ViewCount) AS AvgViews,
207 | COUNT(*) AS NumPosts
208 | FROM PostCats
209 | WHERE Category IS NOT NULL
210 | GROUP BY CreationMonth, Category;
211 |
212 | -- delete the database
213 | DROP DATABASE StackExchange;
214 | GO
--------------------------------------------------------------------------------
/scripts-from-notes/manipulate-tables.sql:
--------------------------------------------------------------------------------
1 | -- This script is not intended to be run in one go.
2 | -- It is a code dump of a number of related examples from the notes.
3 | -- It is here for you to experiment with as you follow the notes.
4 |
5 | CREATE DATABASE MyExperiments;
6 | GO -- only use GO in T-SQL, remove GO for MySQL
7 |
8 | USE MyExperiments;
9 | GO -- only use GO in T-SQL, remove GO for MySQL
10 |
11 | -- create the Friends table
12 | CREATE TABLE Friends (
13 | FriendID INT not null,
14 | FirstName VARCHAR(20),
15 | LastName VARCHAR(20),
16 | FavColour VARCHAR(20),
17 | PRIMARY KEY (FriendID)
18 | );
19 |
20 | -- create the Pets table
21 | CREATE TABLE Pets (
22 | PetID INT not null,
23 | PetName VARCHAR(20),
24 | PetDOB DATE,
25 | FriendID INT not null,
26 | FOREIGN KEY (FriendID) REFERENCES Friends (FriendID),
27 | PRIMARY KEY (PetID)
28 | );
29 |
30 | ALTER TABLE Friends
31 | ADD StartDate DATE,
32 | ADD StartTime TIME;
33 |
34 | ALTER TABLE Friends
35 | DROP COLUMN StartDate;
36 |
37 | -- produces an error (foreign key)
38 | ALTER TABLE Pets
39 | DROP COLUMN FriendID;
40 |
41 | -- circumvent the error (delete foreign key)
42 | ALTER TABLE Pets
43 | DROP CONSTRAINT pets_ibfk_1;
44 |
45 | -- change a data type in MySQL (uses 'modify column')
46 | ALTER TABLE Friends
47 | MODIFY COLUMN FirstName VARCHAR(40);
48 |
49 | -- change a data type in T-SQL (uses 'alter column')
50 | ALTER TABLE Friends
51 | ALTER COLUMN FirstName VARCHAR(40);
52 |
53 | -- using the MySQL syntax
54 | ALTER TABLE Friends
55 | MODIFY COLUMN FavColour VARCHAR(3);
56 |
57 | -- produces an error (foreign key)
58 | DROP TABLE Friends;
59 |
60 | -- remove the foreign key from Pets
61 | ALTER TABLE Pets
62 | DROP CONSTRAINT pets_ibfk_1;
63 |
64 | -- delete the Friends table
65 | DROP TABLE Friends;
66 |
67 | -- -----------------------------------------------
68 |
69 | -- create the Friends table
70 | CREATE TABLE Friends (
71 | FriendID INT not null,
72 | FirstName VARCHAR(20),
73 | LastName VARCHAR(20),
74 | FavColour VARCHAR(20),
75 | PRIMARY KEY (FriendID)
76 | );
77 |
78 | INSERT INTO Friends
79 | VALUES
80 | (1, 'X', 'A', 'red'),
81 | (2, 'Y', 'B', 'blue'),
82 | (3, 'Z', 'C', NULL),
83 | (4, 'Kimmy', 'Jenkins', 'yellow'),
84 | (5, 'Jimmy', 'Jenkins', NULL);
85 |
86 |
87 | INSERT INTO Friends
88 | (FriendID, FirstName)
89 | VALUES
90 | (6, 'Niko'),
91 | (7, 'Sage');
92 |
93 | DROP TABLE Friends;
94 |
95 | -- -----------------------------------------------
96 |
97 | -- first create the structure
98 | CREATE TABLE Friends (
99 | FriendID INT not null,
100 | FirstName VARCHAR(20),
101 | LastName VARCHAR(20),
102 | FavColour VARCHAR(20),
103 | PRIMARY KEY (FriendID)
104 | );
105 |
106 | -- then insert our 3 friends
107 | INSERT INTO Friends
108 | VALUES
109 | (1,'X','A','red'),
110 | (2,'Y','B','blue'),
111 | (3,'Z','C', NULL);
112 |
113 | -- now alter the table to add StartDate and StartTime
114 | ALTER TABLE Friends
115 | ADD StartDate DATE,
116 | ADD StartTime TIME;
117 |
118 | UPDATE Friends
119 | SET StartDate = '1999-12-30', StartTime = '16:30:00';
120 |
121 | UPDATE Friends
122 | SET StartDate = '2000-01-03', StartTime = '08:00:00'
123 | WHERE FriendID = 1;
124 |
125 | DELETE FROM Friends
126 | WHERE FriendID = 999;
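-- A quick check of the statements above (a sketch): all three friends should remain,
-- since no FriendID 999 exists and the DELETE removes nothing; FriendID 1 should show
-- the 2000-01-03 start, and the other two the 1999-12-30 value set by the first UPDATE.
SELECT FriendID, StartDate, StartTime
FROM Friends;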
127 |
128 | -- delete the database (in T-SQL, switch away first with USE master; GO, since you cannot drop the database you are currently using)
129 | DROP DATABASE MyExperiments;
130 | GO
131 |
132 | -- ------------------------------------------------
133 |
134 | CREATE TABLE Membership (
135 | memberID INT not null,
136 | memberName VARCHAR(100),
137 | phone VARCHAR(20),
138 | joinDate DATE,
139 | PRIMARY KEY (memberID)
140 | );
141 |
142 | INSERT INTO Membership
143 | VALUES (12231, 'Denali Dune', '+61 03 97229917', '2021-06-21');
144 |
145 | DELETE FROM Membership
146 | WHERE memberID = 12231;
147 |
148 | INSERT INTO Membership
149 | VALUES
150 | (12688, 'Reilly Bierman', '+61 03 9269 1200', '2021-05-01'),
151 | (12233, 'Shiloh Henry', '+61 03 9479 6000', '2021-05-13'),
152 | (12565, 'Tristan Gaumond', '+61 03 9905 4000', '2021-05-04'),
153 | (12223, 'Rene Brassard', '+61 03 9903 2000', '2021-06-30'),
154 | (12668, 'Tanner Hubert', '+61 03 9035 5511', '2021-07-29');
155 |
156 | SELECT COUNT(*) AS MemberCount, SYSDATE() AS ExecutionDateTime -- SYSDATE() is MySQL; use SYSDATETIME() in T-SQL
157 | FROM Membership;
158 |
159 | -- MySQL only: table creation and initial insert
160 | CREATE TABLE MemberCountHistory
161 | SELECT COUNT(*) AS MemberCount, SYSDATE() AS ExecutionDateTime
162 | FROM Membership;
163 |
164 | -- MySQL only: subsequent inserts (execute once per month)
165 | INSERT INTO MemberCountHistory
166 | SELECT COUNT(*) AS MemberCount, SYSDATE() AS ExecutionDateTime
167 | FROM Membership;
168 |
169 | -- T-SQL only: initial table creation (SELECT ... INTO creates the table, so run this once only)
170 | SELECT COUNT(*) AS MemberCount, SYSDATETIME() AS ExecutionDateTime
171 | INTO MemberCountHistory
172 | FROM Membership;
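-- T-SQL only: a sketch of the matching monthly follow-up; once MemberCountHistory exists,
-- SELECT ... INTO would fail, so append new rows with INSERT INTO ... SELECT instead.
INSERT INTO MemberCountHistory
SELECT COUNT(*) AS MemberCount, SYSDATETIME() AS ExecutionDateTime
FROM Membership;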
173 |
174 | CREATE VIEW FriendsPets AS
175 | SELECT F.FirstName, P.PetName
176 | FROM Friends F JOIN Pets P ON F.FriendID = P.FriendID;
177 |
178 | SELECT *
179 | FROM FriendsPets;
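-- A short sketch (assuming the Friends and Pets tables defined earlier still exist, with a
-- friend whose FriendID is 1): the view stores no data of its own, so a newly inserted pet
-- appears as soon as the view is queried again. The pet values are placeholders.
INSERT INTO Pets (PetID, PetName, PetDOB, FriendID)
VALUES (2, 'Milo', '2021-02-14', 1);

SELECT *
FROM FriendsPets;

-- remove the view when finished experimenting
DROP VIEW FriendsPets;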
--------------------------------------------------------------------------------
/textbook-and-slides/README.md:
--------------------------------------------------------------------------------
1 | For the interactive version of the day 4 slides, hosted on RPubs, [click here](https://rpubs.com/frycast/sql-with-r).
--------------------------------------------------------------------------------
/textbook-and-slides/SQL_Course_Slides_Day_1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/frycast/SQL_course/13d5cf01fdd82b7cf8de3bb29846b5d595d07ab4/textbook-and-slides/SQL_Course_Slides_Day_1.pdf
--------------------------------------------------------------------------------
/textbook-and-slides/SQL_Course_Slides_Day_2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/frycast/SQL_course/13d5cf01fdd82b7cf8de3bb29846b5d595d07ab4/textbook-and-slides/SQL_Course_Slides_Day_2.pdf
--------------------------------------------------------------------------------
/textbook-and-slides/SQL_Course_Slides_Day_3.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/frycast/SQL_course/13d5cf01fdd82b7cf8de3bb29846b5d595d07ab4/textbook-and-slides/SQL_Course_Slides_Day_3.pdf
--------------------------------------------------------------------------------
/textbook-and-slides/SQL_Course_Slides_Day_4.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/frycast/SQL_course/13d5cf01fdd82b7cf8de3bb29846b5d595d07ab4/textbook-and-slides/SQL_Course_Slides_Day_4.pdf
--------------------------------------------------------------------------------
/textbook-and-slides/SQL_Course_Textbook.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/frycast/SQL_course/13d5cf01fdd82b7cf8de3bb29846b5d595d07ab4/textbook-and-slides/SQL_Course_Textbook.pdf
--------------------------------------------------------------------------------