├── IDI-stuff
│   ├── MOH-SQL-code.sql
│   ├── example-data-dictionaries
│   │   ├── ACC_Clean.pdf
│   │   └── DIA_Clean.pdf
│   └── useful-papers
│       ├── linking-Methodology.pdf
│       ├── prototype-spine-creation.pdf
│       └── use-of-IDI.pdf
├── LICENSE
├── R
│   ├── README.md
│   ├── beginners.zip
│   ├── connecting-R
│   │   ├── README.md
│   │   ├── databases-in-R.Rmd
│   │   ├── databases-in-R.html
│   │   └── remote-MySQL.R
│   ├── example-R.zip
│   ├── examples
│   │   └── bioinformatics
│   │       ├── analysis.R
│   │       ├── app.R
│   │       ├── automation.R
│   │       ├── create_variants_database.sql
│   │       ├── data
│   │       │   ├── new_records
│   │       │   │   └── new_records.csv
│   │       │   ├── patient_variants.csv
│   │       │   ├── variants.sqlite
│   │       │   └── variants_dodgy.sqlite
│   │       ├── functions.R
│   │       └── start_here.R
│   ├── intro-to-R.R
│   ├── sqlite-R
│   │   ├── MyDB.sqlite
│   │   ├── README.md
│   │   ├── Sandpit.sqlite
│   │   └── sqlite.R
│   └── tidy-data.R
├── README.md
├── create-database
│   ├── MySQL
│   │   ├── MySQL-database.sql
│   │   └── README.md
│   ├── SQL-data-definition-examples.sql
│   ├── SQLFiddle
│   │   ├── README.md
│   │   ├── T-SQL-analytics-schema.sql
│   │   ├── T-SQL-ape-schema.sql
│   │   └── T-SQL-notes-schema.sql
│   └── T-SQL
│       ├── .Rhistory
│       ├── README.md
│       └── T-SQL-database.sql
├── other-languages.md
├── scripts-from-notes
│   ├── README.md
│   ├── bayes-vs-frequentist.sql
│   └── manipulate-tables.sql
└── textbook-and-slides
    ├── README.md
    ├── SQL_Course_Slides_Day_1.pdf
    ├── SQL_Course_Slides_Day_2.pdf
    ├── SQL_Course_Slides_Day_3.pdf
    ├── SQL_Course_Slides_Day_4.pdf
    └── SQL_Course_Textbook.pdf
/IDI-stuff/MOH-SQL-code.sql:
--------------------------------------------------------------------------------
1 | -- -----------------------------------
2 | 
3 | -- Here I provide queries that are complicated by the presence of long table and column names.
4 | 
5 | -- These queries were provided by Stats NZ as example queries for the New Zealand Integrated Data Infrastructure (not for the course practice databases).
6 | 
7 | -- Below each of the first three queries, I have provided a simplified version of the same query.
8 | 
9 | -- The only changes made for the simplified version are:
10 | --
11 | -- 1. assignment of aliases for table and column names
12 | --
13 | -- 2. changing indentation and adding/removing newlines
14 | 
15 | -- Note the queries are only 'simplified' if you know how the WITH clause works!
16 | -- The subquery in the WITH clause is only assigning aliases.
17 | -- The query below the WITH clause is doing all the 'work'.
18 | 
19 | -- I have only simplified the first three. You can use the remaining queries for practice (try to simplify them in the same manner).
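-- Illustrative sketch (not one of the Stats NZ examples): the database, table and column
-- names below are hypothetical, chosen only to show the aliasing pattern used throughout
-- this file. The WITH clause assigns short aliases once, and the main query then does all
-- the work using those aliases instead of repeating the long fully-qualified names.
-- WITH Shortened AS (
--     SELECT T.some_very_long_column_name AS short_col
--     FROM Some_Database.some_schema.Some_Very_Long_Table_Name AS T
-- )
-- SELECT year(short_col) AS StartYear
-- FROM Shortened
-- GROUP BY year(short_col);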
20 | 
21 | -- -----------------------------------
22 | -- -----------------------------------
23 | 
24 | --Clients seen by snz_uid
25 | SELECT year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date) AS StartYear ,
26 | IDI_Clean_20181020.moh_clean.PRIMHD.snz_moh_uid
27 | FROM IDI_Clean_20181020.moh_clean.PRIMHD
28 | WHERE IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_type_code != 'T35'
29 | GROUP BY year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date),
30 | IDI_Clean_20181020.moh_clean.PRIMHD.snz_moh_uid
31 | 
32 | -- simplified version
33 | WITH Shortened AS (
34 | SELECT M.moh_mhd_activity_start_date AS astart,
35 | M.snz_moh_uid AS muid,
36 | M.moh_mhd_activity_type_code AS activity
37 | FROM IDI_Clean_20181020.moh_clean.PRIMHD AS M
38 | )
39 | SELECT year(astart) AS StartYear, muid
40 | FROM Shortened
41 | WHERE activity != 'T35'
42 | GROUP BY year(astart), muid
43 | 
44 | -- -----------------------------------
45 | -- -----------------------------------
46 | 
47 | --Service users by snz_moh_uid
48 | SELECT year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date) AS StartYear,
49 | IDI_Clean_20181020.moh_clean.PRIMHD.snz_moh_uid
50 | FROM IDI_Clean_20181020.moh_clean.PRIMHD
51 | WHERE IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_type_code != 'T35'
52 | GROUP BY year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date),
53 | IDI_Clean_20181020.moh_clean.PRIMHD.snz_moh_uid
54 | ORDER BY year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date)
55 | 
56 | -- simplified version
57 | WITH Shortened AS (
58 | SELECT M.moh_mhd_activity_start_date AS astart,
59 | M.snz_moh_uid AS muid,
60 | M.moh_mhd_activity_type_code AS activity
61 | FROM IDI_Clean_20181020.moh_clean.PRIMHD AS M
62 | )
63 | SELECT year(astart) AS StartYear, muid
64 | FROM Shortened
65 | WHERE activity != 'T35'
66 | GROUP BY year(astart), muid
67 | ORDER BY year(astart);
68 | 
69 | -- -----------------------------------
70 | -- -----------------------------------
71 | 
72 | --Service users/AoD service users
73 | SELECT year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date) AS StartYear,
74 | count (DISTINCT IDI_Clean_20181020.moh_clean.PRIMHD.snz_moh_uid) AS 'Service Users',
75 | count (DISTINCT (case when (IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_team_type_code in ('03', '11') or
76 | IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_type_code in ('T16', 'T17', 'T18', 'T19', 'T20','T48'))
77 | then IDI_Clean_20181020.moh_clean.PRIMHD.snz_moh_uid else NULL end)) AS 'AoD Service Users'
78 | FROM IDI_Clean_20181020.moh_clean.PRIMHD
79 | WHERE IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_type_code != 'T35'
80 | GROUP BY year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date);
81 | 
82 | -- simplified version
83 | WITH Shortened AS (
84 | SELECT M.moh_mhd_activity_start_date AS astart,
85 | M.snz_moh_uid AS muid,
86 | M.moh_mhd_team_type_code AS team,
87 | M.moh_mhd_activity_type_code AS activity
88 | FROM IDI_Clean_20181020.moh_clean.PRIMHD M
89 | )
90 | SELECT year(astart) AS StartYear,
91 | count(DISTINCT muid) AS 'Service Users',
92 | count(DISTINCT(case when (team in ('03', '11') or activity in ('T16', 'T17', 'T18', 'T19', 'T20','T48'))
93 | then muid else NULL end)) AS 'AoD Service Users'
94 | FROM Shortened
95 | WHERE activity != 'T35'
96 | GROUP BY year(astart);
97 | 
98 | -- -----------------------------------
99 | -- -----------------------------------
100 | 
101 | --Service users by DHB/non-DHB
102 | SELECT year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date) AS StartYear , 103 | case when [IDI_Metadata].[clean_read_CLASSIFICATIONS].[moh_primhd_organisation_code].[ORGANISATION_TYPE] = 'District Health Board (DHB)' then 'DHB' 104 | else 'non-DHB' end 'Organisation type', 105 | count (distinct IDI_Clean_20181020.moh_clean.PRIMHD.snz_moh_uid) 'Count of service users' 106 | FROM 107 | IDI_Clean_20181020.moh_clean.PRIMHD 108 | INNER JOIN IDI_Metadata.clean_read_CLASSIFICATIONS.moh_primhd_organisation_code on 109 | IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_organisation_id_code = IDI_Metadata.clean_read_CLASSIFICATIONS.moh_primhd_organisation_code.ORGANISATION_ID 110 | WHERE 111 | IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_type_code != 'T35' 112 | GROUP BY 113 | year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date), 114 | case when [IDI_Metadata].[clean_read_CLASSIFICATIONS].[moh_primhd_organisation_code].[ORGANISATION_TYPE] = 'District Health Board (DHB)' then 'DHB' 115 | else 'non-DHB' end 116 | ORDER BY 117 | year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date) 118 | 119 | -- ----------------------------------- 120 | -- ----------------------------------- 121 | 122 | --Face to face service users (clients seen) 123 | --Clients seen/AoD clients seen 124 | SELECT year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date) AS StartYear, 125 | count (distinct IDI_Clean_20181020.moh_clean.PRIMHD.snz_moh_uid) AS 'Clients Seen', 126 | count ( distinct case when (IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_team_type_code in ('03', '11') or 127 | IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_type_code in ('T16', 'T17', 'T18', 'T19', 'T20','T48')) then 128 | (IDI_Clean_20181020.moh_clean.PRIMHD.snz_moh_uid) else NULL end) AS 'AoD_Clients_Seen' 129 | FROM 130 | IDI_Clean_20181020.moh_clean.PRIMHD 131 | WHERE 132 | IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_type_code not in ('T35','T32','T33','T37','T08') 133 | AND IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_setting_code not in ('WR','PH','SM','OM') 134 | GROUP BY 135 | year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date); 136 | 137 | -- ----------------------------------- 138 | -- ----------------------------------- 139 | 140 | --Clients seen by DHB/non-DHB 141 | SELECT year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date) AS StartYear, 142 | case when IDI_Metadata.clean_read_CLASSIFICATIONS.moh_primhd_organisation_code.ORGANISATION_TYPE = 'District Health Board (DHB)'then 'DHB' 143 | else 'non-DHB' end 'Organisation type', 144 | count (DISTINCT snz_moh_uid) AS 'Clients seen' 145 | FROM 146 | IDI_Clean_20181020.moh_clean.PRIMHD 147 | INNER JOIN IDI_Metadata.clean_read_CLASSIFICATIONS.moh_primhd_organisation_code on 148 | IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_organisation_id_code = IDI_Metadata.clean_read_CLASSIFICATIONS.moh_primhd_organisation_code.ORGANISATION_ID 149 | WHERE 150 | IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_type_code not in ('T35','T32','T33','T37', 'T08') 151 | AND IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_setting_code not in ('WR','PH','SM','OM') 152 | GROUP BY 153 | year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date), 154 | case when IDI_Metadata.clean_read_CLASSIFICATIONS.moh_primhd_organisation_code.ORGANISATION_TYPE = 'District Health Board (DHB)'then 'DHB' 155 | else 'non-DHB' end 156 | ORDER BY 157 | 
year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date ) 158 | 159 | -- ----------------------------------- 160 | -- ----------------------------------- 161 | 162 | --Contacts 163 | --AoD Contacts/all contacts 164 | SELECT 165 | year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date) AS StartYear, 166 | sum (case when (IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_team_type_code in ('03', '11') or 167 | IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_type_code in ('T16', 'T17', 'T18', 'T19', 'T20','T48')) 168 | then IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_unit_count_nbr else NULL end) AS 'AoD Contacts', 169 | sum(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_unit_count_nbr) 'All Contacts' 170 | FROM 171 | IDI_Clean_20181020.moh_clean.PRIMHD 172 | WHERE 173 | IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_type_code != 'T35' 174 | AND 175 | IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_unit_type_text = 'CON' 176 | GROUP BY 177 | year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date) 178 | ORDER BY 179 | year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date) 180 | 181 | -- ----------------------------------- 182 | -- ----------------------------------- 183 | 184 | --All face to face contacts/AoD face to face contacts 185 | SELECT year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date) AS StartYear, 186 | sum (IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_unit_count_nbr) AS 'All F2F contacts', 187 | sum (case when (IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_team_type_code in ('03', '11') or 188 | IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_type_code in ('T16', 'T17', 'T18', 'T19', 'T20','T48')) then 189 | IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_unit_count_nbr else NULL end) AS 'AoD F2F Contacts' 190 | FROM 191 | IDI_Clean_20181020.moh_clean.PRIMHD 192 | WHERE 193 | IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_type_code not in ('T35','T32','T33','T37', 'T08') 194 | AND IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_setting_code not in ('WR','PH','SM','OM') 195 | and IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_unit_type_text = 'CON' 196 | GROUP BY 197 | year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date); 198 | 199 | -- ----------------------------------- 200 | -- ----------------------------------- 201 | 202 | --People with face to face contacts (snz_uid) 203 | SELECT distinct(snz_moh_uid), 204 | year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date) AS StartYear 205 | FROM 206 | IDI_Clean_20181020.moh_clean.PRIMHD 207 | WHERE 208 | IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_type_code not in ('T35','T32','T33','T37', 'T08') 209 | AND IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_setting_code not in ('WR','PH','SM','OM') 210 | AND IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_unit_type_text = 'CON' 211 | ORDER BY 212 | year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date); 213 | 214 | -- ----------------------------------- 215 | -- ----------------------------------- 216 | 217 | --People with face to face contacts by DHB/non-DHB 218 | SELECT year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date) AS StartYear, 219 | case when IDI_Metadata.clean_read_CLASSIFICATIONS.moh_primhd_organisation_code.ORGANISATION_NAME LIKE '%District Health Board%' then 'DHB' else 'non-DHB' end 'Organisation type', 220 | count (DISTINCT snz_moh_uid) AS 'Service users face to 
face contacts' 221 | FROM 222 | IDI_Clean_20181020.moh_clean.PRIMHD 223 | INNER JOIN IDI_Metadata.clean_read_CLASSIFICATIONS.moh_primhd_organisation_code on 224 | IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_organisation_id_code = IDI_Metadata.clean_read_CLASSIFICATIONS.moh_primhd_organisation_code.ORGANISATION_ID 225 | WHERE 226 | IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_type_code not in ('T35','T32','T33','T37', 'T08') 227 | AND IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_setting_code not in ('WR','PH','SM','OM') 228 | AND IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_unit_type_text = 'CON' 229 | GROUP BY 230 | year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date), 231 | case when IDI_Metadata.clean_read_CLASSIFICATIONS.moh_primhd_organisation_code.ORGANISATION_NAME LIKE '%District Health Board%' then 'DHB' else 'non-DHB' end 232 | ORDER BY 233 | year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date) 234 | 235 | -- ----------------------------------- 236 | -- ----------------------------------- 237 | 238 | --Bednights 239 | Sum of bednights/AoD bednights 240 | SELECT 241 | year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date) AS StartYear, 242 | sum (case when (IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_team_type_code in ('03', '11') or 243 | IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_type_code in ('T16', 'T17', 'T18', 'T19', 'T20','T48')) 244 | then IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_unit_count_nbr else NULL end) AS 'AoD_Bed_Nights', 245 | sum(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_unit_count_nbr) 'All Bed Nights' 246 | FROM 247 | IDI_Clean_20181020.moh_clean.PRIMHD 248 | WHERE 249 | IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_unit_type_text = 'BED' 250 | GROUP BY 251 | year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date) 252 | ORDER BY 253 | year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date); 254 | 255 | -- ----------------------------------- 256 | -- ----------------------------------- 257 | 258 | --Bednights by snz_moh_uid 259 | SELECT 260 | year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date) AS StartYear, 261 | IDI_Clean_20181020.moh_clean.PRIMHD.snz_moh_uid, 262 | sum(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_unit_count_nbr) 'Bed Nights' 263 | FROM 264 | IDI_Clean_20181020.moh_clean.PRIMHD 265 | WHERE 266 | IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_type_code != 'T35' 267 | AND 268 | IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_unit_type_text = 'BED' 269 | GROUP BY 270 | year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date), 271 | IDI_Clean_20181020.moh_clean.PRIMHD.snz_moh_uid 272 | ORDER BY 273 | year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date) 274 | 275 | -- ----------------------------------- 276 | -- ----------------------------------- 277 | 278 | --Bednights by DHB/non-DHB 279 | SELECT 280 | year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date) AS StartYear, 281 | case when IDI_Metadata.clean_read_CLASSIFICATIONS.moh_primhd_organisation_code.ORGANISATION_NAME LIKE '%District Health Board%' then 'DHB' else 'non-DHB' end 'Organisation type', 282 | sum(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_unit_count_nbr) 'Bed Nights' 283 | FROM 284 | IDI_Clean_20181020.moh_clean.PRIMHD 285 | INNER JOIN IDI_Metadata.clean_read_CLASSIFICATIONS.moh_primhd_organisation_code on 286 | 
IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_organisation_id_code = IDI_Metadata.clean_read_CLASSIFICATIONS.moh_primhd_organisation_code.ORGANISATION_ID 287 | WHERE 288 | IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_type_code != 'T35' 289 | AND 290 | IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_unit_type_text = 'BED' 291 | GROUP BY 292 | year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date), 293 | case when IDI_Metadata.clean_read_CLASSIFICATIONS.moh_primhd_organisation_code.ORGANISATION_NAME LIKE '%District Health Board%' then 'DHB' else 'non-DHB' end 294 | ORDER BY 295 | year(IDI_Clean_20181020.moh_clean.PRIMHD.moh_mhd_activity_start_date) 296 | -------------------------------------------------------------------------------- /IDI-stuff/example-data-dictionaries/ACC_Clean.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frycast/SQL_course/13d5cf01fdd82b7cf8de3bb29846b5d595d07ab4/IDI-stuff/example-data-dictionaries/ACC_Clean.pdf -------------------------------------------------------------------------------- /IDI-stuff/example-data-dictionaries/DIA_Clean.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frycast/SQL_course/13d5cf01fdd82b7cf8de3bb29846b5d595d07ab4/IDI-stuff/example-data-dictionaries/DIA_Clean.pdf -------------------------------------------------------------------------------- /IDI-stuff/useful-papers/linking-Methodology.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frycast/SQL_course/13d5cf01fdd82b7cf8de3bb29846b5d595d07ab4/IDI-stuff/useful-papers/linking-Methodology.pdf -------------------------------------------------------------------------------- /IDI-stuff/useful-papers/prototype-spine-creation.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frycast/SQL_course/13d5cf01fdd82b7cf8de3bb29846b5d595d07ab4/IDI-stuff/useful-papers/prototype-spine-creation.pdf -------------------------------------------------------------------------------- /IDI-stuff/useful-papers/use-of-IDI.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frycast/SQL_course/13d5cf01fdd82b7cf8de3bb29846b5d595d07ab4/IDI-stuff/useful-papers/use-of-IDI.pdf -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU General Public License is a free, copyleft license for 11 | software and other kinds of works. 12 | 13 | The licenses for most software and other practical works are designed 14 | to take away your freedom to share and change the works. By contrast, 15 | the GNU General Public License is intended to guarantee your freedom to 16 | share and change all versions of a program--to make sure it remains free 17 | software for all its users. We, the Free Software Foundation, use the 18 | GNU General Public License for most of our software; it applies also to 19 | any other work released this way by its authors. 
You can apply it to 20 | your programs, too. 21 | 22 | When we speak of free software, we are referring to freedom, not 23 | price. Our General Public Licenses are designed to make sure that you 24 | have the freedom to distribute copies of free software (and charge for 25 | them if you wish), that you receive source code or can get it if you 26 | want it, that you can change the software or use pieces of it in new 27 | free programs, and that you know you can do these things. 28 | 29 | To protect your rights, we need to prevent others from denying you 30 | these rights or asking you to surrender the rights. Therefore, you have 31 | certain responsibilities if you distribute copies of the software, or if 32 | you modify it: responsibilities to respect the freedom of others. 33 | 34 | For example, if you distribute copies of such a program, whether 35 | gratis or for a fee, you must pass on to the recipients the same 36 | freedoms that you received. You must make sure that they, too, receive 37 | or can get the source code. And you must show them these terms so they 38 | know their rights. 39 | 40 | Developers that use the GNU GPL protect your rights with two steps: 41 | (1) assert copyright on the software, and (2) offer you this License 42 | giving you legal permission to copy, distribute and/or modify it. 43 | 44 | For the developers' and authors' protection, the GPL clearly explains 45 | that there is no warranty for this free software. For both users' and 46 | authors' sake, the GPL requires that modified versions be marked as 47 | changed, so that their problems will not be attributed erroneously to 48 | authors of previous versions. 49 | 50 | Some devices are designed to deny users access to install or run 51 | modified versions of the software inside them, although the manufacturer 52 | can do so. This is fundamentally incompatible with the aim of 53 | protecting users' freedom to change the software. The systematic 54 | pattern of such abuse occurs in the area of products for individuals to 55 | use, which is precisely where it is most unacceptable. Therefore, we 56 | have designed this version of the GPL to prohibit the practice for those 57 | products. If such problems arise substantially in other domains, we 58 | stand ready to extend this provision to those domains in future versions 59 | of the GPL, as needed to protect the freedom of users. 60 | 61 | Finally, every program is threatened constantly by software patents. 62 | States should not allow patents to restrict development and use of 63 | software on general-purpose computers, but in those that do, we wish to 64 | avoid the special danger that patents applied to a free program could 65 | make it effectively proprietary. To prevent this, the GPL assures that 66 | patents cannot be used to render the program non-free. 67 | 68 | The precise terms and conditions for copying, distribution and 69 | modification follow. 70 | 71 | TERMS AND CONDITIONS 72 | 73 | 0. Definitions. 74 | 75 | "This License" refers to version 3 of the GNU General Public License. 76 | 77 | "Copyright" also means copyright-like laws that apply to other kinds of 78 | works, such as semiconductor masks. 79 | 80 | "The Program" refers to any copyrightable work licensed under this 81 | License. Each licensee is addressed as "you". "Licensees" and 82 | "recipients" may be individuals or organizations. 
83 | 84 | To "modify" a work means to copy from or adapt all or part of the work 85 | in a fashion requiring copyright permission, other than the making of an 86 | exact copy. The resulting work is called a "modified version" of the 87 | earlier work or a work "based on" the earlier work. 88 | 89 | A "covered work" means either the unmodified Program or a work based 90 | on the Program. 91 | 92 | To "propagate" a work means to do anything with it that, without 93 | permission, would make you directly or secondarily liable for 94 | infringement under applicable copyright law, except executing it on a 95 | computer or modifying a private copy. Propagation includes copying, 96 | distribution (with or without modification), making available to the 97 | public, and in some countries other activities as well. 98 | 99 | To "convey" a work means any kind of propagation that enables other 100 | parties to make or receive copies. Mere interaction with a user through 101 | a computer network, with no transfer of a copy, is not conveying. 102 | 103 | An interactive user interface displays "Appropriate Legal Notices" 104 | to the extent that it includes a convenient and prominently visible 105 | feature that (1) displays an appropriate copyright notice, and (2) 106 | tells the user that there is no warranty for the work (except to the 107 | extent that warranties are provided), that licensees may convey the 108 | work under this License, and how to view a copy of this License. If 109 | the interface presents a list of user commands or options, such as a 110 | menu, a prominent item in the list meets this criterion. 111 | 112 | 1. Source Code. 113 | 114 | The "source code" for a work means the preferred form of the work 115 | for making modifications to it. "Object code" means any non-source 116 | form of a work. 117 | 118 | A "Standard Interface" means an interface that either is an official 119 | standard defined by a recognized standards body, or, in the case of 120 | interfaces specified for a particular programming language, one that 121 | is widely used among developers working in that language. 122 | 123 | The "System Libraries" of an executable work include anything, other 124 | than the work as a whole, that (a) is included in the normal form of 125 | packaging a Major Component, but which is not part of that Major 126 | Component, and (b) serves only to enable use of the work with that 127 | Major Component, or to implement a Standard Interface for which an 128 | implementation is available to the public in source code form. A 129 | "Major Component", in this context, means a major essential component 130 | (kernel, window system, and so on) of the specific operating system 131 | (if any) on which the executable work runs, or a compiler used to 132 | produce the work, or an object code interpreter used to run it. 133 | 134 | The "Corresponding Source" for a work in object code form means all 135 | the source code needed to generate, install, and (for an executable 136 | work) run the object code and to modify the work, including scripts to 137 | control those activities. However, it does not include the work's 138 | System Libraries, or general-purpose tools or generally available free 139 | programs which are used unmodified in performing those activities but 140 | which are not part of the work. 
For example, Corresponding Source 141 | includes interface definition files associated with source files for 142 | the work, and the source code for shared libraries and dynamically 143 | linked subprograms that the work is specifically designed to require, 144 | such as by intimate data communication or control flow between those 145 | subprograms and other parts of the work. 146 | 147 | The Corresponding Source need not include anything that users 148 | can regenerate automatically from other parts of the Corresponding 149 | Source. 150 | 151 | The Corresponding Source for a work in source code form is that 152 | same work. 153 | 154 | 2. Basic Permissions. 155 | 156 | All rights granted under this License are granted for the term of 157 | copyright on the Program, and are irrevocable provided the stated 158 | conditions are met. This License explicitly affirms your unlimited 159 | permission to run the unmodified Program. The output from running a 160 | covered work is covered by this License only if the output, given its 161 | content, constitutes a covered work. This License acknowledges your 162 | rights of fair use or other equivalent, as provided by copyright law. 163 | 164 | You may make, run and propagate covered works that you do not 165 | convey, without conditions so long as your license otherwise remains 166 | in force. You may convey covered works to others for the sole purpose 167 | of having them make modifications exclusively for you, or provide you 168 | with facilities for running those works, provided that you comply with 169 | the terms of this License in conveying all material for which you do 170 | not control copyright. Those thus making or running the covered works 171 | for you must do so exclusively on your behalf, under your direction 172 | and control, on terms that prohibit them from making any copies of 173 | your copyrighted material outside their relationship with you. 174 | 175 | Conveying under any other circumstances is permitted solely under 176 | the conditions stated below. Sublicensing is not allowed; section 10 177 | makes it unnecessary. 178 | 179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 180 | 181 | No covered work shall be deemed part of an effective technological 182 | measure under any applicable law fulfilling obligations under article 183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 184 | similar laws prohibiting or restricting circumvention of such 185 | measures. 186 | 187 | When you convey a covered work, you waive any legal power to forbid 188 | circumvention of technological measures to the extent such circumvention 189 | is effected by exercising rights under this License with respect to 190 | the covered work, and you disclaim any intention to limit operation or 191 | modification of the work as a means of enforcing, against the work's 192 | users, your or third parties' legal rights to forbid circumvention of 193 | technological measures. 194 | 195 | 4. Conveying Verbatim Copies. 196 | 197 | You may convey verbatim copies of the Program's source code as you 198 | receive it, in any medium, provided that you conspicuously and 199 | appropriately publish on each copy an appropriate copyright notice; 200 | keep intact all notices stating that this License and any 201 | non-permissive terms added in accord with section 7 apply to the code; 202 | keep intact all notices of the absence of any warranty; and give all 203 | recipients a copy of this License along with the Program. 
204 | 205 | You may charge any price or no price for each copy that you convey, 206 | and you may offer support or warranty protection for a fee. 207 | 208 | 5. Conveying Modified Source Versions. 209 | 210 | You may convey a work based on the Program, or the modifications to 211 | produce it from the Program, in the form of source code under the 212 | terms of section 4, provided that you also meet all of these conditions: 213 | 214 | a) The work must carry prominent notices stating that you modified 215 | it, and giving a relevant date. 216 | 217 | b) The work must carry prominent notices stating that it is 218 | released under this License and any conditions added under section 219 | 7. This requirement modifies the requirement in section 4 to 220 | "keep intact all notices". 221 | 222 | c) You must license the entire work, as a whole, under this 223 | License to anyone who comes into possession of a copy. This 224 | License will therefore apply, along with any applicable section 7 225 | additional terms, to the whole of the work, and all its parts, 226 | regardless of how they are packaged. This License gives no 227 | permission to license the work in any other way, but it does not 228 | invalidate such permission if you have separately received it. 229 | 230 | d) If the work has interactive user interfaces, each must display 231 | Appropriate Legal Notices; however, if the Program has interactive 232 | interfaces that do not display Appropriate Legal Notices, your 233 | work need not make them do so. 234 | 235 | A compilation of a covered work with other separate and independent 236 | works, which are not by their nature extensions of the covered work, 237 | and which are not combined with it such as to form a larger program, 238 | in or on a volume of a storage or distribution medium, is called an 239 | "aggregate" if the compilation and its resulting copyright are not 240 | used to limit the access or legal rights of the compilation's users 241 | beyond what the individual works permit. Inclusion of a covered work 242 | in an aggregate does not cause this License to apply to the other 243 | parts of the aggregate. 244 | 245 | 6. Conveying Non-Source Forms. 246 | 247 | You may convey a covered work in object code form under the terms 248 | of sections 4 and 5, provided that you also convey the 249 | machine-readable Corresponding Source under the terms of this License, 250 | in one of these ways: 251 | 252 | a) Convey the object code in, or embodied in, a physical product 253 | (including a physical distribution medium), accompanied by the 254 | Corresponding Source fixed on a durable physical medium 255 | customarily used for software interchange. 256 | 257 | b) Convey the object code in, or embodied in, a physical product 258 | (including a physical distribution medium), accompanied by a 259 | written offer, valid for at least three years and valid for as 260 | long as you offer spare parts or customer support for that product 261 | model, to give anyone who possesses the object code either (1) a 262 | copy of the Corresponding Source for all the software in the 263 | product that is covered by this License, on a durable physical 264 | medium customarily used for software interchange, for a price no 265 | more than your reasonable cost of physically performing this 266 | conveying of source, or (2) access to copy the 267 | Corresponding Source from a network server at no charge. 
268 | 269 | c) Convey individual copies of the object code with a copy of the 270 | written offer to provide the Corresponding Source. This 271 | alternative is allowed only occasionally and noncommercially, and 272 | only if you received the object code with such an offer, in accord 273 | with subsection 6b. 274 | 275 | d) Convey the object code by offering access from a designated 276 | place (gratis or for a charge), and offer equivalent access to the 277 | Corresponding Source in the same way through the same place at no 278 | further charge. You need not require recipients to copy the 279 | Corresponding Source along with the object code. If the place to 280 | copy the object code is a network server, the Corresponding Source 281 | may be on a different server (operated by you or a third party) 282 | that supports equivalent copying facilities, provided you maintain 283 | clear directions next to the object code saying where to find the 284 | Corresponding Source. Regardless of what server hosts the 285 | Corresponding Source, you remain obligated to ensure that it is 286 | available for as long as needed to satisfy these requirements. 287 | 288 | e) Convey the object code using peer-to-peer transmission, provided 289 | you inform other peers where the object code and Corresponding 290 | Source of the work are being offered to the general public at no 291 | charge under subsection 6d. 292 | 293 | A separable portion of the object code, whose source code is excluded 294 | from the Corresponding Source as a System Library, need not be 295 | included in conveying the object code work. 296 | 297 | A "User Product" is either (1) a "consumer product", which means any 298 | tangible personal property which is normally used for personal, family, 299 | or household purposes, or (2) anything designed or sold for incorporation 300 | into a dwelling. In determining whether a product is a consumer product, 301 | doubtful cases shall be resolved in favor of coverage. For a particular 302 | product received by a particular user, "normally used" refers to a 303 | typical or common use of that class of product, regardless of the status 304 | of the particular user or of the way in which the particular user 305 | actually uses, or expects or is expected to use, the product. A product 306 | is a consumer product regardless of whether the product has substantial 307 | commercial, industrial or non-consumer uses, unless such uses represent 308 | the only significant mode of use of the product. 309 | 310 | "Installation Information" for a User Product means any methods, 311 | procedures, authorization keys, or other information required to install 312 | and execute modified versions of a covered work in that User Product from 313 | a modified version of its Corresponding Source. The information must 314 | suffice to ensure that the continued functioning of the modified object 315 | code is in no case prevented or interfered with solely because 316 | modification has been made. 317 | 318 | If you convey an object code work under this section in, or with, or 319 | specifically for use in, a User Product, and the conveying occurs as 320 | part of a transaction in which the right of possession and use of the 321 | User Product is transferred to the recipient in perpetuity or for a 322 | fixed term (regardless of how the transaction is characterized), the 323 | Corresponding Source conveyed under this section must be accompanied 324 | by the Installation Information. 
But this requirement does not apply 325 | if neither you nor any third party retains the ability to install 326 | modified object code on the User Product (for example, the work has 327 | been installed in ROM). 328 | 329 | The requirement to provide Installation Information does not include a 330 | requirement to continue to provide support service, warranty, or updates 331 | for a work that has been modified or installed by the recipient, or for 332 | the User Product in which it has been modified or installed. Access to a 333 | network may be denied when the modification itself materially and 334 | adversely affects the operation of the network or violates the rules and 335 | protocols for communication across the network. 336 | 337 | Corresponding Source conveyed, and Installation Information provided, 338 | in accord with this section must be in a format that is publicly 339 | documented (and with an implementation available to the public in 340 | source code form), and must require no special password or key for 341 | unpacking, reading or copying. 342 | 343 | 7. Additional Terms. 344 | 345 | "Additional permissions" are terms that supplement the terms of this 346 | License by making exceptions from one or more of its conditions. 347 | Additional permissions that are applicable to the entire Program shall 348 | be treated as though they were included in this License, to the extent 349 | that they are valid under applicable law. If additional permissions 350 | apply only to part of the Program, that part may be used separately 351 | under those permissions, but the entire Program remains governed by 352 | this License without regard to the additional permissions. 353 | 354 | When you convey a copy of a covered work, you may at your option 355 | remove any additional permissions from that copy, or from any part of 356 | it. (Additional permissions may be written to require their own 357 | removal in certain cases when you modify the work.) You may place 358 | additional permissions on material, added by you to a covered work, 359 | for which you have or can give appropriate copyright permission. 360 | 361 | Notwithstanding any other provision of this License, for material you 362 | add to a covered work, you may (if authorized by the copyright holders of 363 | that material) supplement the terms of this License with terms: 364 | 365 | a) Disclaiming warranty or limiting liability differently from the 366 | terms of sections 15 and 16 of this License; or 367 | 368 | b) Requiring preservation of specified reasonable legal notices or 369 | author attributions in that material or in the Appropriate Legal 370 | Notices displayed by works containing it; or 371 | 372 | c) Prohibiting misrepresentation of the origin of that material, or 373 | requiring that modified versions of such material be marked in 374 | reasonable ways as different from the original version; or 375 | 376 | d) Limiting the use for publicity purposes of names of licensors or 377 | authors of the material; or 378 | 379 | e) Declining to grant rights under trademark law for use of some 380 | trade names, trademarks, or service marks; or 381 | 382 | f) Requiring indemnification of licensors and authors of that 383 | material by anyone who conveys the material (or modified versions of 384 | it) with contractual assumptions of liability to the recipient, for 385 | any liability that these contractual assumptions directly impose on 386 | those licensors and authors. 
387 | 388 | All other non-permissive additional terms are considered "further 389 | restrictions" within the meaning of section 10. If the Program as you 390 | received it, or any part of it, contains a notice stating that it is 391 | governed by this License along with a term that is a further 392 | restriction, you may remove that term. If a license document contains 393 | a further restriction but permits relicensing or conveying under this 394 | License, you may add to a covered work material governed by the terms 395 | of that license document, provided that the further restriction does 396 | not survive such relicensing or conveying. 397 | 398 | If you add terms to a covered work in accord with this section, you 399 | must place, in the relevant source files, a statement of the 400 | additional terms that apply to those files, or a notice indicating 401 | where to find the applicable terms. 402 | 403 | Additional terms, permissive or non-permissive, may be stated in the 404 | form of a separately written license, or stated as exceptions; 405 | the above requirements apply either way. 406 | 407 | 8. Termination. 408 | 409 | You may not propagate or modify a covered work except as expressly 410 | provided under this License. Any attempt otherwise to propagate or 411 | modify it is void, and will automatically terminate your rights under 412 | this License (including any patent licenses granted under the third 413 | paragraph of section 11). 414 | 415 | However, if you cease all violation of this License, then your 416 | license from a particular copyright holder is reinstated (a) 417 | provisionally, unless and until the copyright holder explicitly and 418 | finally terminates your license, and (b) permanently, if the copyright 419 | holder fails to notify you of the violation by some reasonable means 420 | prior to 60 days after the cessation. 421 | 422 | Moreover, your license from a particular copyright holder is 423 | reinstated permanently if the copyright holder notifies you of the 424 | violation by some reasonable means, this is the first time you have 425 | received notice of violation of this License (for any work) from that 426 | copyright holder, and you cure the violation prior to 30 days after 427 | your receipt of the notice. 428 | 429 | Termination of your rights under this section does not terminate the 430 | licenses of parties who have received copies or rights from you under 431 | this License. If your rights have been terminated and not permanently 432 | reinstated, you do not qualify to receive new licenses for the same 433 | material under section 10. 434 | 435 | 9. Acceptance Not Required for Having Copies. 436 | 437 | You are not required to accept this License in order to receive or 438 | run a copy of the Program. Ancillary propagation of a covered work 439 | occurring solely as a consequence of using peer-to-peer transmission 440 | to receive a copy likewise does not require acceptance. However, 441 | nothing other than this License grants you permission to propagate or 442 | modify any covered work. These actions infringe copyright if you do 443 | not accept this License. Therefore, by modifying or propagating a 444 | covered work, you indicate your acceptance of this License to do so. 445 | 446 | 10. Automatic Licensing of Downstream Recipients. 447 | 448 | Each time you convey a covered work, the recipient automatically 449 | receives a license from the original licensors, to run, modify and 450 | propagate that work, subject to this License. 
You are not responsible 451 | for enforcing compliance by third parties with this License. 452 | 453 | An "entity transaction" is a transaction transferring control of an 454 | organization, or substantially all assets of one, or subdividing an 455 | organization, or merging organizations. If propagation of a covered 456 | work results from an entity transaction, each party to that 457 | transaction who receives a copy of the work also receives whatever 458 | licenses to the work the party's predecessor in interest had or could 459 | give under the previous paragraph, plus a right to possession of the 460 | Corresponding Source of the work from the predecessor in interest, if 461 | the predecessor has it or can get it with reasonable efforts. 462 | 463 | You may not impose any further restrictions on the exercise of the 464 | rights granted or affirmed under this License. For example, you may 465 | not impose a license fee, royalty, or other charge for exercise of 466 | rights granted under this License, and you may not initiate litigation 467 | (including a cross-claim or counterclaim in a lawsuit) alleging that 468 | any patent claim is infringed by making, using, selling, offering for 469 | sale, or importing the Program or any portion of it. 470 | 471 | 11. Patents. 472 | 473 | A "contributor" is a copyright holder who authorizes use under this 474 | License of the Program or a work on which the Program is based. The 475 | work thus licensed is called the contributor's "contributor version". 476 | 477 | A contributor's "essential patent claims" are all patent claims 478 | owned or controlled by the contributor, whether already acquired or 479 | hereafter acquired, that would be infringed by some manner, permitted 480 | by this License, of making, using, or selling its contributor version, 481 | but do not include claims that would be infringed only as a 482 | consequence of further modification of the contributor version. For 483 | purposes of this definition, "control" includes the right to grant 484 | patent sublicenses in a manner consistent with the requirements of 485 | this License. 486 | 487 | Each contributor grants you a non-exclusive, worldwide, royalty-free 488 | patent license under the contributor's essential patent claims, to 489 | make, use, sell, offer for sale, import and otherwise run, modify and 490 | propagate the contents of its contributor version. 491 | 492 | In the following three paragraphs, a "patent license" is any express 493 | agreement or commitment, however denominated, not to enforce a patent 494 | (such as an express permission to practice a patent or covenant not to 495 | sue for patent infringement). To "grant" such a patent license to a 496 | party means to make such an agreement or commitment not to enforce a 497 | patent against the party. 498 | 499 | If you convey a covered work, knowingly relying on a patent license, 500 | and the Corresponding Source of the work is not available for anyone 501 | to copy, free of charge and under the terms of this License, through a 502 | publicly available network server or other readily accessible means, 503 | then you must either (1) cause the Corresponding Source to be so 504 | available, or (2) arrange to deprive yourself of the benefit of the 505 | patent license for this particular work, or (3) arrange, in a manner 506 | consistent with the requirements of this License, to extend the patent 507 | license to downstream recipients. 
"Knowingly relying" means you have 508 | actual knowledge that, but for the patent license, your conveying the 509 | covered work in a country, or your recipient's use of the covered work 510 | in a country, would infringe one or more identifiable patents in that 511 | country that you have reason to believe are valid. 512 | 513 | If, pursuant to or in connection with a single transaction or 514 | arrangement, you convey, or propagate by procuring conveyance of, a 515 | covered work, and grant a patent license to some of the parties 516 | receiving the covered work authorizing them to use, propagate, modify 517 | or convey a specific copy of the covered work, then the patent license 518 | you grant is automatically extended to all recipients of the covered 519 | work and works based on it. 520 | 521 | A patent license is "discriminatory" if it does not include within 522 | the scope of its coverage, prohibits the exercise of, or is 523 | conditioned on the non-exercise of one or more of the rights that are 524 | specifically granted under this License. You may not convey a covered 525 | work if you are a party to an arrangement with a third party that is 526 | in the business of distributing software, under which you make payment 527 | to the third party based on the extent of your activity of conveying 528 | the work, and under which the third party grants, to any of the 529 | parties who would receive the covered work from you, a discriminatory 530 | patent license (a) in connection with copies of the covered work 531 | conveyed by you (or copies made from those copies), or (b) primarily 532 | for and in connection with specific products or compilations that 533 | contain the covered work, unless you entered into that arrangement, 534 | or that patent license was granted, prior to 28 March 2007. 535 | 536 | Nothing in this License shall be construed as excluding or limiting 537 | any implied license or other defenses to infringement that may 538 | otherwise be available to you under applicable patent law. 539 | 540 | 12. No Surrender of Others' Freedom. 541 | 542 | If conditions are imposed on you (whether by court order, agreement or 543 | otherwise) that contradict the conditions of this License, they do not 544 | excuse you from the conditions of this License. If you cannot convey a 545 | covered work so as to satisfy simultaneously your obligations under this 546 | License and any other pertinent obligations, then as a consequence you may 547 | not convey it at all. For example, if you agree to terms that obligate you 548 | to collect a royalty for further conveying from those to whom you convey 549 | the Program, the only way you could satisfy both those terms and this 550 | License would be to refrain entirely from conveying the Program. 551 | 552 | 13. Use with the GNU Affero General Public License. 553 | 554 | Notwithstanding any other provision of this License, you have 555 | permission to link or combine any covered work with a work licensed 556 | under version 3 of the GNU Affero General Public License into a single 557 | combined work, and to convey the resulting work. The terms of this 558 | License will continue to apply to the part which is the covered work, 559 | but the special requirements of the GNU Affero General Public License, 560 | section 13, concerning interaction through a network will apply to the 561 | combination as such. 562 | 563 | 14. Revised Versions of this License. 
564 | 565 | The Free Software Foundation may publish revised and/or new versions of 566 | the GNU General Public License from time to time. Such new versions will 567 | be similar in spirit to the present version, but may differ in detail to 568 | address new problems or concerns. 569 | 570 | Each version is given a distinguishing version number. If the 571 | Program specifies that a certain numbered version of the GNU General 572 | Public License "or any later version" applies to it, you have the 573 | option of following the terms and conditions either of that numbered 574 | version or of any later version published by the Free Software 575 | Foundation. If the Program does not specify a version number of the 576 | GNU General Public License, you may choose any version ever published 577 | by the Free Software Foundation. 578 | 579 | If the Program specifies that a proxy can decide which future 580 | versions of the GNU General Public License can be used, that proxy's 581 | public statement of acceptance of a version permanently authorizes you 582 | to choose that version for the Program. 583 | 584 | Later license versions may give you additional or different 585 | permissions. However, no additional obligations are imposed on any 586 | author or copyright holder as a result of your choosing to follow a 587 | later version. 588 | 589 | 15. Disclaimer of Warranty. 590 | 591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 599 | 600 | 16. Limitation of Liability. 601 | 602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 610 | SUCH DAMAGES. 611 | 612 | 17. Interpretation of Sections 15 and 16. 613 | 614 | If the disclaimer of warranty and limitation of liability provided 615 | above cannot be given local legal effect according to their terms, 616 | reviewing courts shall apply local law that most closely approximates 617 | an absolute waiver of all civil liability in connection with the 618 | Program, unless a warranty or assumption of liability accompanies a 619 | copy of the Program in return for a fee. 620 | 621 | END OF TERMS AND CONDITIONS 622 | 623 | How to Apply These Terms to Your New Programs 624 | 625 | If you develop a new program, and you want it to be of the greatest 626 | possible use to the public, the best way to achieve this is to make it 627 | free software which everyone can redistribute and change under these terms. 
628 | 
629 | To do so, attach the following notices to the program. It is safest
630 | to attach them to the start of each source file to most effectively
631 | state the exclusion of warranty; and each file should have at least
632 | the "copyright" line and a pointer to where the full notice is found.
633 | 
634 | <one line to give the program's name and a brief idea of what it does.>
635 | Copyright (C) <year> <name of author>
636 | 
637 | This program is free software: you can redistribute it and/or modify
638 | it under the terms of the GNU General Public License as published by
639 | the Free Software Foundation, either version 3 of the License, or
640 | (at your option) any later version.
641 | 
642 | This program is distributed in the hope that it will be useful,
643 | but WITHOUT ANY WARRANTY; without even the implied warranty of
644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
645 | GNU General Public License for more details.
646 | 
647 | You should have received a copy of the GNU General Public License
648 | along with this program. If not, see <https://www.gnu.org/licenses/>.
649 | 
650 | Also add information on how to contact you by electronic and paper mail.
651 | 
652 | If the program does terminal interaction, make it output a short
653 | notice like this when it starts in an interactive mode:
654 | 
655 | <program> Copyright (C) <year> <name of author>
656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
657 | This is free software, and you are welcome to redistribute it
658 | under certain conditions; type `show c' for details.
659 | 
660 | The hypothetical commands `show w' and `show c' should show the appropriate
661 | parts of the General Public License. Of course, your program's commands
662 | might be different; for a GUI interface, you would use an "about box".
663 | 
664 | You should also get your employer (if you work as a programmer) or school,
665 | if any, to sign a "copyright disclaimer" for the program, if necessary.
666 | For more information on this, and how to apply and follow the GNU GPL, see
667 | <https://www.gnu.org/licenses/>.
668 | 
669 | The GNU General Public License does not permit incorporating your program
670 | into proprietary programs. If your program is a subroutine library, you
671 | may consider it more useful to permit linking proprietary applications with
672 | the library. If this is what you want to do, use the GNU Lesser General
673 | Public License instead of this License. But first, please read
674 | <https://www.gnu.org/licenses/why-not-lgpl.html>.
675 | 
--------------------------------------------------------------------------------
/R/README.md:
--------------------------------------------------------------------------------
1 | # This folder contains scripts and guides related to R
2 | 
3 | ### Beginners
4 | 
5 | For beginners, I've prepared the zip file below, containing four scripts to run through in order (0, 1, 2, 3). These four scripts contain the major takeaways from day 4.
6 | 
7 | * [(zip file) beginner scripts](beginners.zip)
8 | 
9 | ### Connecting remotely
10 | 
11 | The above-mentioned beginner scripts create a SQLite connection object (which we call `con`). You can also create connection objects to remote or local MySQL or T-SQL servers. The guide below will show you how to do that. The connection objects you create can be used in exactly the same ways as the `con` object demonstrated in the beginner scripts.
12 | 13 | * [General guide for connecting to local or remote server](https://htmlpreview.github.io/?https://github.com/frycast/SQL_course/blob/master/R/connecting-R/databases-in-R.html) 14 | 15 | ### Other scripts 16 | 17 | For the links that say "R script", you can right-click (or control-click) the link, and then click "save link as". 18 | 19 | * [(R script) Intro tutorial for programming in R](https://github.com/frycast/SQL_course/raw/master/R/intro-to-R.R) 20 | * [(R script) Connecting to a remote MySQL server](https://github.com/frycast/SQL_course/raw/master/R/connecting-R/remote-MySQL.R) 21 | * [A SQLite copy of the Sandpit database](sqlite-R/Sandpit.sqlite) 22 | * [(R script) Tidy data in R](https://github.com/frycast/SQL_course/raw/master/R/tidy-data.R) 23 | * [The 'umbrella' example project directory (includes RMarkdown code for the day 4 slides)](https://github.com/frycast/umbrella) 24 | * [More on using SQLite in R](sqlite-R) 25 | -------------------------------------------------------------------------------- /R/beginners.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frycast/SQL_course/13d5cf01fdd82b7cf8de3bb29846b5d595d07ab4/R/beginners.zip -------------------------------------------------------------------------------- /R/connecting-R/README.md: -------------------------------------------------------------------------------- 1 | # Connecting to R 2 | 3 | This folder contains a guide for connecting to a MySQL or T-SQL server in R. 4 | 5 | * [Click here to view the guide](https://htmlpreview.github.io/?https://github.com/frycast/SQL_course/blob/master/R/connecting-R/databases-in-R.html) 6 | -------------------------------------------------------------------------------- /R/connecting-R/databases-in-R.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Connecting to SQL databases with R" 3 | author: Daniel Fryer 4 | date: November 11, 2021 5 | output: 6 | prettydoc::html_pretty: 7 | theme: leonids 8 | highlight: github 9 | --- 10 | 11 | ```{r setup, include=FALSE} 12 | knitr::opts_chunk$set(echo = TRUE) 13 | ``` 14 | 15 | ## Introduction 16 | 17 | Setting up and connecting to SQL servers can get tricky, with all the authentication methods and security measures, hardware differences, etc. 18 | 19 | This is a general guide, for both MySQL and T-SQL. We will learn to: 20 | 21 | 1. Create and connect to a local server. 22 | 2. Connect to a remote database. 23 | 24 | If you're here to get connected to a remote MySQL database, 25 | then I've prepared an R script to get you started. 26 | [Click here to access the MySQL connection script](./remote-MySQL.R). 27 | 28 | ## 1. Create and connect to a local server 29 | 30 | Before connecting to a local server in R, you should create the server, install a popular SQL code editor, and insert some practice data. For this, I've created the two guides linked below. Please go through one of them before continuing. 31 | 32 | * [For T-SQL installation (easier on windows) click here](https://github.com/frycast/SQL_course/tree/master/create-database/T-SQL) 33 | 34 | * [For MySQL installation (easier on macOS) click here](https://github.com/frycast/SQL_course/tree/master/create-database/MySQL) 35 | 36 | You'll also need to have R and RStudio set up on your computer. 
Here is a good tutorial for that: 37 | 38 | * [Click here for the ModernDive R installation tutorial](https://moderndive.netlify.app/1-getting-started.html#installing) 39 | 40 | Once the SQL server and R are both set up, sometimes the rest will go really smoothly, but sometimes it won't. If you run into trouble, you might need to use a search engine and/or browse through forums to find some suggestions. If you're a student taking one of my SQL courses, you can contact me any time for help! 41 | 42 | #### Connect to a local MySQL server 43 | 44 | First, run the code below to install the required R package: 45 | 46 | ```{r, eval=FALSE} 47 | install.packages("RMySQL") 48 | library(RMySQL) 49 | ``` 50 | 51 | Then, the following R code should get you connected. Note that the database named 'Sandpit' was created within the above MySQL installation guide. 52 | 53 | ```{r, eval=FALSE} 54 | con <- DBI::dbConnect( 55 | RMySQL::MySQL(), 56 | dbname = "Sandpit", 57 | host = "localhost") 58 | ``` 59 | 60 | If the above causes an error that mentions `caching_sha2_password could not be loaded`, then connect to localhost using MySQL Workbench (or Sequel Ace, or whichever SQL editor you have installed), and run the SQL code below. *Warning:* this creates a very insecure user account, so you should never do this on a server that is open to the public and contains sensitive or private data. I'm assuming you're setting up this database on your home computer just to experiment with fake data. 61 | 62 | ```{SQL, eval=FALSE} 63 | CREATE USER 'R'@'localhost' IDENTIFIED WITH mysql_native_password BY 'password'; 64 | GRANT ALL PRIVILEGES ON *.* TO 'R'@'localhost' WITH GRANT OPTION; 65 | ``` 66 | 67 | Once you've run the above SQL code, you should be able to connect with: 68 | 69 | ```{r, eval=FALSE} 70 | con <- DBI::dbConnect( 71 | RMySQL::MySQL(), 72 | dbname = "Sandpit", 73 | host = "localhost", 74 | user = "R", 75 | password = "password") 76 | ``` 77 | 78 | Once the code is executed without error, it means the connection is established. To test it, you can check that the following returns a list of table names. 79 | 80 | ```{r, eval=FALSE} 81 | DBI::dbListTables(con) 82 | ``` 83 | 84 | Guidelines for using the connection are given in the R chapter of the course notes. Once you're done using the connection, remember to disconnect: 85 | 86 | ```{r, eval=FALSE} 87 | DBI::dbDisconnect(con) 88 | ``` 89 | 90 | #### Connect to a local T-SQL server 91 | 92 | First, run the code below to install the required R package: 93 | 94 | ```{r, eval=FALSE} 95 | install.packages(c("odbc","DBI")) 96 | library(odbc) 97 | library(DBI) 98 | ``` 99 | 100 | Then, the following R code should get you connected. Note that the database named 'Sandpit' was created within the above T-SQL installation guide. 101 | 102 | ```{r, eval=FALSE} 103 | con <- DBI::dbConnect(odbc::odbc(), 104 | Driver = "ODBC Driver 17 for SQL Server", 105 | Server = "localhost", 106 | Database = "Sandpit", 107 | Trusted_Connection = "yes") 108 | ``` 109 | 110 | If the above fails, there may be alternative drivers that can be used in place of `ODBC Driver 17 for SQL Server`. However, the other parameters will change too. A common alternative is: 111 | 112 | ```{r, eval=FALSE} 113 | con <- DBI::dbConnect(odbc::odbc(), 114 | Driver = "SQL Server", 115 | Server = "localhost", 116 | Database = "master", 117 | Trusted_Connection = "True") 118 | ``` 119 | 120 | Small details make a difference. 
For example, the `Trusted_Connection` parameter in the second example is set to `"True"`, while in the first example it is `"yes"`. A large collection of alternative connection strings can be found on [connectionstrings.com](https://www.connectionstrings.com/sql-server/). 121 | 122 | Once the code is executed without error, it means the connection is established. To test it, you can check that the following returns a list of table names. 123 | 124 | ```{r, eval=FALSE} 125 | DBI::dbListTables(con) 126 | ``` 127 | 128 | Guidelines for using the connection are given in the R chapter of the course notes. Once you're done using the connection, remember to disconnect: 129 | 130 | ```{r, eval=FALSE} 131 | DBI::dbDisconnect(con) 132 | ``` 133 | 134 | ## 2. Connect to a remote server 135 | 136 | Connecting to a remote database means you can avoid the hassle of installing a local MySQL or T-SQL server. You will need some connection details specific to the database you plan to connect to. The types of details you need depend on whether you're connecting to a MySQL or T-SQL server. 137 | 138 | #### Connect to a remote MySQL server 139 | 140 | First, run the code below to install the required R package: 141 | 142 | ```{r, eval=FALSE} 143 | install.packages("RMySQL") 144 | library(RMySQL) 145 | ``` 146 | 147 | Then, use the following to connect: 148 | 149 | ```{r, eval=FALSE} 150 | con <- DBI::dbConnect(RMySQL::MySQL(), 151 | host = "write_host_address_here", 152 | port = 0000, # Replace this number with the actual port number 153 | dbname = "write_database_name_here", 154 | user = "write_user_name_here", 155 | password = "write_your_password_here") 156 | ``` 157 | 158 | Once the code is executed without error, it means the connection is established. To test it, you can check that the following returns a list of table names. 159 | 160 | ```{r, eval=FALSE} 161 | DBI::dbListTables(con) 162 | ``` 163 | 164 | Guidelines for using the connection are given in the R chapter of the course notes. Once you're done using the connection, remember to disconnect: 165 | 166 | ```{r, eval=FALSE} 167 | DBI::dbDisconnect(con) 168 | ``` 169 | 170 | #### Connect to a remote T-SQL server 171 | 172 | First, run the code below to install the required R package: 173 | 174 | ```{r, eval=FALSE} 175 | install.packages(c("odbc","DBI")) 176 | library(odbc) 177 | library(DBI) 178 | ``` 179 | 180 | Then, use the following to connect: 181 | 182 | ```{r, eval=FALSE} 183 | con <- odbc::dbConnect(odbc::odbc(), 184 | Driver = "SQL Server", 185 | Server = "write_server_address_here", 186 | Database = "write_database_name_here", 187 | UID = "write_user_name_here", 188 | PWD = "write_your_password_here") 189 | ``` 190 | 191 | Once the code is executed without error, it means the connection is established. To test it, you can check that the following returns a list of table names. 192 | 193 | ```{r, eval=FALSE} 194 | DBI::dbListTables(con) 195 | ``` 196 | 197 | Guidelines for using the connection are given in the R chapter of the course notes. 
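As a quick test beyond listing tables (a minimal sketch; `Some_Table` is a placeholder, so replace it with a table that actually exists on your server), you can send a query through the connection and get the result back as a regular data frame:

```{r, eval=FALSE}
df <- DBI::dbGetQuery(con, "SELECT TOP 10 * FROM Some_Table")
head(df)
```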
Once you're done using the connection, remember to disconnect: 198 | 199 | ```{r, eval=FALSE} 200 | DBI::dbDisconnect(con) 201 | ``` -------------------------------------------------------------------------------- /R/connecting-R/remote-MySQL.R: -------------------------------------------------------------------------------- 1 | # Install and load RMySQL 2 | install.packages("RMySQL") 3 | library(RMySQL) 4 | 5 | # Choose the database name and password 6 | database <- "Sandpit" 7 | password <- "insert_password_here" 8 | 9 | # Connect to the database 10 | con <- DBI::dbConnect(RMySQL::MySQL(), 11 | host = "db-intro-sql-do-user-9289996-0.b.db.ondigitalocean.com", 12 | port = 25060, # Replace this number with the actual port number 13 | dbname = database, 14 | user = "RSCloud", 15 | password = password) 16 | 17 | # List the tables 18 | DBI::dbListTables(con) 19 | 20 | ##### USE DATABASE HERE ####### 21 | 22 | library(dbplyr) 23 | library(dplyr) 24 | d1 <- tbl(con, "Notes_Friends") 25 | d2 <- tbl(con, "Notes_Pets") 26 | 27 | # See: 28 | # https://dbplyr.tidyverse.org/ 29 | 30 | ############################## 31 | 32 | # Disconnect 33 | DBI::dbDisconnect(con) 34 | -------------------------------------------------------------------------------- /R/example-R.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frycast/SQL_course/13d5cf01fdd82b7cf8de3bb29846b5d595d07ab4/R/example-R.zip -------------------------------------------------------------------------------- /R/examples/bioinformatics/analysis.R: -------------------------------------------------------------------------------- 1 | # This script will give an example analysis: 2 | # * How many times have we seen this variation before? 3 | # * Analyse by patient info (e.g., ethnicity) -------------------------------------------------------------------------------- /R/examples/bioinformatics/app.R: -------------------------------------------------------------------------------- 1 | # This script will create a Shiny app that 2 | # enables users to work with the database safely. -------------------------------------------------------------------------------- /R/examples/bioinformatics/automation.R: -------------------------------------------------------------------------------- 1 | # This script will read all csvs in data/new_records and then 2 | # insert the data into the variations database. 
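#
# A minimal sketch of what that could look like (an illustration only;
# the staging table name 'staging_new_records' is an assumption, and the
# mapping into the Gene/Variant/Patient tables still follows the TODO
# steps in start_here.R):
#
# library(DBI)
# con <- DBI::dbConnect(RSQLite::SQLite(), "data/variants.sqlite")
# csv_files <- list.files("data/new_records", pattern = "\\.csv$", full.names = TRUE)
# for (f in csv_files) {
#   new_records <- read.csv(f)
#   # land the raw rows in a staging table first
#   DBI::dbWriteTable(con, "staging_new_records", new_records, append = TRUE)
# }
# DBI::dbDisconnect(con)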
-------------------------------------------------------------------------------- /R/examples/bioinformatics/create_variants_database.sql: --------------------------------------------------------------------------------
1 | CREATE TABLE variant (
2 | name text NOT NULL,
3 | variant_id integer PRIMARY KEY AUTOINCREMENT,
4 | change_from text NOT NULL,
5 | change_to text NOT NULL,
6 | position integer NOT NULL
7 | );
8 |
9 | -- -- TODO: See email for update:
10 | -- CREATE TABLE variant (
11 | -- name text NOT NULL,
12 | -- variant_id integer PRIMARY KEY AUTOINCREMENT,
13 | -- chromosome text NOT NULL,
14 | -- position int NOT NULL,
15 | -- reference text NOT NULL,
16 | -- alternate text NOT NULL,
17 | -- );
18 |
19 | -- WHERE chromosome = '' AND
20 |
21 | INSERT INTO variant
22 | (NAME, variant_ID, change_from, change_to, position)
23 | VALUES
24 | ('E1038G', 1, 'E', 'G', '1038'),
25 | ('D835Y', 2, 'D', 'Y', '835'),
26 | ('R396C', 3, 'R', 'C', '396'),
27 | ('V617F', 4, 'V', 'F', '617'),
28 | ('K57N', 5, 'K', 'N', '57'),
29 | ('P2514R', 6, 'P', 'R', '2514'),
30 | ('E542K', 7, 'E', 'K', '542'),
31 | ('R130G', 8, 'R', 'G', '130'),
32 | ('R661W', 9, 'R', 'W', '661'),
33 | ('R361H', 10, 'R', 'H', '361'),
34 | ('N496K', 11, 'N', 'K', '496'),
35 | ('N1020Y', 12, 'N', 'Y', '1020'),
36 | ('Y640F', 13, 'Y', 'F', '640'),
37 | ('G1664R', 14, 'G', 'R', '1664'),
38 | ('R537P', 15, 'R', 'P', '537'),
39 | ('R175H', 16, 'R', 'H', '175'),
40 | ('R167W', 17, 'R', 'W', '167'),
41 | ('R228H', 18, 'R', 'H', '228'),
42 | ('R188H', 19, 'R', 'H', '188'),
43 | ('R622W', 20, 'R', 'W', '622'),
44 | ('R623W', 21, 'R', 'W', '622'); -- not referenced
45 |
46 | CREATE TABLE Gene (
47 | name text NOT NULL,
48 | gene_id integer PRIMARY KEY AUTOINCREMENT
49 | );
50 |
51 | INSERT INTO Gene
52 | (NAME, GENE_ID)
53 | VALUES
54 | ('BRCA1', 1),
55 | ('FLT3', 2),
56 | ('GATA2', 3),
57 | ('JAK2', 4),
58 | ('MAP2K1', 5),
59 | ('NOTCH1', 6),
60 | ('PIK3CA', 7),
61 | ('PTEN', 8),
62 | ('RB1', 9),
63 | ('SMAD4', 10),
64 | ('SMC1A', 11),
65 | ('SMC3', 12),
66 | ('STAT3', 13),
67 | ('TET2', 14),
68 | ('TGFBR2', 15),
69 | ('TP53', 16),
70 | ('VHL', 17),
71 | ('XPA', 18),
72 | ('XRCC2', 19),
73 | ('ZFHX3', 20),
74 | ('STAT4', 21), -- not referenced
75 | ('P53', 22), -- not referenced
76 | ('PT E N', 23); -- not referenced
77 |
78 | CREATE TABLE Patient(
79 | patient_id integer PRIMARY KEY AUTOINCREMENT,
80 | cancer_type text,
81 | ethnicity text
82 | );
83 |
84 | INSERT INTO Patient
85 | (PATIENT_ID, CANCER_TYPE, ETHNICITY)
86 | VALUES
87 | (1, 'Breast', 'Asian'),
88 | (2, 'Ovarian', 'Caucasian'),
89 | (3, 'Acute myeloid leukemia', 'African American'),
90 | (4, 'Myelodysplastic syndrome', 'Hispanic'),
91 | (5, 'Myeloproliferative neoplasm', 'Caucasian'),
92 | (6, 'Colorectal', 'African American'),
93 | (7, 'T-cell acute lymphoblastic leukemia', 'Hispanic'),
94 | (8, 'Lung cancer', 'Caucasian'),
95 | (9, 'Prostate cancer', 'Asian'),
96 | (10, 'Skin cancer', 'Caucasian'),
97 | (11, 'Retinoblastoma', 'African American'),
98 | (12, 'Pancreatic cancer', 'Hispanic'),
99 | (13, 'Cornelia de Lange syndrome', 'Caucasian'),
100 | (14, 'Colorectal', 'Asian'),
101 | (15, 'Lymphoma African', 'American'),
102 | (16, 'Myelodysplastic syndrome', 'Hispanic'),
103 | (17, 'Hereditary nonpolyposis colorectal cancer', 'Caucasian'),
104 | (18, 'Breast cancer', 'African American' ),
105 | (19, 'Renal cell carcinoma', 'Hispanic' ),
106 | (20, 'Skin cancer', 'Caucasian' ),
107 | (21, 'Breast cancer', 'Asian' ),
108 | (22, 'Schizophrenia', 'Caucasian' ),
109 | (23,
'Lung Cancer', 'Asian' ), 110 | (24, 'Lung Cancer', 'African' ), 111 | (25, 'Breast Cancer', 'Caucasian' ), 112 | (26, 'Breast Cancer', 'Asian' ), 113 | (27, 'Lung Cancer', 'Caucasian' ), 114 | (28, 'Lung Cancer', 'African' ), 115 | (29, 'Prostate', 'Caucasian' ), 116 | (30, 'Prostate Cancer', 'Asian' ), 117 | (31, 'Lung Cancer', 'Hispanic' ), 118 | (32, 'Lung Cancer', 'Asian' ), 119 | (33, 'Lung', 'African' ), 120 | (34, 'Lung', 'Caucasian' ), 121 | (35, 'Breast Cancer', 'Caucasian' ), 122 | (36, 'Breast Cancer', 'Asian' ), 123 | (37, 'Leukemia', 'Caucasian' ), 124 | (38, 'Colorectal Cancer', 'Asian' ), 125 | (39, 'Colorectal Cancer', 'African' ), 126 | (40, 'Prostate Cancer', 'African' ), 127 | (41, 'Retinoblastoma', 'Caucasian' ), 128 | (42, 'Pancreatic Cancer', 'Hispanic' ), 129 | (43, 'Ovarian Cancer', 'African' ), 130 | (44, 'Ovarian', 'Caucasian' ), 131 | (45, 'Leukemia', 'African' ), 132 | (46, 'Myelodysplastic Syndrome', 'African'), 133 | (47, 'Myelodysplastic Syndrome', 'African'); -- not referenced 134 | 135 | CREATE TABLE Gene_variant ( 136 | gene_variant_id integer PRIMARY KEY AUTOINCREMENT, 137 | gene_id text NOT NULL, 138 | variant_id text NOT NULL, 139 | pop_freq numeric, 140 | comment text, 141 | FOREIGN KEY (gene_id) REFERENCES Gene (gene_id), 142 | FOREIGN KEY (variant_id) REFERENCES variant (variant_id), 143 | UNIQUE(gene_id, variant_id) 144 | ); 145 | 146 | INSERT INTO Gene_variant 147 | (GENE_variant_ID, GENE_ID, variant_ID, POP_FREQ, COMMENT) 148 | VALUES 149 | (1, 1, 1, 0.01, 'Associated with increased risk of breast and ovarian cancer'), 150 | (2, 2, 2, 0.02, 'Associated with poor prognosis in acute myeloid leukemia patients'), 151 | (3, 3, 3, 0.05, 'Associated with susceptibility to myelodysplastic syndrome'), 152 | (4, 4, 4, 0.02, 'Found in many cases of myeloproliferative neoplasms'), 153 | (5, 5, 5, 0.1, 'Associated with increased risk of colorectal cancer'), 154 | (6, 6, 6, 0.07, 'Found in many cases of T-cell acute lymphoblastic leukemia'), 155 | (7, 7, 7, 0.03, 'Frequently mutated in various cancers, including breast, colon, and lung'), 156 | (8, 8, 8, 0.08, 'Associated with increased risk of multiple types of cancer'), 157 | (9, 9, 9, 0.04, 'Frequently mutated in various types of cancer, including retinoblastoma'), 158 | (10, 10, 10, 0.01, 'Associated with increased risk of pancreatic cancer'), 159 | (11, 11, 11, 0.02, 'Associated with increased risk of Cornelia de Lange syndrome'), 160 | (12, 12, 12, 0.09, 'Associated with increased risk of colorectal cancer'), 161 | (13, 13, 13, 0.05, 'Frequently mutated in various types of cancer, including lymphoma and leukemia'), 162 | (14, 14, 14, 0.03, 'Frequently mutated in myelodysplastic syndromes and acute myeloid leukemia'), 163 | (15, 15, 15, 0.01, 'Associated with increased risk of hereditary nonpolyposis colorectal cancer'), 164 | (16, 16, 16, 0.02, 'Associated with increased risk of many types of cancer, including breast and colon'), 165 | (17, 17, 17, 0.05, 'Associated with increased risk of renal cell carcinoma'), 166 | (18, 18, 18, 0.06, 'Associated with increased risk of skin cancer and xeroderma pigmentosum'), 167 | (19, 19, 19, 0.08, 'Associated with increased risk of breast cancer and ovarian cancer'), 168 | (20, 20, 20, 0.02, 'Associated with increased risk of schizophrenia and bipolar disorder'), 169 | (21, 01, 10, 0.01, 'A study found that individuals with this variant had a higher risk of developing breast and ovarian cancer.'), 170 | (22, 01, 02, 0.03, 'This variant has been found to be 
associated with increased sensitivity to chemotherapy.'), 171 | (23, 01, 03, 0.02, NULL), 172 | (24, 02, 01, 0.04, 'Individuals with this variant have an increased risk of developing acute myeloid leukemia.'), 173 | (25, 02, 20, 0.06, 'A study found that this variant is associated with a higher response rate to FLT3 inhibitors in patients with acute myeloid leukemia.'), 174 | (26, 03, 01, 0.09, 'This variant has been found to be associated with an increased risk of developing myelodysplastic syndrome.'), 175 | (27, 04, 05, 0.05, 'This variant is commonly found in patients with myeloproliferative neoplasms and is associated with a poorer prognosis.'), 176 | (28, 05, 06, 0.07, 'A study found that individuals with this variant had a higher risk of developing colorectal cancer.'), 177 | (29, 05, 07, 0.02, 'This variant has been found to be associated with a poorer response to chemotherapy in patients with colorectal cancer.'), 178 | (30, 06, 04, 0.01, 'This variant has been found to be associated with a poorer prognosis in patients with T-cell acute lymphoblastic leukemia.'), 179 | (31, 07, 09, 0.08, 'This variant is commonly found in patients with lung cancer and is associated with a poorer prognosis.'), 180 | (32, 07, 03, 0.06, 'This variant has been found to be associated with a higher response rate to targeted therapy in patients with lung cancer.'), 181 | (33, 08, 10, 0.02, 'A study found that individuals with this variant had a higher risk of developing prostate cancer.'), 182 | (34, 09, 11, 0.03, 'This variant is commonly found in patients with retinoblastoma and is associated with a higher risk of developing secondary cancers.'), 183 | (35, 10, 19, 0.04, 'This variant has been found to be associated with a higher risk of developing pancreatic cancer.'), 184 | (36, 11, 10, 0.05, 'This variant is commonly found in patients with Cornelia de Lange syndrome and is associated with a more severe phenotype.'), 185 | (37, 12, 03, 0.07, 'This variant is commonly found in patients with colorectal cancer and is associated with a poorer prognosis.'), 186 | (38, 13, 04, 0.01, 'A study found that individuals with this variant had a higher risk of developing lymphoma.'), 187 | (39, 14, 12, 0.02, 'This variant is commonly found in patients with myelodysplastic syndrome and is associated with a poorer prognosis.'); 188 | 189 | CREATE TABLE Patient_Gene_variant( 190 | patient_id integer, 191 | gene_variant_id integer, 192 | vaf numeric, 193 | FOREIGN KEY (gene_variant_id) REFERENCES Gene_variant (gene_variant_id), 194 | FOREIGN KEY (patient_id) REFERENCES Patient (patient_id), 195 | PRIMARY KEY (patient_id, gene_variant_id) 196 | ); 197 | 198 | INSERT INTO Patient_Gene_variant 199 | (PATIENT_ID, GENE_variant_ID, vaf) 200 | VALUES 201 | (1, 1 , 0.35), 202 | (2, 1 , 0.48), 203 | (3, 2 , 0.15), 204 | (4, 3 , 0.22), 205 | (5, 4 , 0.67), 206 | (6, 5 , 0.41), 207 | (7, 6 , 0.58), 208 | (8, 7 , 0.72), 209 | (9, 7 , 0.31), 210 | (10, 8 , 0.44), 211 | (11, 9 , 0.27), 212 | (12, 10, 0.39), 213 | (13, 11, 0.18), 214 | (14, 12, 0.51), 215 | (15, 13, 0.62), 216 | (16, 14, 0.29), 217 | (17, 15, 0.76), 218 | (18, 16, 0.47), 219 | (19, 17, 0.53), 220 | (20, 18, 0.61), 221 | (21, 19, 0.36), 222 | (22, 20, 0.12), 223 | (23, 21, 0.63), 224 | (24, 1 , 0.21), 225 | (25, 1 , 0.48), 226 | (26, 22, 0.36), 227 | (27, 22, 0.42), 228 | (28, 22, 0.18), 229 | (29, 23, 0.75), 230 | (30, 24, 0.31), 231 | (31, 25, 0.28), 232 | (32, 2 , 0.59), 233 | (33, 26, 0.14), 234 | (34, 27, 0.67), 235 | (35, 28, 0.51), 236 | (36, 29, 0.42), 237 | 
(37, 30, 0.23), 238 | (38, 31, 0.57), 239 | (39, 32, 0.41), 240 | (40, 33, 0.62), 241 | (41, 34, 0.28), 242 | (42, 35, 0.39), 243 | (43, 36, 0.18), 244 | (44, 37, 0.35), 245 | (45, 38, 0.49), 246 | (46, 39, 0.27), 247 | (5, 05, 0.62), 248 | (6, 10, 0.29), 249 | (7, 15, 0.76), 250 | (8, 20, 0.47), 251 | (9, 25, 0.53), 252 | (10, 24, 0.61), 253 | (11, 24, 0.36), 254 | (12, 24, 0.12), 255 | (13, 1 , 0.63), 256 | (26, 1 , 0.21), 257 | (27, 1 , 0.48), 258 | (28, 23, 0.12), 259 | (29, 39, 0.63), 260 | (30, 22, 0.21), 261 | (31, 31, 0.48), 262 | (43, 12, 0.36), 263 | (44, 9 , 0.42), 264 | (45, 8 , 0.18), 265 | (6, 6 , 0.75), 266 | (7, 7 , 0.31), 267 | (8, 21, 0.28), 268 | (9, 22, 0.59), 269 | (10, 23, 0.61), 270 | (11, 27, 0.36), 271 | (12, 28, 0.12), 272 | (28, 31, 0.63), 273 | (29, 32, 0.21), 274 | (30, 33, 0.48), 275 | (31, 3 , 0.36), 276 | (43, 8 , 0.42), 277 | (44, 18, 0.18), 278 | (8, 6 , 0.75), 279 | (9, 4 , 0.31), 280 | (10, 32, 0.15), 281 | (11, 33, 0.03), 282 | (12, 34, 0.37), 283 | (28, 27, 0.25), 284 | (31, 29, 0.58), 285 | (43, 9 , 0.21), 286 | (44, 10, 0.32), 287 | (10, 2 , 0.23), 288 | (11, 1 , 0.19), 289 | (12, 2 , 0.18), 290 | (28, 1 , 0.38), 291 | (31, 5 , 0.37), 292 | (11, 15, 0.18), 293 | (12, 15, 0.26), 294 | (28, 15, 0.48), 295 | (11, 16, 0.45), 296 | (12, 16, 0.28), 297 | (28, 16, 0.26), 298 | (11, 17, 0.55), 299 | (12, 17, 0.34), 300 | (28, 17, 0.43), 301 | (12, 39, 0.42); -------------------------------------------------------------------------------- /R/examples/bioinformatics/data/new_records/new_records.csv: -------------------------------------------------------------------------------- 1 | NHI,variant_code,gene_name,vaf,pop_freq,tumour_type 2 | NTTTTAX111,E1038G,BRCA1,0.33,0.07,osteosarcoma 3 | NTTTTAX112,D835Y,XRCC2,0.74,0.01,meningioma 4 | -------------------------------------------------------------------------------- /R/examples/bioinformatics/data/patient_variants.csv: -------------------------------------------------------------------------------- 1 | "","patient_id","gene_name","variant_name","comment","vaf","pop_freq","ethnicity","cancer_type" 2 | "1",1,"BRCA1","E1038G","Associated with increased risk of breast and ovarian cancer",0.35,0.01,"Asian","Breast" 3 | "2",2,"BRCA1","E1038G","Associated with increased risk of breast and ovarian cancer",0.48,0.01,"Caucasian","Ovarian" 4 | "3",3,"FLT3","D835Y","Associated with poor prognosis in acute myeloid leukemia patients",0.15,0.02,"African American","Acute myeloid leukemia" 5 | "4",4,"GATA2","R396C","Associated with susceptibility to myelodysplastic syndrome",0.22,0.05,"Hispanic","Myelodysplastic syndrome" 6 | "5",5,"JAK2","V617F","Found in many cases of myeloproliferative neoplasms",0.67,0.02,"Caucasian","Myeloproliferative neoplasm" 7 | "6",6,"MAP2K1","K57N","Associated with increased risk of colorectal cancer",0.41,0.1,"African American","Colorectal" 8 | "7",7,"NOTCH1","P2514R","Found in many cases of T-cell acute lymphoblastic leukemia",0.58,0.07,"Hispanic","T-cell acute lymphoblastic leukemia" 9 | "8",8,"PIK3CA","E542K","Frequently mutated in various cancers, including breast, colon, and lung",0.72,0.03,"Caucasian","Lung cancer" 10 | "9",9,"PIK3CA","E542K","Frequently mutated in various cancers, including breast, colon, and lung",0.31,0.03,"Asian","Prostate cancer" 11 | "10",10,"PTEN","R130G","Associated with increased risk of multiple types of cancer",0.44,0.08,"Caucasian","Skin cancer" 12 | "11",11,"RB1","R661W","Frequently mutated in various types of cancer, including retinoblastoma",0.27,0.04,"African 
American","Retinoblastoma" 13 | "12",12,"SMAD4","R361H","Associated with increased risk of pancreatic cancer",0.39,0.01,"Hispanic","Pancreatic cancer" 14 | "13",13,"SMC1A","N496K","Associated with increased risk of Cornelia de Lange syndrome",0.18,0.02,"Caucasian","Cornelia de Lange syndrome" 15 | "14",14,"SMC3","N1020Y","Associated with increased risk of colorectal cancer",0.51,0.09,"Asian","Colorectal" 16 | "15",15,"STAT3","Y640F","Frequently mutated in various types of cancer, including lymphoma and leukemia",0.62,0.05,"American","Lymphoma African" 17 | "16",16,"TET2","G1664R","Frequently mutated in myelodysplastic syndromes and acute myeloid leukemia",0.29,0.03,"Hispanic","Myelodysplastic syndrome" 18 | "17",17,"TGFBR2","R537P","Associated with increased risk of hereditary nonpolyposis colorectal cancer",0.76,0.01,"Caucasian","Hereditary nonpolyposis colorectal cancer" 19 | "18",18,"TP53","R175H","Associated with increased risk of many types of cancer, including breast and colon",0.47,0.02,"African American","Breast cancer" 20 | "19",19,"VHL","R167W","Associated with increased risk of renal cell carcinoma",0.53,0.05,"Hispanic","Renal cell carcinoma" 21 | "20",20,"XPA","R228H","Associated with increased risk of skin cancer and xeroderma pigmentosum",0.61,0.06,"Caucasian","Skin cancer" 22 | "21",21,"XRCC2","R188H","Associated with increased risk of breast cancer and ovarian cancer",0.36,0.08,"Asian","Breast cancer" 23 | "22",22,"ZFHX3","R622W","Associated with increased risk of schizophrenia and bipolar disorder",0.12,0.02,"Caucasian","Schizophrenia" 24 | "23",23,"BRCA1","R361H","A study found that individuals with this variant had a higher risk of developing breast and ovarian cancer.",0.63,0.01,"Asian","Lung Cancer" 25 | "24",24,"BRCA1","E1038G","Associated with increased risk of breast and ovarian cancer",0.21,0.01,"African","Lung Cancer" 26 | "25",25,"BRCA1","E1038G","Associated with increased risk of breast and ovarian cancer",0.48,0.01,"Caucasian","Breast Cancer" 27 | "26",26,"BRCA1","D835Y","This variant has been found to be associated with increased sensitivity to chemotherapy.",0.36,0.03,"Asian","Breast Cancer" 28 | "27",27,"BRCA1","D835Y","This variant has been found to be associated with increased sensitivity to chemotherapy.",0.42,0.03,"Caucasian","Lung Cancer" 29 | "28",28,"BRCA1","D835Y","This variant has been found to be associated with increased sensitivity to chemotherapy.",0.18,0.03,"African","Lung Cancer" 30 | "29",29,"BRCA1","R396C",NA,0.75,0.02,"Caucasian","Prostate" 31 | "30",30,"FLT3","E1038G","Individuals with this variant have an increased risk of developing acute myeloid leukemia.",0.31,0.04,"Asian","Prostate Cancer" 32 | "31",31,"FLT3","R622W","A study found that this variant is associated with a higher response rate to FLT3 inhibitors in patients with acute myeloid leukemia.",0.28,0.06,"Hispanic","Lung Cancer" 33 | "32",32,"FLT3","D835Y","Associated with poor prognosis in acute myeloid leukemia patients",0.59,0.02,"Asian","Lung Cancer" 34 | "33",33,"GATA2","E1038G","This variant has been found to be associated with an increased risk of developing myelodysplastic syndrome.",0.14,0.09,"African","Lung" 35 | "34",34,"JAK2","K57N","This variant is commonly found in patients with myeloproliferative neoplasms and is associated with a poorer prognosis.",0.67,0.05,"Caucasian","Lung" 36 | "35",35,"MAP2K1","P2514R","A study found that individuals with this variant had a higher risk of developing colorectal cancer.",0.51,0.07,"Caucasian","Breast Cancer" 37 | 
"36",36,"MAP2K1","E542K","This variant has been found to be associated with a poorer response to chemotherapy in patients with colorectal cancer.",0.42,0.02,"Asian","Breast Cancer" 38 | "37",37,"NOTCH1","V617F","This variant has been found to be associated with a poorer prognosis in patients with T-cell acute lymphoblastic leukemia.",0.23,0.01,"Caucasian","Leukemia" 39 | "38",38,"PIK3CA","R661W","This variant is commonly found in patients with lung cancer and is associated with a poorer prognosis.",0.57,0.08,"Asian","Colorectal Cancer" 40 | "39",39,"PIK3CA","R396C","This variant has been found to be associated with a higher response rate to targeted therapy in patients with lung cancer.",0.41,0.06,"African","Colorectal Cancer" 41 | "40",40,"PTEN","R361H","A study found that individuals with this variant had a higher risk of developing prostate cancer.",0.62,0.02,"African","Prostate Cancer" 42 | "41",41,"RB1","N496K","This variant is commonly found in patients with retinoblastoma and is associated with a higher risk of developing secondary cancers.",0.28,0.03,"Caucasian","Retinoblastoma" 43 | "42",42,"SMAD4","R188H","This variant has been found to be associated with a higher risk of developing pancreatic cancer.",0.39,0.04,"Hispanic","Pancreatic Cancer" 44 | "43",43,"SMC1A","R361H","This variant is commonly found in patients with Cornelia de Lange syndrome and is associated with a more severe phenotype.",0.18,0.05,"African","Ovarian Cancer" 45 | "44",44,"SMC3","R396C","This variant is commonly found in patients with colorectal cancer and is associated with a poorer prognosis.",0.35,0.07,"Caucasian","Ovarian" 46 | "45",45,"STAT3","V617F","A study found that individuals with this variant had a higher risk of developing lymphoma.",0.49,0.01,"African","Leukemia" 47 | "46",46,"TET2","N1020Y","This variant is commonly found in patients with myelodysplastic syndrome and is associated with a poorer prognosis.",0.27,0.02,"African","Myelodysplastic Syndrome" 48 | "47",5,"MAP2K1","K57N","Associated with increased risk of colorectal cancer",0.62,0.1,"Caucasian","Myeloproliferative neoplasm" 49 | "48",6,"SMAD4","R361H","Associated with increased risk of pancreatic cancer",0.29,0.01,"African American","Colorectal" 50 | "49",7,"TGFBR2","R537P","Associated with increased risk of hereditary nonpolyposis colorectal cancer",0.76,0.01,"Hispanic","T-cell acute lymphoblastic leukemia" 51 | "50",8,"ZFHX3","R622W","Associated with increased risk of schizophrenia and bipolar disorder",0.47,0.02,"Caucasian","Lung cancer" 52 | "51",9,"FLT3","R622W","A study found that this variant is associated with a higher response rate to FLT3 inhibitors in patients with acute myeloid leukemia.",0.53,0.06,"Asian","Prostate cancer" 53 | "52",10,"FLT3","E1038G","Individuals with this variant have an increased risk of developing acute myeloid leukemia.",0.61,0.04,"Caucasian","Skin cancer" 54 | "53",11,"FLT3","E1038G","Individuals with this variant have an increased risk of developing acute myeloid leukemia.",0.36,0.04,"African American","Retinoblastoma" 55 | "54",12,"FLT3","E1038G","Individuals with this variant have an increased risk of developing acute myeloid leukemia.",0.12,0.04,"Hispanic","Pancreatic cancer" 56 | "55",13,"BRCA1","E1038G","Associated with increased risk of breast and ovarian cancer",0.63,0.01,"Caucasian","Cornelia de Lange syndrome" 57 | "56",26,"BRCA1","E1038G","Associated with increased risk of breast and ovarian cancer",0.21,0.01,"Asian","Breast Cancer" 58 | "57",27,"BRCA1","E1038G","Associated with increased 
risk of breast and ovarian cancer",0.48,0.01,"Caucasian","Lung Cancer" 59 | "58",28,"BRCA1","R396C",NA,0.12,0.02,"African","Lung Cancer" 60 | "59",29,"TET2","N1020Y","This variant is commonly found in patients with myelodysplastic syndrome and is associated with a poorer prognosis.",0.63,0.02,"Caucasian","Prostate" 61 | "60",30,"BRCA1","D835Y","This variant has been found to be associated with increased sensitivity to chemotherapy.",0.21,0.03,"Asian","Prostate Cancer" 62 | "61",31,"PIK3CA","R661W","This variant is commonly found in patients with lung cancer and is associated with a poorer prognosis.",0.48,0.08,"Hispanic","Lung Cancer" 63 | "62",43,"SMC3","N1020Y","Associated with increased risk of colorectal cancer",0.36,0.09,"African","Ovarian Cancer" 64 | "63",44,"RB1","R661W","Frequently mutated in various types of cancer, including retinoblastoma",0.42,0.04,"Caucasian","Ovarian" 65 | "64",45,"PTEN","R130G","Associated with increased risk of multiple types of cancer",0.18,0.08,"African","Leukemia" 66 | "65",6,"NOTCH1","P2514R","Found in many cases of T-cell acute lymphoblastic leukemia",0.75,0.07,"African American","Colorectal" 67 | "66",7,"PIK3CA","E542K","Frequently mutated in various cancers, including breast, colon, and lung",0.31,0.03,"Hispanic","T-cell acute lymphoblastic leukemia" 68 | "67",8,"BRCA1","R361H","A study found that individuals with this variant had a higher risk of developing breast and ovarian cancer.",0.28,0.01,"Caucasian","Lung cancer" 69 | "68",9,"BRCA1","D835Y","This variant has been found to be associated with increased sensitivity to chemotherapy.",0.59,0.03,"Asian","Prostate cancer" 70 | "69",10,"BRCA1","R396C",NA,0.61,0.02,"Caucasian","Skin cancer" 71 | "70",11,"JAK2","K57N","This variant is commonly found in patients with myeloproliferative neoplasms and is associated with a poorer prognosis.",0.36,0.05,"African American","Retinoblastoma" 72 | "71",12,"MAP2K1","P2514R","A study found that individuals with this variant had a higher risk of developing colorectal cancer.",0.12,0.07,"Hispanic","Pancreatic cancer" 73 | "72",28,"PIK3CA","R661W","This variant is commonly found in patients with lung cancer and is associated with a poorer prognosis.",0.63,0.08,"African","Lung Cancer" 74 | "73",29,"PIK3CA","R396C","This variant has been found to be associated with a higher response rate to targeted therapy in patients with lung cancer.",0.21,0.06,"Caucasian","Prostate" 75 | "74",30,"PTEN","R361H","A study found that individuals with this variant had a higher risk of developing prostate cancer.",0.48,0.02,"Asian","Prostate Cancer" 76 | "75",31,"GATA2","R396C","Associated with susceptibility to myelodysplastic syndrome",0.36,0.05,"Hispanic","Lung Cancer" 77 | "76",43,"PTEN","R130G","Associated with increased risk of multiple types of cancer",0.42,0.08,"African","Ovarian Cancer" 78 | "77",44,"XPA","R228H","Associated with increased risk of skin cancer and xeroderma pigmentosum",0.18,0.06,"Caucasian","Ovarian" 79 | "78",8,"NOTCH1","P2514R","Found in many cases of T-cell acute lymphoblastic leukemia",0.75,0.07,"Caucasian","Lung cancer" 80 | "79",9,"JAK2","V617F","Found in many cases of myeloproliferative neoplasms",0.31,0.02,"Asian","Prostate cancer" 81 | "80",10,"PIK3CA","R396C","This variant has been found to be associated with a higher response rate to targeted therapy in patients with lung cancer.",0.15,0.06,"Caucasian","Skin cancer" 82 | "81",11,"PTEN","R361H","A study found that individuals with this variant had a higher risk of developing prostate 
cancer.",0.03,0.02,"African American","Retinoblastoma" 83 | "82",12,"RB1","N496K","This variant is commonly found in patients with retinoblastoma and is associated with a higher risk of developing secondary cancers.",0.37,0.03,"Hispanic","Pancreatic cancer" 84 | "83",28,"JAK2","K57N","This variant is commonly found in patients with myeloproliferative neoplasms and is associated with a poorer prognosis.",0.25,0.05,"African","Lung Cancer" 85 | "84",31,"MAP2K1","E542K","This variant has been found to be associated with a poorer response to chemotherapy in patients with colorectal cancer.",0.58,0.02,"Hispanic","Lung Cancer" 86 | "85",43,"RB1","R661W","Frequently mutated in various types of cancer, including retinoblastoma",0.21,0.04,"African","Ovarian Cancer" 87 | "86",44,"SMAD4","R361H","Associated with increased risk of pancreatic cancer",0.32,0.01,"Caucasian","Ovarian" 88 | "87",10,"FLT3","D835Y","Associated with poor prognosis in acute myeloid leukemia patients",0.23,0.02,"Caucasian","Skin cancer" 89 | "88",11,"BRCA1","E1038G","Associated with increased risk of breast and ovarian cancer",0.19,0.01,"African American","Retinoblastoma" 90 | "89",12,"FLT3","D835Y","Associated with poor prognosis in acute myeloid leukemia patients",0.18,0.02,"Hispanic","Pancreatic cancer" 91 | "90",28,"BRCA1","E1038G","Associated with increased risk of breast and ovarian cancer",0.38,0.01,"African","Lung Cancer" 92 | "91",31,"MAP2K1","K57N","Associated with increased risk of colorectal cancer",0.37,0.1,"Hispanic","Lung Cancer" 93 | "92",11,"TGFBR2","R537P","Associated with increased risk of hereditary nonpolyposis colorectal cancer",0.18,0.01,"African American","Retinoblastoma" 94 | "93",12,"TGFBR2","R537P","Associated with increased risk of hereditary nonpolyposis colorectal cancer",0.26,0.01,"Hispanic","Pancreatic cancer" 95 | "94",28,"TGFBR2","R537P","Associated with increased risk of hereditary nonpolyposis colorectal cancer",0.48,0.01,"African","Lung Cancer" 96 | "95",11,"TP53","R175H","Associated with increased risk of many types of cancer, including breast and colon",0.45,0.02,"African American","Retinoblastoma" 97 | "96",12,"TP53","R175H","Associated with increased risk of many types of cancer, including breast and colon",0.28,0.02,"Hispanic","Pancreatic cancer" 98 | "97",28,"TP53","R175H","Associated with increased risk of many types of cancer, including breast and colon",0.26,0.02,"African","Lung Cancer" 99 | "98",11,"VHL","R167W","Associated with increased risk of renal cell carcinoma",0.55,0.05,"African American","Retinoblastoma" 100 | "99",12,"VHL","R167W","Associated with increased risk of renal cell carcinoma",0.34,0.05,"Hispanic","Pancreatic cancer" 101 | "100",28,"VHL","R167W","Associated with increased risk of renal cell carcinoma",0.43,0.05,"African","Lung Cancer" 102 | "101",12,"TET2","N1020Y","This variant is commonly found in patients with myelodysplastic syndrome and is associated with a poorer prognosis.",0.42,0.02,"Hispanic","Pancreatic cancer" 103 | "102",47,NA,NA,NA,NA,NA,"African","Myelodysplastic Syndrome" 104 | -------------------------------------------------------------------------------- /R/examples/bioinformatics/data/variants.sqlite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frycast/SQL_course/13d5cf01fdd82b7cf8de3bb29846b5d595d07ab4/R/examples/bioinformatics/data/variants.sqlite -------------------------------------------------------------------------------- 
/R/examples/bioinformatics/data/variants_dodgy.sqlite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frycast/SQL_course/13d5cf01fdd82b7cf8de3bb29846b5d595d07ab4/R/examples/bioinformatics/data/variants_dodgy.sqlite -------------------------------------------------------------------------------- /R/examples/bioinformatics/functions.R: --------------------------------------------------------------------------------
1 | # -----------------------
2 | #
3 | # Author: Daniel Fryer
4 | # Date: 2023-02-23
5 | #
6 | # Helper functions for bioinformatics example
7 | #
8 | # -----------------------
9 |
10 |
11 | # execute_sql_script
12 | #
13 | # This function loads a '.sql' file and then executes
14 | # every statement in the file, on the sql database.
15 | #
16 | # It breaks the script up and executes one statement
17 | # at a time because dbExecute is unable to
18 | # execute multiple statements.
19 | #
20 | execute_sql_script <- function(con, file) {
21 | sql <- readr::read_file(file)
22 | sql <- glue::glue_collapse(sql)
23 | sql <- strsplit(sql, ";")
24 | lapply(
25 | sql[[1]],
26 | function(s) {DBI::dbExecute(con, s)}
27 | )
28 | }
29 |
30 |
31 | # split_variant_name
32 | #
33 | # The first letter of a variant name refers to the amino acid that is normally
34 | # at the position within the gene.
35 | #
36 | # Sandwiched between the first and last letter is a number.
37 | # This number is the position within the gene.
38 | #
39 | # The last letter of a variant name refers to the amino acid resulting from
40 | # the change.
41 | #
42 | split_variant_name <- function(variant_name) {
43 | first_letter <- stringr::str_sub(variant_name, 1, 1)
44 | last_letter <- stringr::str_sub(variant_name, - 1, - 1)
45 | middle_part <- stringr::str_sub(variant_name, 2, -2)
46 | return(
47 | data.frame(change_from = first_letter,
48 | position = as.integer(middle_part),
49 | change_to = last_letter)
50 | )
51 | }
52 |
53 |
54 | # find_gene_variant_by_name
55 | #
56 | # check if a gene_variant is already in the database, searching
57 | # only by gene name and variant name
58 | #
59 | # if it is present, return details from the database;
60 | # otherwise, return an empty table
61 | #
62 | find_gene_variant_by_name <- function(con, gene_name, variant_name) {
63 |
64 | query <- glue::glue("
65 | SELECT G.name AS gene_name,
66 | V.name AS variant_name,
67 | GV.pop_freq, GV.comment,
68 | G.gene_id, V.variant_id, GV.gene_variant_id
69 | FROM gene_variant GV
70 | JOIN gene G ON GV.gene_id = G.gene_id
71 | JOIN variant V ON GV.variant_id = V.variant_id
72 | WHERE G.name = '{gene_name}' AND V.name = '{variant_name}'
73 | ")
74 | return(DBI::dbGetQuery(con, query))
75 | }
76 |
77 |
78 | # find_gene_variant
79 |
80 | # check if a gene_variant is already in the database, searching
81 | # only by gene_id and variant_id
82 | #
83 | # if it is present, return details from the database;
84 | # otherwise, return an empty table
85 | #
86 | find_gene_variant <- function(con, gene_id, variant_id) {
87 |
88 | query <- glue::glue("
89 | SELECT G.name AS gene_name,
90 | V.name AS variant_name,
91 | GV.pop_freq, GV.comment,
92 | G.gene_id, V.variant_id, GV.gene_variant_id
93 | FROM gene_variant GV
94 | JOIN gene G ON GV.gene_id = G.gene_id
95 | JOIN variant V ON GV.variant_id = V.variant_id
96 | WHERE G.gene_id = {gene_id} AND V.variant_id = {variant_id}
97 | ")
98 | return(DBI::dbGetQuery(con, query))
99 | }
100 |
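# Example usage (illustrative only; assumes functions.R has been sourced
# and that `con` is a DBI connection to data/variants.sqlite):
#
# split_variant_name("G13D")
# #   change_from position change_to
# # 1           G       13         D
#
# find_gene_variant_by_name(con, "BRCA1", "E1038G")
# # returns the matching Gene_variant row(s),
# # or an empty data frame if that combination is not in the database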
-------------------------------------------------------------------------------- /R/examples/bioinformatics/start_here.R: --------------------------------------------------------------------------------
1 | # -----------------------
2 | #
3 | # Author: Daniel Fryer
4 | # Date: 2023-02-23
5 | #
6 | # This script creates two sqlite databases:
7 | #
8 | # 1. variants
9 | # 2. variants_dodgy
10 | #
11 | # Database 1 (variants) contains 5 tables:
12 | # GENE, VARIANT, PATIENT, GENE_VARIANT and PATIENT_GENE_VARIANT.
13 | #
14 | # Database 2 (variants_dodgy) contains 1 table:
15 | # PATIENT_VARIANTS
16 | #
17 | # In database 1, we use primary/foreign key pairs
18 | # to specify relationships between tables.
19 | #
20 | # In database 2, we avoid primary/foreign keys,
21 | # and just insert everything into one table.
22 | #
23 | # The same dummy data is used in both databases.
24 | #
25 | # Database 1 (variants) is then
26 | # compared to database 2 (variants_dodgy).
27 | #
28 | # The comparison highlights why we prefer
29 | # database 1 over database 2.
30 | #
31 | # CAUTION:
32 | # A lot of the data for this exercise was generated by ChatGPT.
33 | # It is NOT a real dataset.
34 | #
35 | # -----------------------
36 |
37 | # See RSQLite tutorial here:
38 | # https://cran.r-project.org/web/packages/RSQLite/vignettes/RSQLite.html
39 |
40 | install.packages("RSQLite")
41 | library(RSQLite)
42 |
43 | # We will also use these libraries (some are used by functions in functions.R)
44 | library(readr)
45 | library(glue)
46 | library(tibble)
47 | library(dplyr)
48 | library(magrittr)
49 |
50 | # We will use these helper functions too
51 | source("functions.R")
52 |
53 | # -------------------------------------------------------------------------
54 |
55 | # create an empty database and connect to it
56 | con <- DBI::dbConnect(RSQLite::SQLite(), "data/variants.sqlite")
57 |
58 | # execute the script create_variants_database.sql
59 | # NOTE: if the database already exists, this will produce an error.
60 | # you may want to delete data/variants.sqlite first.
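# One way to do that from R (run it before the dbConnect call above):
# if (file.exists("data/variants.sqlite")) file.remove("data/variants.sqlite")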
61 | execute_sql_script(con, "create_variants_database.sql")
62 |
63 | # list the table names
64 | DBI::dbListTables(con)
65 |
66 | # take a look at the Gene table for example
67 | gene <- dplyr::tbl(con, "Gene")
68 | gene <- gene %>% dplyr::collect()
69 |
70 | # This joins all the tables in the variants database
71 | query <- '
72 | SELECT P.patient_id,
73 | G.name AS gene_name, V.name AS variant_name,
74 | GV.comment,
75 | PGV.vaf, GV.pop_freq,
76 | P.ethnicity, P.cancer_type
77 | FROM Patient_Gene_variant PGV
78 | RIGHT JOIN Gene_variant GV ON PGV.gene_variant_id = GV.gene_variant_id
79 | RIGHT JOIN Gene G ON GV.gene_id = G.gene_id
80 | RIGHT JOIN Variant V ON GV.variant_id = V.variant_id
81 | RIGHT JOIN Patient P ON PGV.patient_id = P.patient_id
82 | '
83 | one_table <- DBI::dbGetQuery(con, query)
84 |
85 | # disconnect from the variants database
86 | DBI::dbDisconnect(con)
87 |
88 | # create an empty variants_dodgy database
89 | con <- DBI::dbConnect(RSQLite::SQLite(), "data/variants_dodgy.sqlite")
90 |
91 | # add table to the database
92 | DBI::dbWriteTable(con, "patient_variants", one_table)
93 |
94 | # disconnect from the variants_dodgy database
95 | DBI::dbDisconnect(con)
96 |
97 | # write the patient_variants table as a csv
98 | write.csv(one_table, "data/patient_variants.csv")
99 |
100 | # Example -----------------------------------------------------------------
101 |
102 | con1 <- DBI::dbConnect(RSQLite::SQLite(), "data/variants_dodgy.sqlite")
103 | con2 <- DBI::dbConnect(RSQLite::SQLite(), "data/variants.sqlite")
104 |
105 | # The variants (like G13D) always have the pattern <letter><number><letter>.
106 | # Find all the variants starting with D. Count the number of times a patient
107 | # has had that variant.
108 |
109 | # query1 and query2 both achieve the same thing, just on different database
110 | # structures.
111 |
112 | # This one uses the variants_dodgy database. It makes use of LIKE
113 | query1 <- "
114 | SELECT COUNT(*) AS num
115 | FROM patient_variants
116 | WHERE variant_name LIKE 'D%'
117 | "
118 |
119 | # This one uses the variants database. It makes use of JOIN and '='
120 | query2 <- "
121 | SELECT COUNT(*) AS num
122 | FROM Patient_Gene_variant PGV
123 | JOIN Gene_Variant GV ON PGV.gene_variant_id = GV.gene_variant_id
124 | JOIN Variant V ON GV.variant_id = V.variant_id
125 | WHERE V.change_from = 'D'
126 | "
127 |
128 | result1 <- DBI::dbGetQuery(con1, query1)
129 | result2 <- DBI::dbGetQuery(con2, query2)
130 |
131 | DBI::dbDisconnect(con1)
132 | DBI::dbDisconnect(con2)
133 |
134 | # Auto-generated primary key (AUTOINCREMENT) -------------------------------
135 |
136 | # The tables are set up so that they automatically
137 | # generate a primary key if we don't insert one explicitly
138 | con <- DBI::dbConnect(RSQLite::SQLite(), "data/variants.sqlite")
139 | p <- data.frame(ethnicity = "TEST")
140 | DBI::dbAppendTable(con, "Patient", p) # success!
141 | 142 | # We can retrieve the patient_id that was generated, and use it to find the patient record 143 | auto_patient_id <- DBI::dbGetQuery(con, "SELECT last_insert_rowid()")$`last_insert_rowid()` 144 | DBI::dbGetQuery(con, glue::glue( 145 | "SELECT * FROM Patient WHERE patient_id = {auto_patient_id}")) 146 | 147 | DBI::dbDisconnect(con) 148 | 149 | # the columns of the new data are: 150 | # NHI, variant_code, gene_name, vaf, pop_freq, tumour_type 151 | 152 | new_records <- read.csv("data/new_records.csv") 153 | 154 | # no ethnicity (NULL) 155 | new_patients <- data.frame(patient_id = new_records$NHI, 156 | cancer_type = new_records$tumour_type) 157 | 158 | # TODO: check if the patient is in the table 159 | # TODO: if new patient then insert row, retrieve patient_id 160 | 161 | # no variant_id 162 | new_variants <- data.frame(name = new_records$variant_code) 163 | new_variants <- cbind(new_variants, split_variant_name(new_variants$name)) 164 | 165 | # TODO: check if the variant is in the table 166 | # TODO: if new variant then insert row, retrieve the generated variant_id 167 | 168 | # no gene_id 169 | new_genes <- data.frame(name = new_records$gene_name) 170 | 171 | # TODO: check if the gene is in the table 172 | # TODO: if new gene then insert row, retrieve the generated gene_id 173 | 174 | # no gene_variant_id, gene_id or variant_id 175 | new_gene_variants <- data.frame(pop_freq = new_records$pop_freq) 176 | 177 | # TODO: use gene_id and variant_id in find_gene_variant() to check if the gene_variant is in the table 178 | # TODO: if new gene_variant then insert row, retrieve the gene_variant_id 179 | 180 | # no gene_variant_id 181 | new_patient_gene_variants <- data.frame(patient_id = new_records$NHI, 182 | vaf = new_records$vaf) 183 | 184 | # TODO: if it's an existing patient, check if they already have that gene_variant recorded 185 | # TODO: if new patient_gene_variant then insert row 186 | 187 | # Exploring --------------------------------------------------------------- 188 | 189 | # # convert one_table to a tibble and arrange by patient_id 190 | # one_table <- one_table %>% 191 | # tibble::as_tibble() %>% 192 | # dplyr::arrange(patient_id) 193 | # 194 | # # try out the function for splitting variant names 195 | # split_variant_name(one_table$variant_name) 196 | 197 | # Invalid missense variant nomenclature examples for G13D: 198 | # '13D', 'G 13 D', '13>GD', etc 199 | # 'KRAS: c.38G>A','p.Gly13Asp' 200 | 201 | # Invalid gene nomenclature examples for KRAS: 202 | # 'C-K-RAS', 'CFC2', 'K-RAS2A', 'K-RAS2B' 203 | 204 | # vaf: 205 | # the proportion of DNA molecules in a 206 | # sample that carry a specific genetic variant. 207 | 208 | # pop_freq: 209 | # proportion of individuals in a given population 210 | # who carry a specific genetic variant. 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | -------------------------------------------------------------------------------- /R/intro-to-R.R: -------------------------------------------------------------------------------- 1 | message(" 2 | This R script aims to get you acquainted with R for the first time. 3 | 4 | Prerequisites are that you: 5 | 6 | * have R and RStudio installed 7 | * know how to execute code in R 8 | * know what a script is, and how to execute chunks of code from a script 9 | 10 | If you are a student in my course, feel free to contact me any time for help! 11 | 12 | Each comment below explains what the code underneath it is doing. 13 | 14 | Read each comment, and then execute the code below the comment (in RStudio). 
15 | 16 | ") 17 | 18 | # Step 1: install the Swirl R package 19 | install.packages("swirl") 20 | 21 | # Step 2: load the swirl library 22 | library(swirl) 23 | 24 | # Step 3: run the swirl tutorial 25 | swirl() 26 | 27 | # Done! Now, move your cursor down into the console (bottom left of RStudio) 28 | # and follow the instructions in the console (given by the swirl tutorial). 29 | 30 | # The instructions in the console will let you choose which 'courses' to do. 31 | # Please complete as many courses as you can, but at least complete: 32 | # 1: R Programming: The basics of programming in R. 33 | 34 | # Have fun. -------------------------------------------------------------------------------- /R/sqlite-R/MyDB.sqlite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frycast/SQL_course/13d5cf01fdd82b7cf8de3bb29846b5d595d07ab4/R/sqlite-R/MyDB.sqlite -------------------------------------------------------------------------------- /R/sqlite-R/README.md: -------------------------------------------------------------------------------- 1 | # Using SQLite in R 2 | 3 | This folder contains an R script and practice database to guide you through using SQLite in R. 4 | 5 | To download the scripts, you can right-click (or control-click) the links below, and then click "save link as". 6 | 7 | * [Click here for the R script](https://github.com/frycast/SQL_course/raw/master/R/sqlite-R/sqlite.R) 8 | * [Click here for the Sandpit practice database file](https://github.com/frycast/SQL_course/raw/master/R/sqlite-R/Sandpit.sqlite) 9 | -------------------------------------------------------------------------------- /R/sqlite-R/Sandpit.sqlite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frycast/SQL_course/13d5cf01fdd82b7cf8de3bb29846b5d595d07ab4/R/sqlite-R/Sandpit.sqlite -------------------------------------------------------------------------------- /R/sqlite-R/sqlite.R: -------------------------------------------------------------------------------- 1 | # See RSQLite tutorial here: 2 | # https://cran.r-project.org/web/packages/RSQLite/vignettes/RSQLite.html 3 | 4 | install.packages("RSQLite") 5 | library(RSQLite) 6 | 7 | # Connect to the Sandpit database ----------------------------------------- 8 | 9 | # If the SQLite Sandpit database is not already in your working directory, 10 | # then first download it from the link below, 11 | # and save it in your working directory. 
12 | # https://github.com/frycast/SQL_course/raw/master/R/Sandpit.sqlite 13 | 14 | # connect to Sandpit 15 | con <- DBI::dbConnect(RSQLite::SQLite(), "Sandpit.sqlite") 16 | 17 | # List all the tables in Sandpit 18 | DBI::dbListTables(con) 19 | 20 | # disconnect 21 | DBI::dbDisconnect(con) 22 | 23 | # Create a new empty database -------------------------------------------- 24 | 25 | # create (or connect to) the database 26 | con <- DBI::dbConnect(RSQLite::SQLite(), "MyDB.sqlite") 27 | 28 | # disconnect 29 | DBI::dbDisconnect(con) 30 | 31 | # Save data to a database ----------------------------------------------- 32 | 33 | # We'll save the built-in mtcars and iris datasets to a SQLite database 34 | mtcars 35 | iris 36 | 37 | # create (or connect to) the database 38 | con <- DBI::dbConnect(RSQLite::SQLite(), "MyDB.sqlite") 39 | 40 | # Write data to the SQLite database 41 | DBI::dbWriteTable(con, "mtcars", mtcars) 42 | DBI::dbWriteTable(con, "iris", iris) 43 | 44 | # View the tables in our database 45 | DBI::dbListTables(con) 46 | 47 | # Connect to one of the tables 48 | mtcars_con <- dplyr::tbl(con, "mtcars") 49 | 50 | # write queries 51 | library(dbplyr) 52 | library(dplyr) 53 | 54 | # query 1 55 | mtcars_q1 <- mtcars_con %>% 56 | dplyr::select(mpg, cyl, disp) 57 | 58 | # query 2 59 | mtcars_q2 <- mtcars_con %>% 60 | dplyr::filter(gear == 4) %>% 61 | dplyr::select(mpg, cyl) 62 | 63 | # query 3 64 | mtcars_q3 <- mtcars_con %>% 65 | dplyr::filter(qsec > 16) %>% 66 | dplyr::group_by(gear) %>% 67 | dplyr::summarise( 68 | mpg_max = max(mpg, na.rm=T), 69 | cyl_mean = mean(cyl, na.rm=T)) 70 | 71 | # query 4 72 | mtcars_q4 <- mtcars_con %>% 73 | dplyr::group_by(gear) %>% 74 | dplyr::filter(qsec > 16) %>% 75 | dplyr::summarise( 76 | mpg_max = max(mpg, na.rm=T), 77 | cyl_mean = mean(cyl, na.rm=T)) %>% 78 | dplyr::filter(cyl_mean <= 5) 79 | 80 | # look at the SQL code that dplyr generated 81 | mtcars_q1 %>% show_query() 82 | mtcars_q2 %>% show_query() 83 | mtcars_q3 %>% show_query() 84 | mtcars_q4 %>% show_query() # this one uses a subquery to avoid using HAVING. 85 | 86 | # execute the sql code and retrieve the full results 87 | mtcars_q1 %>% collect() 88 | mtcars_q2 %>% collect() 89 | mtcars_q3 %>% collect() 90 | mtcars_q4 %>% collect() 91 | 92 | # Or we manually write SQL instead! 93 | result <- DBI::dbGetQuery( 94 | con, " 95 | SELECT gear, MAX(mpg) AS mpg_max, AVG(cyl) AS cyl_mean 96 | FROM mtcars 97 | WHERE qsec > 16 98 | GROUP BY gear 99 | HAVING cyl_mean <= 5 100 | ") 101 | 102 | # view the result 103 | result 104 | 105 | # A neat trick is to use string interpolation to edit the queries 106 | # with variables from your R environment: 107 | # See https://stringr.tidyverse.org/reference/str_interp.html 108 | for (next_cyl in c(4,6,8)) { 109 | res <- DBI::dbGetQuery(con, stringr::str_interp(" 110 | SELECT * 111 | FROM mtcars 112 | WHERE cyl = ${next_cyl} 113 | ")) 114 | cat("\nResults for cyl = ", next_cyl, "\n") 115 | print(res) 116 | } 117 | 118 | # disconnect 119 | DBI::dbDisconnect(con) 120 | 121 | # Side note: 122 | # You may want to browse the database we created, outside of R. 123 | # Assuming the database file ("MyDB.sqlite") is saved on your computer 124 | # you can browse it with DB Browser (a great open-source app). 125 | # Download DB Browser here: 126 | # https://sqlitebrowser.org/dl/ 127 | 128 | # After installing DB browser, you may have to choose "open with" 129 | # on the "MyDB.sqlite" file, and then navigate to your programs/apps directory 130 | # to find DB Browser and open it. 
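# Another side note (a sketch, not covered in the course notes):
# if the value you are filtering on comes from user input, a parameterised
# query is a safer alternative to string interpolation, because DBI
# substitutes the value for the '?' placeholder itself. For example
# (with an open connection `con`):
# DBI::dbGetQuery(con, "SELECT * FROM mtcars WHERE cyl = ?", params = list(4))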
131 | 132 | # Batched queries --------------------------------------------------------- 133 | 134 | # create (or connect to) the database 135 | con <- DBI::dbConnect(RSQLite::SQLite(), "MyDB.sqlite") 136 | 137 | # Send a query but don't retrieve the results yet 138 | rs <- DBI::dbSendQuery(con, " 139 | SELECT * 140 | FROM mtcars 141 | ") 142 | rs 143 | 144 | # Retrieve the first 3 results 145 | df <- DBI::dbFetch(rs, n = 3) 146 | 147 | # df is a regular data.frame 148 | df 149 | mean(df$mpg) 150 | 151 | # We can see how many results have been retrieved 152 | rs 153 | 154 | # This will return TRUE if all results have been retrieved 155 | DBI::dbHasCompleted(rs) 156 | 157 | # Retrieve the next 3 results 158 | df <- DBI::dbFetch(rs, n = 3) 159 | df 160 | 161 | # Retrieve the rest of the results, 3 at a time 162 | while (!DBI::dbHasCompleted(rs)) { 163 | df <- DBI::dbFetch(rs, n = 3) 164 | 165 | # << insert processing on df here >> 166 | 167 | print(nrow(df)) 168 | } 169 | 170 | # Clear/delete the query 171 | DBI::dbClearResult(rs) 172 | 173 | # disconnect 174 | DBI::dbDisconnect(con) 175 | 176 | # Saving CSV files to a database ----------------------------------------- 177 | 178 | # create (or connect to) the database 179 | con <- DBI::dbConnect(RSQLite::SQLite(), "MyDB.sqlite") 180 | 181 | # First let's make some CSVs to practice with (in the working directory) 182 | write.csv(mtcars, "mtcars_example.csv") 183 | write.csv(iris, "iris_example.csv") 184 | 185 | # Find all the filenames in the working directory 186 | all_filenames <- list.files(".") 187 | 188 | # Find all the filenames that end in ".csv" 189 | csv_filenames <- all_filenames %>% stringr::str_subset(".csv") 190 | 191 | # Loop over the CSVs filenames 192 | for (f in csv_filenames) { 193 | 194 | # Read in the CSV 195 | data <- read.csv(f) 196 | 197 | # Remove ".csv" from the name 198 | table_name <- f %>% stringr::str_remove(".csv") 199 | 200 | # Write the table to our database 201 | RSQLite::dbWriteTable(con, name = table_name, value = data) 202 | } 203 | 204 | # List all the tables in our database 205 | DBI::dbListTables(con) 206 | 207 | # disconnect 208 | DBI::dbDisconnect(con) 209 | -------------------------------------------------------------------------------- /R/tidy-data.R: -------------------------------------------------------------------------------- 1 | # TODO: Check out https://tidydatatutor.com/ 2 | 3 | # To get started, we need the tidyr package 4 | install.packages("tidyr") 5 | library(tidyr) 6 | 7 | # Relig_income data ------------------------------------------------------- 8 | 9 | # The package comes with its own datasets to practice on, for example: 10 | relig_income 11 | 12 | # Pivoting the table will make it 'tidy' 13 | pivot_longer( 14 | data=relig_income, 15 | cols=!religion, 16 | names_to = "income", 17 | values_to = "count" 18 | ) 19 | 20 | # Non-atomic cell values -------------------------------------------------- 21 | 22 | # Create the address data 23 | address_data <- data.frame( 24 | name = c("Raleigh Smith", 25 | "Hoa Pham", 26 | "Ram Singh"), 27 | address = c("109 Tenterfield Rd, North Sydney, NSW, 2060", 28 | "36 Feather St, Kings Beach, QLD, 4551", 29 | "90 Sunnyside Rd, Renmark, SA, 5341") 30 | ) 31 | 32 | # Separate the address column 33 | separate( 34 | data = address_data, 35 | col = address, 36 | into = c("street","city","state","postcode"), 37 | sep = ", ") 38 | 39 | # Anscombe's Quartet data ------------------------------------------------- 40 | 41 | anscombe_tidy <- pivot_longer( 42 | data = 
anscombe, 43 | cols = everything(), 44 | names_to = c(".value", "set"), 45 | names_pattern = "(.)(.)" 46 | ) 47 | 48 | anscombe_tidy 49 | 50 | ## Figuring out the rest of this example is an advanced exercise 51 | ## (not usually suitable for new R programmers). 52 | 53 | # Plot the data with lines of best fit 54 | library(ggplot2) 55 | anscombe_tidy %>% 56 | ggplot(aes(x = x, y = y)) + 57 | geom_point() + 58 | facet_wrap(~set) + 59 | geom_smooth(method = "lm", se = FALSE) + 60 | theme_minimal() 61 | 62 | # apply linear regression to each set and obtain results for comparison 63 | library(dplyr) 64 | library(purrr) 65 | library(broom) 66 | 67 | anscombe_tidy %>% 68 | nest(data = !set) %>% 69 | mutate( 70 | model = map(data, ~lm(y~x, data = .)), 71 | tidied = map(model, tidy), 72 | data = map(model, augment)) %>% 73 | unnest(tidied) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Introduction to SQL 2 | 3 | Textbook and course material for the Introduction to SQL course, taught by [Daniel Fryer](https://danielvfryer.com). 4 | 5 | This repository includes slides for the full 4-day course. 6 | 7 | There is also a 2-day course. If you are attending the 2-day course, we won't cover all of the 4-day material, but you still get access to all of it. 8 | 9 | ## Set up guides 10 | 11 | For this course, you need to choose between T-SQL and MySQL. If you don't know which one you want, then consider this: if you are on Windows, T-SQL is the easiest to set up and use. If you are on macOS, then MySQL is the easiest to set up and use. Please do not leave the set up process to the last minute. Leave some time to iron out difficulties getting set up! Feel free to contact me if you need help. The guides below will help you get set up. 12 | 13 | In my experience, it is best to do this on a personal computer, since some people have experienced difficulty getting set up on a work computer (e.g., if you don't have administrator rights). 14 | 15 | * **Set up your database with T-SQL (easiest for Windows):** [click here](create-database/T-SQL/README.md) 16 | * **Set up your database with MySQL (easiest for macOS):** [click here](create-database/MySQL/README.md) 17 | 18 | ## Textbook and slides 19 | 20 | The textbook and slides are linked below. 21 | 22 | * [Course text (The Necessary SQL, featuring MySQL and T-SQL)](textbook-and-slides/SQL_Course_Textbook.pdf) 23 | * [Day 1 slides](textbook-and-slides/SQL_Course_Slides_Day_1.pdf) 24 | * [Day 2 slides](textbook-and-slides/SQL_Course_Slides_Day_2.pdf) 25 | * [Day 3 slides](textbook-and-slides/SQL_Course_Slides_Day_3.pdf) 26 | * Day 4 slides: 27 | - [Day 4 non-interactive pdf version](textbook-and-slides/SQL_Course_Slides_Day_4.pdf) 28 | - [Day 4 interactive web version](https://rpubs.com/frycast/sql-with-r) 29 | 30 | 31 | ## Beginner R tutorial 32 | 33 | R is covered in the 4-day course, though we may have time for some R material during the 2-day course, depending on the audience. For beginner R programmers, day 4 may move a bit fast. This tutorial (by [SwirlStats](https://swirlstats.com/students.html)) will help you get prepared (or brush up). Right-click (or control-click) the link below, and choose 'save link as'. This will download the R script. Once downloaded, open the script in RStudio.
34 | 35 | * [Beginner R tutorial (R script)](https://github.com/frycast/SQL_course/raw/master/R/intro-to-R.R) 36 | 37 | If this is the first time you've used RStudio, I've created a video for you to guide you through using the above script. 38 | 39 | * [Brief intro to RStudio (video)](https://youtu.be/rdcVS7CrWPw) 40 | 41 | To be fully prepared for day 4, I suggest you complete all lessons from Lesson 1 (Basic Building Blocks), to Lesson 9 (Functions), of the Intro to R course, within the Swirl tutorial. You will see these lessons when you start the Swirl tutorial (also explained in the video linked above). I suspect this will take about 1 hour. 42 | 43 | ## IDI Stuff 44 | 45 | If you are working with the New Zealand Integrated Data Infrastructure (4-day course only), you may be interested in the links below. 46 | 47 | * [Day 3 slides include a section on the IDI](textbook-and-slides/SQL_Course_Slides_Day_2.pdf) 48 | * [The 'IDI Stuff' folder includes some useful papers and two data dictionaries](IDI-stuff) 49 | * [Many more data dictionaries on StatsNZ DataInfo+ website](http://datainfoplus.stats.govt.nz/Search?query=idi&search=Search&itemType=4bd6eef6-99df-40e6-9b11-5b8f64e5cb23) 50 | * [A searchable list of variables available in the IDI](https://idi-search.web.app/claims/acc_cla_accident_in_NZ_ind) 51 | 52 | -------------------------------------------------------------------------------- /create-database/MySQL/MySQL-database.sql: -------------------------------------------------------------------------------- 1 | SET SQL_REQUIRE_PRIMARY_KEY = OFF; 2 | 3 | -- ----------------------------------------------------------------- 4 | 5 | -- Practice data for the Intro to SQL Course by Daniel Fryer. 6 | 7 | -- ----------------------------------------------------------------- 8 | 9 | -- ----------------------------------------------------------------- 10 | -- CREATE IDI_CLEAN DATABASE -------------------------------------- 11 | -- ----------------------------------------------------------------- 12 | 13 | DROP DATABASE IF EXISTS IDI_Clean; 14 | CREATE DATABASE IDI_Clean; 15 | 16 | USE IDI_Clean; 17 | 18 | CREATE TABLE DIA_Clean_births ( 19 | snz_uid int not null UNIQUE, 20 | snz_dia_uid int not null UNIQUE, 21 | parent1_snz_uid int null, 22 | parent1_snz_dia_uid int null, 23 | dia_bir_parent1_sex_snz_code varchar(100) null, 24 | dia_bir_parent1_occupation_text varchar(60) null, 25 | parent2_snz_uid int null, 26 | parent2_snz_dia_uid int null, 27 | dia_bir_parent2_sex_snz_code varchar(100) null, 28 | dia_bir_parent2_occupation_text varchar(60) null, 29 | dia_bir_birth_month_nbr tinyint null, 30 | dia_bir_birth_year_nbr smallint null, 31 | dia_bir_sex_snz_code varchar(100) null, 32 | dia_bir_ethnic_grp1_snz_ind bit not null, -- European 33 | dia_bir_ethnic_grp2_snz_ind bit not null, -- Maori 34 | dia_bir_ethnic_grp3_snz_ind bit not null, -- Pacific 35 | dia_bir_ethnic_grp4_snz_ind bit not null, -- Asian 36 | dia_bir_ethnic_grp5_snz_ind bit not null, -- MELAA 37 | dia_bir_ethnic_grp6_snz_ind bit not null); -- Other 38 | 39 | CREATE TABLE DIA_Clean_deaths ( 40 | snz_uid int not null UNIQUE, 41 | snz_dia_uid int not null UNIQUE, 42 | dia_dth_death_month_nbr tinyint null, 43 | dia_dth_death_year_nbr smallint null, 44 | dia_dth_last_seen_month_nbr tinyint null, 45 | dia_dth_last_seen_year_nbr smallint null); 46 | 47 | CREATE TABLE DIA_Clean_marriages ( 48 | partnr1_snz_uid int not null UNIQUE, 49 | partnr1_snz_dia_uid int not null UNIQUE, 50 | partnr2_snz_uid int not null UNIQUE, 51 | 
partnr2_snz_dia_uid int not null UNIQUE, 52 | dia_mar_partnr1_birth_month_nbr tinyint null, 53 | dia_mar_partnr1_birth_year_nbr smallint null, 54 | dia_mar_partnr1_sex_snz_code varchar(100) null, 55 | dia_mar_partnr1_occupation_text varchar(60) null, 56 | dia_mar_partnr2_birth_month_nbr tinyint null, 57 | dia_mar_partnr2_birth_year_nbr smallint null, 58 | dia_mar_partnr2_sex_snz_code varchar(100) null, 59 | dia_mar_partnr2_occupation_text varchar(60) null); 60 | 61 | CREATE TABLE DIA_Clean_civil_unions ( 62 | partnr1_snz_uid int not null UNIQUE, 63 | partnr1_snz_dia_uid int not null UNIQUE, 64 | partnr2_snz_uid int not null UNIQUE, 65 | partnr2_snz_dia_uid int not null UNIQUE, 66 | dia_civ_partnr1_birth_month_nbr tinyint null, 67 | dia_civ_partnr1_birth_year_nbr smallint null, 68 | dia_civ_partnr1_sex_snz_code varchar(100) null, 69 | dia_civ_partnr1_occupation_text varchar(60) null, 70 | dia_civ_partnr2_birth_month_nbr tinyint null, 71 | dia_civ_partnr2_birth_year_nbr smallint null, 72 | dia_civ_partnr2_sex_snz_code varchar(100) null, 73 | dia_civ_partnr2_occupation_text varchar(60) null); 74 | 75 | -- Linkage process information. 76 | -- One row for each snz_uid. 77 | -- Includes links between individual identifiers. 78 | -- Most values are NULL. 79 | CREATE TABLE security_concordance ( 80 | snz_uid int not null UNIQUE, 81 | snz_dia_uid int, 82 | snz_ird_uid int, 83 | snz_moh_uid int, 84 | snz_cen_uid int, 85 | snz_acc_uid int, 86 | snz_dol_uid int, 87 | snz_spine_uid int); 88 | 89 | INSERT INTO security_concordance VALUES 90 | (10 ,34 , 0 , NULL, NULL ,29 ,NULL,100 ), 91 | (2 ,55 , 1 , NULL, NULL ,23 ,NULL,143 ), 92 | (1 ,32 , 2 , NULL, NULL ,22 ,NULL,412 ), 93 | (4 ,1 , 3 , NULL, NULL ,21 ,NULL,563 ), 94 | (7 ,67 , 4 , NULL, NULL ,26 ,NULL,213 ), 95 | (9 ,NULL , 5 , NULL, NULL ,25 ,NULL,553 ), 96 | (5 ,32 , 6 , NULL, NULL ,24 ,NULL,153 ), 97 | (8 ,43 , 7 , NULL, NULL ,28 ,NULL,562 ), 98 | (6 ,23 , 8 , NULL, NULL ,27 ,NULL,643 ), 99 | (3 ,123 , 9 , NULL, NULL ,20 ,NULL,142 ), 100 | (11 ,NULL , 7 , NULL, NULL ,NULL,NULL,154 ), 101 | (12 ,65 , 5 , NULL, NULL ,NULL,NULL,853 ), 102 | (13 ,NULL , 10 , NULL, NULL ,6 ,NULL,128 ), 103 | (14 ,NULL , 12 , NULL, NULL ,1 ,NULL,732 ), 104 | (15 ,NULL , 43 , NULL, NULL ,NULL,NULL,129 ), 105 | (16 ,765 , 44 , NULL, NULL ,NULL,NULL,923 ), 106 | (17 ,NULL , 34 , NULL, NULL ,7 ,NULL,132 ), 107 | (18 ,76 , 100 , NULL, NULL ,NULL,NULL,731 ), 108 | (19 ,NULL , 101 , NULL, 32 ,3 ,NULL,766 ), 109 | (20 ,NULL , NULL , 1 , NULL ,31 ,NULL,NULL), 110 | (21 ,NULL , NULL , 2 , NULL ,32 ,NULL,NULL), 111 | (22 ,NULL , NULL , 3 , NULL ,33 ,NULL,NULL), 112 | (23 ,NULL , NULL , 4 , NULL ,34 ,NULL,NULL), 113 | (24 ,NULL , NULL , 5 , NULL ,35 ,NULL,NULL), 114 | (25 ,NULL , 15 , NULL, NULL ,36 ,NULL,NULL), 115 | (26 ,NULL , 16 , NULL, NULL ,37 ,NULL,NULL), 116 | (27 ,NULL , 17 , NULL, NULL ,38 ,NULL,NULL), 117 | (28 ,NULL , 18 , NULL, NULL ,39 ,NULL,NULL), 118 | (29 ,NULL , 19 , NULL, NULL ,41 ,NULL,NULL), 119 | (30 ,NULL , 20 , NULL, NULL ,42 ,NULL,NULL), 120 | (31 ,NULL , 21 , NULL, NULL ,43 ,NULL,NULL), 121 | (32 ,NULL , 22 , NULL, NULL ,44 ,NULL,NULL), 122 | (33 ,NULL , 23 , NULL, NULL ,NULL,1 ,NULL), 123 | (34 ,NULL , 42 , NULL, NULL ,NULL,2 ,NULL), 124 | (36 ,NULL , 65 , NULL, NULL ,NULL,3 ,NULL), 125 | (37 ,NULL , 54 , NULL, NULL ,NULL,NULL,NULL), 126 | (38 ,NULL , 32 , NULL, NULL ,NULL,NULL,NULL); 127 | 128 | -- Contains start and end of overseas spells. 129 | -- Researchers may wish to exclude people who 130 | -- were overseas. 
131 | CREATE TABLE data_person_overseas_spell ( 132 | snz_uid int not null UNIQUE, 133 | pos_applied_date date null, 134 | pos_ceased_date date null); 135 | 136 | -- One row for each snz_uid. 137 | -- The column with 0 or 1 indicates 138 | -- if an individual is in the spine. 139 | CREATE TABLE data_personal_detail ( 140 | snz_uid int not null UNIQUE, 141 | snz_in_spine int not null 142 | ); 143 | 144 | -- In this table there should be 145 | -- one row for each unique combination 146 | -- of snz_uid and year. 147 | -- A subset of the people in the spine. 148 | CREATE TABLE data_snz_res_pop ( 149 | snz_uid int not null UNIQUE, 150 | year int not null 151 | ); 152 | 153 | -- I CREATED THESE EMPTY TABLES SO THE TABLE 154 | -- NAMES WOULD MATCH SOME OF THOSE IN THE 155 | -- DATA SCHEMA 156 | CREATE TABLE data_source_ranked_ethnicity ( 157 | snz_uid int not null UNIQUE, 158 | dummy1 int, 159 | dummy2 int, 160 | dummy3 int); 161 | 162 | CREATE TABLE data_income_tax_yr_summary ( 163 | snz_uid int not null UNIQUE, 164 | dummy1 int, 165 | dummy2 int, 166 | dummy3 int); 167 | 168 | CREATE TABLE data_income_cal_yr_summary ( 169 | snz_uid int not null UNIQUE, 170 | dummy1 int, 171 | dummy2 int, 172 | dummy3 int); 173 | 174 | CREATE TABLE data_address_notification ( 175 | snz_uid int not null UNIQUE, 176 | dummy1 int, 177 | dummy2 int, 178 | dummy3 int); 179 | 180 | CREATE TABLE ACC_Clean_Medical_Codes ( 181 | snz_acc_claim_uid int not null, 182 | acc_med_injury_count_nbr smallint, 183 | acc_med_read_code varchar(6), 184 | acc_med_read_code_text varchar(255), 185 | acc_med_injury_precedence_nbr int not null 186 | ); 187 | 188 | INSERT INTO ACC_Clean_Medical_Codes VALUES 189 | (1 ,2, 'a', 'broken leg' ,1 ), 190 | (2 ,3, 'b', 'popped out eyeball',2 ), 191 | (3 ,1, 'a', 'broken leg' ,1 ), 192 | (4 ,2, 'a', 'broken leg' ,1 ), 193 | (5 ,3, 'b', 'popped out eyeball',1 ), 194 | (6 ,4, 'b', 'popped out eyeball',2 ), 195 | (7 ,2, 'a', 'broken leg' ,2 ), 196 | (8 ,2, 'c', 'exploded lung' ,3 ), 197 | (9 ,3, 'c', 'exploded lung' ,3 ), 198 | (10,3, 'c', 'exploded lung' ,3 ); 199 | 200 | CREATE TABLE ACC_Clean_Serious_Injury ( 201 | snz_uid int not null, 202 | snz_acc_uid int not null, 203 | snz_employee_ird_uid int null, 204 | snz_employer_ird_uid int null, 205 | acc_cla_accident_date date null, 206 | acc_cla_claim_costs_to_date_ex_gst_amt decimal(13,2), 207 | acc_cla_ethnic_grp1_snz_uid bit not null, 208 | acc_cla_ethnic_grp2_snz_uid bit not null, 209 | acc_cla_ethnic_grp3_snz_uid bit not null, 210 | acc_cla_ethnic_grp4_snz_uid bit not null, 211 | acc_cla_ethnic_grp5_snz_uid bit not null, 212 | acc_cla_ethnic_grp6_snz_uid bit not null, 213 | snz_acc_claim_uid int not null, 214 | acc_cla_meshblock_code varchar(7) null 215 | ); 216 | 217 | INSERT INTO ACC_Clean_Serious_Injury VALUES 218 | (10, 29 ,0 ,42 , '20160901', 15000 ,1 ,0 ,0 ,1 ,0 ,1 ,1 , 'MZ321' ), 219 | (2 , 23 ,1 ,32 , '20160912', 12000 ,1 ,0 ,0 ,0 ,0 ,0 ,2 , 'KL653' ), 220 | (1 , 22 ,2 ,32 , '20160913', 130000 ,0 ,1 ,0 ,0 ,0 ,0 ,3 , 'DF24' ), 221 | (4 , 21 ,3 ,54 , '20160923', 132000 ,0 ,1 ,0 ,0 ,0 ,0 ,4 , 'EW321' ), 222 | (7 , 26 ,4 ,65 , '20160902', 23000 ,0 ,1 ,0 ,0 ,0 ,0 ,5 , 'EW321' ), 223 | (9 , 25 ,5 ,65 , '20160921', 32000 ,0 ,0 ,0 ,1 ,0 ,0 ,6 , 'KL432' ), 224 | (5 , 24 ,6 ,23 , '20160918', 500 ,1 ,0 ,0 ,0 ,0 ,0 ,7 , 'EW234' ), 225 | (8 , 28 ,7 ,42 , '20160916', 120 ,0 ,0 ,0 ,0 ,1 ,0 ,8 , 'FD432' ), 226 | (6 , 27 ,8 ,42 , '20160918', 130 ,0 ,1 ,0 ,0 ,0 ,0 ,9 , 'HFD432'), 227 | (3 , 20 ,9 ,42 , '20160919', 45000 ,1 ,1 ,0 ,0 ,0 ,0 ,10 , 'FGV432'), 
228 | (20, 31 ,NULL,42 , '20170601', 20000 ,1 ,0 ,0 ,0 ,0 ,0 ,11 , 'EW321' ), 229 | (21, 32 ,NULL,23 , '20170602', 20000 ,1 ,0 ,0 ,0 ,0 ,0 ,12 , 'EW321' ), 230 | (22, 33 ,NULL,65 , '20170603', 20000 ,1 ,0 ,0 ,0 ,0 ,0 ,13 , 'EW234' ), 231 | (23, 34 ,NULL,65 , '20170604', 20000 ,0 ,1 ,0 ,0 ,0 ,0 ,14 , 'EW234' ), 232 | (24, 35 ,NULL,54 , '20170605', 20000 ,0 ,1 ,0 ,0 ,0 ,0 ,15 , 'FD432' ), 233 | (25, 36 ,15 ,32 , '20171206', 20000 ,0 ,1 ,0 ,0 ,0 ,0 ,16 , 'FD432' ), 234 | (26, 37 ,16 ,32 , '20170207', 30000 ,0 ,1 ,0 ,0 ,0 ,0 ,17 , 'FD432' ), 235 | (27, 38 ,17 ,42 , '20170608', 30000 ,0 ,1 ,0 ,0 ,0 ,0 ,18 , 'FD432' ), 236 | (28, 39 ,18 ,65 , '20180309', 30000 ,0 ,0 ,1 ,0 ,0 ,0 ,19 , 'FD432' ), 237 | (29, 41 ,19 ,54 , '20181110', 30000 ,0 ,0 ,1 ,0 ,0 ,0 ,20 , 'FD432' ), 238 | (30, 42 ,20 ,32 , '20180711', 45000 ,0 ,0 ,1 ,0 ,0 ,0 ,21 , 'DF24' ), 239 | (31, 43 ,21 ,32 , '20180612', 45000 ,0 ,0 ,0 ,1 ,0 ,0 ,22 , 'DF24' ), 240 | (32, 44 ,22 ,42 , '20180513', 45000 ,0 ,0 ,0 ,1 ,0 ,0 ,23 , 'MZ321' ), 241 | (33, 45 ,23 ,32 , '20180614', 45000 ,0 ,0 ,0 ,0 ,1 ,0 ,24 , 'MZ321' ); 242 | 243 | -- INSERT INTO DIA_Clean_civil_unions VALUES (10, 34, 6 , 23 , 1, 1975, 1, NULL, 1, 1976, 1, NULL); 244 | -- INSERT INTO DIA_Clean_civil_unions VALUES (2, 55, 3 , 123 , 2, 1966, 0, NULL, 6, 1969, 1, NULL); 245 | -- INSERT INTO DIA_Clean_civil_unions VALUES (1, 32, 12, 65 , 5, 1977, 0, NULL, 4, 1973, 1, NULL); 246 | -- INSERT INTO DIA_Clean_civil_unions VALUES (4, 1, 16, 765 , 5, 1988, 1, NULL, 4, 1989, 0, NULL); 247 | -- INSERT INTO DIA_Clean_civil_unions VALUES (7, 67, 18, 76 , 9, 1999, 0, NULL, 12, 1995, 0, NULL); 248 | 249 | -- --------------------------------------------------------------- 250 | -- CREATE IDI_Metadata DATABASE --------------------------------- 251 | -- --------------------------------------------------------------- 252 | DROP DATABASE IF EXISTS IDI_Metadata; 253 | CREATE DATABASE IDI_Metadata; 254 | 255 | USE IDI_Metadata; 256 | 257 | CREATE TABLE clean_read_CLASSIFICATIONS_acc_ethnicity_code ( 258 | ethnic_grp int not null, -- a number from 1 to 6 259 | description varchar(100) not null 260 | ); 261 | 262 | CREATE TABLE clean_read_CLASSIFICATIONS_post_codes ( 263 | post_code int not null, 264 | description varchar(100) 265 | ); 266 | 267 | INSERT INTO clean_read_CLASSIFICATIONS_acc_ethnicity_code 268 | (ethnic_grp, description) 269 | VALUES 270 | (1, 'European'), 271 | (2, 'Maori'), 272 | (3, 'Pacific Peoples'), 273 | (4, 'Asian'), 274 | (5, 'Middle Eastern/Latin American/African'), 275 | (6, 'Other ethnicity'); 276 | 277 | CREATE TABLE clean_read_CLASSIFICATIONS_cor_ethnicity_code ( 278 | ethnic_grp int not null, -- a number from 1 to 6 279 | description varchar(100) not null 280 | ); 281 | 282 | INSERT INTO clean_read_CLASSIFICATIONS_cor_ethnicity_code 283 | (ethnic_grp, description) 284 | VALUES 285 | (2, 'European'), 286 | (1, 'Maori/Pacific Peoples'), 287 | (3, 'Asian'), 288 | (4, 'Other ethnicity'); 289 | 290 | -- --------------------------------------------------------------- 291 | -- CREATE OLDER IDI_Clean REFRESH DATABASE ---------------------- 292 | -- --------------------------------------------------------------- 293 | 294 | -- INSERT INTO security_concordance ( 295 | -- snz_uid , 296 | -- snz_dia_uid , 297 | -- snz_ird_uid , 298 | -- snz_moh_uid , 299 | -- snz_cen_uid , 300 | -- snz_acc_uid , 301 | -- snz_dol_uid , 302 | -- snz_in_spine) 303 | -- VALUES 304 | -- (13, 34 , NULL , NULL, NULL ,NULL,NULL, 1), 305 | -- (21 ,55 , NULL , NULL, NULL ,2 ,NULL,1), 306 | -- (34 ,32 , NULL , 
NULL, NULL ,NULL,NULL,1), 307 | -- (24 ,1 , NULL , NULL, NULL ,4 ,NULL,1), 308 | -- (55 ,67 , NULL , NULL, NULL ,5 ,NULL,1), 309 | -- (7 ,NULL , 3 , NULL, NULL ,NULL,NULL,0), 310 | -- (17 ,32 , 1 , NULL, NULL ,NULL,NULL,0), 311 | -- (28 ,43 , 2 , NULL, NULL ,8 ,NULL,1), 312 | -- (59 ,23 , 4 , NULL, NULL ,9 ,NULL,1), 313 | -- (1237,123 , 6 , NULL, NULL ,10 ,NULL,1), 314 | -- (121,NULL , 7 , NULL, NULL ,NULL,NULL,1), 315 | -- (345,65 , 5 , NULL, NULL ,NULL,NULL,0), 316 | -- (765, NULL , 10 , NULL, NULL ,6 ,NULL,1), 317 | -- (1432, NULL , 12 , NULL, NULL ,1 ,NULL,1), 318 | -- (873, NULL , 43 , NULL, NULL ,NULL,NULL,1), 319 | -- (3,765 , 44 , NULL, NULL ,NULL,NULL,0), 320 | -- (5,NULL , 34 , NULL, NULL ,7 ,NULL,1), 321 | -- (78,76 , 100 , NULL, NULL ,NULL,NULL,0), 322 | -- (1,NULL , 101 , NULL, 32 ,3 ,NULL,0); 323 | -- 324 | 325 | -- --------------------------------------------------------------- 326 | -- CREATE Sandpit DATABASE --------------------------------------- 327 | -- --------------------------------------------------------------- 328 | 329 | DROP DATABASE IF EXISTS Sandpit; 330 | CREATE DATABASE Sandpit; 331 | 332 | USE Sandpit; 333 | 334 | 335 | CREATE TABLE Ape_Colours ( 336 | ColourID int not null, 337 | ColourName varchar(20) not null, 338 | Comments varchar(100), -- I find this Colour strange etc. 339 | PRIMARY KEY (ColourID) 340 | ); 341 | 342 | CREATE TABLE Ape_Friends ( 343 | FriendID int not null, 344 | FirstName varchar(20), 345 | LastName varchar(20), 346 | FavColourID int, 347 | FOREIGN KEY (FavColourID) REFERENCES Ape_Colours (ColourID), 348 | PRIMARY KEY (FriendID) 349 | ); 350 | 351 | CREATE TABLE Ape_BananaTree ( 352 | TreeID int not null, 353 | Height decimal(5,2), 354 | YearPlanted int, 355 | MonthPlanted int, 356 | Width decimal(5,2), 357 | PRIMARY KEY (TreeID) 358 | ); 359 | 360 | CREATE TABLE Ape_EatingFrom ( 361 | FriendID int not null, 362 | TreeID int not null, 363 | FOREIGN KEY (FriendID) REFERENCES Ape_Friends (FriendID), 364 | FOREIGN KEY (TreeID) REFERENCES Ape_BananaTree (TreeID) 365 | ); 366 | 367 | CREATE TABLE Ape_Banana ( 368 | BananaID int not null, 369 | TasteRank int, -- from 1 to 5 370 | DatePicked date not null, 371 | DateEaten date not null, 372 | Ripe bit, 373 | TreeID int not null, 374 | Comments varchar(100), 375 | FOREIGN KEY (TreeID) REFERENCES Ape_BananaTree (TreeID), 376 | PRIMARY KEY (BananaID) 377 | ); 378 | 379 | 380 | INSERT INTO Ape_Colours VALUES 381 | (1 ,'blue' , 'similar to sky' ), 382 | (2 ,'green' , 'bad tasting bananas'), 383 | (3 ,'yellow' , 'my favourite because banana'), 384 | (4 ,'orange' , 'no connection to orangutan'), 385 | (5 ,'red' , 'generally a bad sign'), 386 | (6 ,'purple' , 'never seen this before'), 387 | (7 ,'brown' , 'many things are brown'), 388 | (8 ,'magenta' , 'one of the primary subtractive colours'), 389 | (9 ,'pink' , 'very manly'), 390 | (10,'lime' , 'almost green'), 391 | (11,'turquoise', 'not to be confused with tortoise'); 392 | 393 | 394 | INSERT INTO Ape_Friends VALUES 395 | (1 , 'Caesar' , 'Serkis', 3 ), 396 | (2 , 'Harambe' , 'Porter', 1 ), 397 | (3 , 'Aldo' , 'Atkins', 3 ), 398 | (4 , 'Cornelius', 'Porter', 1 ), 399 | (5 , 'Zira' , 'Porter', 4 ), 400 | (6 , 'Ishmael' , 'Serkis', 3 ), 401 | (7 , 'Monsieur' , 'Mallah', 3 ), 402 | (8 , 'Titano' , 'Atkins', 6 ), 403 | (9 , 'King' , 'Kong' , 3 ), 404 | (10, 'Bobo' , 'Kong' , 8 ), 405 | (11, 'Myster' , 'Ious' , NULL); 406 | 407 | 408 | INSERT INTO Ape_BananaTree VALUES 409 | (1, 5.5, 2018, 08, 31), 410 | (2, 4.3, 2018, 08, 27), 411 | (3, 4.7, 2018, 
08, 36), 412 | (4, 3.8, 2018, 08, 20), 413 | (5, 6.2, 2018, 08, 40), 414 | (6, 6.4, 2016, 07, 23), 415 | (7, 5.4, 2016, 07, 32), 416 | (8, 5.2, 2016, 07, 31), 417 | (9, 4.8, 2016, 07, 19), 418 | (10, 4.5, 2015, 09, 28), 419 | (11, 5.8, 2015, 09, 35), 420 | (12, 7.5, 2015, 09, 45), 421 | (13, 6.4, 2015, 09, 30), 422 | (14, 5.0, 2015, 09, 24), 423 | (15, 5.1, 2014, 10, 34), 424 | (16, 4.2, 2014, 10, 23), 425 | (17, 5.4, 2014, 08, 39), 426 | (18, 5.2, 2014, 08, 28); 427 | 428 | 429 | INSERT INTO Ape_EatingFrom VALUES 430 | (3 ,1 ), 431 | (2 ,1 ), 432 | (1 ,1 ), 433 | (6 ,3 ), 434 | (10,5 ), 435 | (4 ,5 ), 436 | (9 ,5 ), 437 | (8 ,8 ), 438 | (7 ,8 ), 439 | (10,8 ), 440 | (5 ,8 ), 441 | (2 ,8 ), 442 | (3 ,8 ), 443 | (4 ,8 ), 444 | (6 ,2 ), 445 | (6 ,2 ), 446 | (6 ,2 ), 447 | (8 ,2 ), 448 | (9 ,1 ), 449 | (2 ,1 ), 450 | (1 ,1 ), 451 | (6 ,2 ), 452 | (6 ,2 ), 453 | (10,8 ), 454 | (2 ,18), 455 | (6 ,15), 456 | (7 ,11), 457 | (2 ,14), 458 | (2 ,1 ); 459 | 460 | 461 | INSERT INTO Ape_Banana VALUES 462 | (1 , 2, '20181003', '20181004', 0, 1 , NULL), 463 | (2 , 4, '20181003', '20181004', 1, 2 , NULL), 464 | (3 , 4, '20181003', '20181004', 1, 2 , NULL), 465 | (4 , 5, '20181003', '20181006', 1, 1 , NULL), 466 | (5 , 5, '20181003', '20181006', 1, 2 , 'best banana ever'), 467 | (6 , 3, '20181003', '20181004', 1, 2 , NULL), 468 | (7 , 2, '20181002', '20181004', 0, 3 , NULL), 469 | (8 , 5, '20181002', '20181005', 1, 3 , 'smooth and delectable'), 470 | (9 , 3, '20181002', '20181003', 1, 4 , NULL), 471 | (10, 3, '20181002', '20181003', 1, 5 , NULL), 472 | (11, 2, '20181002', '20181003', 0, 5 , NULL), 473 | (12, 5, '20181002', '20181005', 1, 4 , NULL), 474 | (13, 1, '20181002', '20181002', 0, 9 , NULL), 475 | (14, 1, '20181001', '20181001', 0, 16, NULL), 476 | (15, 1, '20181001', '20181001', 0, 16, NULL), 477 | (16, 5, '20181001', '20181004', 1, 5 , 'a culinary delight'), 478 | (17, 5, '20181001', '20181004', 1, 6 , NULL), 479 | (18, 5, '20181001', '20181004', 1, 7 , NULL), 480 | (19, 5, '20181001', '20181004', 1, 8 , NULL), 481 | (20, 5, '20181001', '20181004', 1, 9 , 'soft with nutty undertones'), 482 | (21, 4, '20181001', '20181003', 1, 18, NULL), 483 | (22, 4, '20180930', '20181001', 1, 14, NULL), 484 | (23, 4, '20180930', '20181001', 1, 17, NULL), 485 | (24, 4, '20180930', '20181001', 1, 12, NULL), 486 | (25, 2, '20180930', '20181030', 1, 11, NULL), 487 | (26, 3, '20180930', '20181030', 0, 5 , 'good mid-range banana'), 488 | (27, 4, '20180930', '20181003', 1, 2 , NULL), 489 | (28, 5, '20180930', '20181003', 1, 10, NULL), 490 | (29, 5, '20180929', '20181003', 1, 11, NULL), 491 | (30, 1, '20180929', '20181001', 0, 4 , 'tasted like ape-shit'), 492 | (31, 1, '20180929', '20180929', 0, 7 , NULL), 493 | (32, 2, '20180929', '20181002', 0, 8 , NULL), 494 | (33, 5, '20180929', '20181002', 1, 12, NULL), 495 | (34, 4, '20180929', '20180930', 1, 2 , NULL), 496 | (35, 2, '20180929', '20180930', 0, 2 , NULL), 497 | (36, 3, '20180929', '20180930', 0, 18, NULL), 498 | (37, 3, '20180929', '20180930', 0, 13, NULL), 499 | (38, 4, '20180928', '20180929', 1, 15, NULL), 500 | (39, 5, '20180928', '20181001', 1, 13, NULL), 501 | (40, 2, '20180928', '20180930', 1, 12, NULL), 502 | (41, 1, '20180928', '20180928', 0, 15, NULL), 503 | (42, 1, '20180928', '20180928', 1, 12, 'had ants in it'), 504 | (43, 1, '20180928', '20180928', 0, 7 , NULL), 505 | (44, 4, '20180927', '20180929', 0, 7 , NULL), 506 | (45, 5, '20180927', '20180930', 1, 14, NULL), 507 | (46, 5, '20180927', '20180930', 1, 15, NULL), 508 | (47, 5, '20180927', '20180930', 
1, 16, NULL), 509 | (48, 5, '20180927', '20180930', 1, 17, NULL), 510 | (49, 5, '20180927', '20180930', 1, 18, NULL), 511 | (50, 3, '20180927', '20180928', 1, 3 , NULL); 512 | 513 | 514 | -- - NOTES SCHEMA -- 515 | 516 | CREATE TABLE Notes_Friends ( 517 | FriendID int not null, 518 | FirstName varchar(20), 519 | LastName varchar(20), 520 | FavColour varchar(20), 521 | PRIMARY KEY (FriendID) 522 | ); 523 | 524 | CREATE TABLE Notes_Pets ( 525 | PetID int not null, 526 | PetName varchar(20), 527 | PetDOB date, 528 | FriendID int not null, 529 | FOREIGN KEY (FriendID) REFERENCES Notes_Friends (FriendID), 530 | PRIMARY KEY (PetID) 531 | ); 532 | 533 | CREATE TABLE Notes_Scratched ( 534 | ScratcherID int not null, 535 | ScratchDate date, 536 | ScratchTime time, 537 | ScratcheeID int not null, 538 | FOREIGN KEY (ScratcherID) REFERENCES Notes_Friends (FriendID), 539 | FOREIGN KEY (ScratcheeID) REFERENCES Notes_Friends (FriendID), 540 | PRIMARY KEY (ScratcherID, ScratcheeID, ScratchDate, ScratchTime) 541 | ); 542 | 543 | CREATE TABLE Notes_PlayCount ( 544 | PetID int not null, 545 | PlayCounter int, 546 | FriendID int not null, 547 | FOREIGN KEY (PetID) REFERENCES Notes_Pets (PetID), 548 | FOREIGN KEY (FriendID) REFERENCES Notes_Friends (FriendID), 549 | PRIMARY KEY (PetID, FriendID) 550 | ); 551 | 552 | CREATE TABLE Notes_Passports ( 553 | PptNo varchar(5) not null, 554 | PptCountry varchar(20), 555 | PptExpiry date, 556 | FriendID int, 557 | FOREIGN KEY (FriendID) REFERENCES Notes_Friends (FriendID), 558 | PRIMARY KEY (PptNo) 559 | ); 560 | 561 | CREATE TABLE Notes_Table1 ( 562 | A int not null, 563 | B varchar(20), 564 | C varchar(20), 565 | PRIMARY KEY (A) 566 | ); 567 | 568 | CREATE TABLE Notes_Table2 ( 569 | D varchar(20), 570 | E int not null, 571 | A int not null, 572 | FOREIGN KEY (A) REFERENCES Notes_Table1 (A), 573 | PRIMARY KEY (E) 574 | ); 575 | 576 | CREATE TABLE Notes_Letters ( 577 | A char, 578 | B char, 579 | Num int not null, 580 | PRIMARY KEY (Num) 581 | ); 582 | 583 | CREATE TABLE Notes_RandomPeople ( 584 | PersonName varchar(20), 585 | Gender char(2), 586 | Age int 587 | ); 588 | 589 | CREATE TABLE Notes_Houses ( 590 | house_ID varchar(5) not null, 591 | house_owner varchar(50), 592 | house_address varchar(200), 593 | post_code varchar(4), 594 | house_price decimal(10,2), 595 | PRIMARY KEY (house_ID) 596 | ); 597 | 598 | CREATE TABLE Notes_Suburbs ( 599 | post_code varchar(5) not null, 600 | suburb_name varchar(100) not null, 601 | vaccination_rate decimal(3,2), 602 | PRIMARY KEY (post_code, suburb_name) 603 | ); 604 | 605 | CREATE TABLE Notes_Numbers ( 606 | Num int, 607 | NumString varchar(10) 608 | ); 609 | 610 | CREATE TABLE Notes_EduStudy ( 611 | Id varchar(6) not null, 612 | Income varchar(8), 613 | Education int, 614 | PRIMARY KEY (Id) 615 | ); 616 | 617 | CREATE TABLE Notes_Orders ( 618 | OrderID int not null, 619 | Item varchar(30), 620 | Price decimal(5,2), 621 | OrderDT datetime, 622 | PRIMARY KEY (OrderID) 623 | ); 624 | 625 | INSERT INTO Notes_Orders VALUES 626 | (1, 'Boiled leaves' , 2.99 , '2021-12-31 15:13:00'), 627 | (2, 'Bow wow' , 15 , '2021-12-31 15:34:00'), 628 | (3, 'Cackleberry stew', 32.55, '2022-01-01 09:32:00'), 629 | (4, 'Mug of murk' , 4.40 , '2022-01-01 10:16:00'); 630 | 631 | INSERT INTO Notes_EduStudy VALUES 632 | ('EI13', 'low', 5), 633 | ('EI122', 'low', 1), 634 | ('EI281', 'low-mid', 4), 635 | ('EI3332', 'middle', 3), 636 | ('EI4751', 'high-mid', 3), 637 | ('EI12', 'high', 2); 638 | 639 | INSERT INTO Notes_Numbers VALUES 640 | (111, '111'), 641 
| (31, '31'), 642 | (32, '32'), 643 | (211, '211'); 644 | 645 | INSERT INTO Notes_Houses VALUES 646 | ('H0001', 'Millard Claassen' , '7235 East Van Dyke St' , '3128', 300000), 647 | ('H0002', 'Jamie Pew' , '8914 South Sunnyslope Dr', '3128', 150000), 648 | ('H0003', 'Bethel Viviano' , '87 South West Halifax St', '3142', 400000), 649 | ('H0004', 'Brandi Hovis' , '676 Ocean St' , '3142', 360000), 650 | ('H0005', 'Mei Colby' , '62 West Park Ave' , '3556', 220000), 651 | ('H0006', 'Marilu Munz' , '62 Elm Road' , '3083', 120000), 652 | ('H0007', 'Rhiannon Carwile' , '7005 Anderson Ave' , '3779', 500000), 653 | ('H0008', 'Joycelyn Hamburger' , '7410 Park Drive' , '3778', 550000), 654 | ('H0009', 'Leopoldo Flaherty' , '3 Dogwood Dr' , '3083', 1200000), 655 | ('H0010', 'Xavier Farrer' , '767 Rockville Street' , '3083', 100000), 656 | ('H0011', 'Waldo Wingboard' , '8712 Thorne Street' , NULL, 640000), 657 | ('H0012', 'Jimmy Jenkins' , '32 Rosey Cres' , NULL, 70000); 658 | 659 | -- There is no primary / foreign key pair for Houses and Suburbs. 660 | -- The primary key of suburbs is not as may be expected (not post_code). 661 | -- 3218 connects to 2 suburbs 662 | -- some houses have NULL suburb 663 | -- 3142 has no corresponding suburb 664 | -- the data type for post_code in suburb is varchar(6), one suburb has postcode '33128' 665 | 666 | INSERT INTO Notes_Suburbs VALUES 667 | ('3128' , 'Erebor' , 0.8), 668 | ('33128', 'Erberor' , 0.8), 669 | ('3128' , 'Fangorn' , 0.2), 670 | ('3779' , 'Durin' , 0.4), 671 | ('3556' , 'Gondor' , 0.65), 672 | ('3083' , 'Isengaard', 0.35); 673 | 674 | INSERT INTO Notes_Friends VALUES 675 | (1, 'X', 'A', 'red'), 676 | (2, 'Y', 'B', 'blue'), 677 | (3, 'Z', 'C', NULL); 678 | 679 | INSERT INTO Notes_Pets VALUES 680 | (1, 'Chikin', '20160924', 2), 681 | (2, 'Cauchy', '20120301', 3), 682 | (3, 'Gauss', '20120301', 3); 683 | 684 | INSERT INTO Notes_Scratched VALUES 685 | (1, '20180905', '12:00', 2), 686 | (1, '20180905', '12:30', 3), 687 | (2, '20180906', '11:00', 1), 688 | (3, '20180907', '10:00', 1), 689 | (2, '20180801', '16:15', 1), 690 | (2, '20180801', '13:00', 3), 691 | (1, '20170214', '04:30', 2), 692 | (3, '20200128', '18:00', 1); 693 | 694 | INSERT INTO Notes_PlayCount VALUES 695 | (1, 3, 1), 696 | (1, 5, 2), 697 | (3, 4, 2); 698 | 699 | INSERT INTO Notes_Passports VALUES 700 | ('E1321', 'Australia', '20210312', NULL), 701 | ('LA123', 'New Zealand', '20320901', 2), 702 | ('S9876', 'Monaco', '20280619', 3); 703 | 704 | INSERT INTO Notes_Table1 ( 705 | A, B, C 706 | ) VALUES 707 | (1, 'Ignorance', 'is'), 708 | (2, 'War', 'is'), 709 | (3, 'Freedom', 'is'), 710 | (4, 'Friendship', 'is'); 711 | 712 | INSERT INTO Notes_Table2 ( 713 | D, E, A 714 | ) VALUES 715 | ('slavery.', 3, 1), 716 | ('weakness.', 4, 2), 717 | ('strength.', 1, 3), 718 | ('peace.', 2, 4); 719 | 720 | INSERT INTO Notes_Letters VALUES 721 | ('a', 'b', 1), 722 | ('a', 'c', 2), 723 | ('a', 'b', 3), 724 | ('a', 'c', 4); 725 | 726 | INSERT INTO Notes_RandomPeople VALUES 727 | ('Beyonce', 'F', 37), 728 | ('Laura Marling', 'F', 28), 729 | ('Darren Hayes', 'M', 46), 730 | ('Bret McKenzie', 'M', 42), 731 | ('Jack Monroe', 'NB', 30); 732 | 733 | -- --------------------------------------------------------------- 734 | -- CREATE Analytics DATABASE ------------------------------------- 735 | -- --------------------------------------------------------------- 736 | 737 | DROP DATABASE IF EXISTS Analytics; 738 | 739 | CREATE DATABASE Analytics; 740 | 741 | USE Analytics; 742 | 743 | CREATE TABLE Membership ( 744 | 
memberID INT not null, 745 | memberName VARCHAR(100), 746 | phone VARCHAR(20), 747 | joinDate DATE, 748 | PRIMARY KEY (memberID) 749 | ); 750 | 751 | INSERT INTO Membership 752 | VALUES 753 | (12231, 'Denali Dune', '+61 03 97229917', '2021-06-21'), 754 | (12688, 'Reilly Bierman', '+61 03 9269 1200', '2021-05-01'), 755 | (12233, 'Shiloh Henry', '+61 03 9479 6000', '2021-05-13'), 756 | (12565, 'Tristan Gaumond', '+61 03 9905 4000', '2021-05-04'), 757 | (12223, 'Rene Brassard', '+61 03 9903 2000', '2021-06-30'), 758 | (12668, 'Tanner Hubert', '+61 03 9035 5511', '2021-07-29'); 759 | 760 | CREATE TABLE SausageSizzle ( 761 | saleId INT NOT NULL, 762 | saleDate DATE, 763 | product VARCHAR(20), 764 | quantity INT, 765 | friendId INT, 766 | PRIMARY KEY (saleId) 767 | ); 768 | 769 | INSERT INTO SausageSizzle VALUES 770 | (1 , '1999-12-31', 'pork' , 1, NULL), 771 | (2 , '1999-12-31', 'veggie', 3, NULL), 772 | (3 , '1999-12-31', 'pork' , 2, 1 ), 773 | (4 , '2000-01-01', 'veggie', 4, NULL), 774 | (5 , '2000-01-01', 'veggie', 2, 2 ), 775 | (6 , '2000-01-01', 'pork' , 1, NULL), 776 | (7 , '2000-01-01', 'veggie', 1, NULL), 777 | (8 , '2000-01-01', 'pork' , 1, NULL), 778 | (9 , '2000-01-02', 'veggie', 1, 2 ), 779 | (10, '2000-01-02', 'pork' , 2, NULL), 780 | (11, '2000-01-02', 'veggie', 3, NULL), 781 | (12, '2000-01-02', 'pork' , 4, NULL), 782 | (13, '2000-01-02', 'veggie', 2, 3 ), 783 | (14, '2000-01-03', 'veggie', 1, NULL), 784 | (15, '2000-01-03', 'pork' , 6, NULL), 785 | (16, '2000-01-03', 'veggie', 1, 1 ), 786 | (17, '2000-01-04', 'pork', 1, 1 ), 787 | (18, '2000-01-05', 'veggie', 5, 1 ); 788 | 789 | CREATE VIEW SausageSizzleSummary AS 790 | SELECT Product, SaleDate, SUM(Quantity) AS Sales 791 | FROM SausageSizzle 792 | GROUP BY Product, SaleDate; 793 | 794 | -------------------------------------------------------------------------------- /create-database/MySQL/README.md: -------------------------------------------------------------------------------- 1 | # MySQL create databases guide 2 | 3 | This is a guide to installing a MySQL server on Windows or Mac, so you can start using MySQL. 4 | 5 | This will guide you through **installing the server**, **installing the editor**, and then **creating the databases**. 6 | 7 | ## Windows 8 | 9 | ### Windows: installing the server 10 | 1. Go to [the 'MySQL installer' download page](https://dev.mysql.com/downloads/installer/). 11 | 2. On that page, there are two options. Download the **bottom** one (`mysql-installer-community`). On the next page, you do not have to log in, just click 'no thanks...'. 12 | 3. Run the installer that you just downloaded. 13 | 4. The 'Choosing a Setup Type' menu appears. Choose 'Developer Default' and press 'Next'. If an issue is then displayed related to Visual Studio, press 'Back', choose 'Custom' setup type, choose 'Next', then remove 'MySQL for Visual Studio' from the products to be installed. Then click 'Next'. 14 | 5. The 'Installation' overview menu appears. Click 'Execute'. 15 | 6. Wait for installation to complete. Then click 'Next'. 16 | 7. The 'Product Configuration' menu appears. Click 'Next'. 17 | 8. The 'Type and Networking' menu appears. Click 'Next'. 18 | 9. The 'Authentication Method' menu appears. Click 'Next'. 19 | 10. The 'Accounts and Roles' menu appears. Choose a secure MySQL Root Password, and write it down. You do not need to create any other user accounts. Click 'Next'. 20 | 11. The 'Windows Service' menu appears. Do not change the defaults. Click 'Next'. 21 | 12. 
When the configuration is done, click 'Finish'. 22 | 13. Product configuration continues. Take a deep breath. Click 'Next'. 23 | 14. The 'Connect To Server' menu appears. Input the root password that you chose in step 10. Click 'Check' and then click 'Next'. 24 | 15. The 'Apply Configuration' menu appears. Click 'Execute'. 25 | 16. When the configuration is done, click 'Finish'. 26 | 17. Product configuration continues. Take another deep breath. Click 'Next'. 27 | 18. The 'MySQL Router Configuration' menu appears. Click 'Finish'. 28 | 19. The 'Product Configuration' menu appears again. Click 'Next'. 29 | 20. Installation is complete. Click 'Finish'. 30 | 31 | ### Windows: installing the editor 32 | 33 | The above process also installed MySQL Workbench. This is the MySQL editor we will use. 34 | 35 | 21. Open MySQL Workbench (e.g., press the 'Windows' key on your keyboard, and type 'MySQL Workbench', then press Enter). 36 | 22. Next to the word 'MySQL Connections', there is a plus symbol. Click it to add a new connection. 37 | 23. Under 'Connection Name' type 'MyLocal'. 38 | 24. Under 'Hostname' type 'localhost'. 39 | 25. Under 'Username' type 'root'. 40 | 26. Click 'OK'. 41 | 27. The 'MyLocal' connection box has been created. Click on it, then enter the password that you chose in step 10. If you like, you can check the box that says 'Save password in vault'. Click 'OK'. 42 | 28. MySQL Workbench is now connected. 43 | 44 | ### Windows: creating the databases 45 | 46 | 29. **Right click** [this link to the MySQL database script](https://raw.githubusercontent.com/frycast/SQL_course/master/create-database/MySQL/MySQL-database.sql) and click '**save link as**'. Save the file somewhere you can find it. The filename should be 'MySQL-database.sql'. 47 | 30. In MySQL Workbench click 'File >> Open SQL Script' (or press CTRL+SHIFT+O). 48 | 31. Find the file 'MySQL-database.sql' and open it. 49 | 32. In MySQL Workbench press the small lightning bolt symbol to run the script (or press CTRL+SHIFT+ENTER). 50 | 33. The databases have now been created. 51 | 52 | ## Mac 53 | 54 | ### Mac: installing the server 55 | 56 | 1. Go to [the MySQL download page](https://dev.mysql.com/downloads/mysql/). 57 | 2. On that page, there are many options. The top two options have 'DMG Archive' in the name. If you are on a Mac M1 device (these are newer Mac devices, made since 2020), then choose the one that says '(ARM, 64-bit), DMG Archive'. If you are on an older Mac (not an M1), then choose the one that says '(x86, 64-bit), DMG Archive'. 58 | 3. Mount the DMG. Then a box opens with a pkg file. Run the pkg file. 59 | 4. The installation menu begins. Click 'Continue'. 60 | 5. The next menu opens. Click 'Install'. 61 | 6. The 'Configure MySQL Server' menu appears. Click 'Next'. 62 | 7. Enter a password for the MySQL root user, and write it down. Click 'Finish'. 63 | 8. When the installation completes, click 'Close'. 64 | 65 | ### Mac: installing the editor 66 | 67 | 9. Go to the App Store and search for 'SQL Ace'. Install it (free). 68 | 10. Once installed, open SQL Ace. A connection menu appears. 69 | 11. Under 'host' type 'localhost'. 70 | 12. Under 'Password' type the password you chose in step 7. 71 | 13. Click 'Connect'. If the connection fails, you may need to restart your Mac first. 72 | 14. SQL Ace is now connected. 73 | 74 | ### Mac: creating the databases 75 | 76 | 15.
Control-click [this link to the MySQL database script](https://raw.githubusercontent.com/frycast/SQL_course/master/create-database/MySQL/MySQL-database.sql) and click '**save link as**'. Save the file somewhere you can find it. The filename should be 'MySQL-database.sql'. 77 | 16. In SQL Ace click 'File >> Open' (or press Command+O). 78 | 17. Find the file 'MySQL-database.sql' and open it. 79 | 18. In SQL Ace, find the drop-down arrow next to 'Run Current', and press it. A drop-down menu opens. Press 'Run All Queries' (alternatively, just press Option+Command+R). 80 | -------------------------------------------------------------------------------- /create-database/SQL-data-definition-examples.sql: -------------------------------------------------------------------------------- 1 | -- preparation 2 | DROP TABLE IF EXISTS MySchema.MyFriends; 3 | DROP TABLE IF EXISTS MySchema.MyFriendsNames; 4 | DROP VIEW IF EXISTS MySchema.MyFriendsNames_view 5 | DROP SCHEMA IF EXISTS MySchema; 6 | DELETE FROM Notes.Friends WHERE Notes.Friends.friendID > 995; 7 | GO 8 | 9 | -- slide 1 CREATE SCHEMA to store your tables 10 | CREATE SCHEMA MySchema; 11 | GO 12 | 13 | -- slide 2 CREATE VIEW to store a query like a table 14 | CREATE OR ALTER VIEW MySchema.MyFriendsNames_view AS 15 | SELECT firstName, lastName 16 | FROM Notes.Friends; 17 | GO 18 | 19 | -- see result 20 | SELECT * 21 | FROM MySchema.MyFriendsNames_view; 22 | GO 23 | 24 | -- slide 3 SELECT INTO to store result in a table 25 | SELECT friendID, firstName, lastName 26 | INTO MySchema.MyFriends 27 | FROM Notes.Friends; 28 | GO 29 | 30 | -- slide 4 ALTER to add columns to a stored table 31 | ALTER TABLE MySchema.MyFriends 32 | ADD initials varchar(4); 33 | GO 34 | 35 | -- see result 36 | SELECT * 37 | FROM MySchema.MyFriends; 38 | GO 39 | 40 | -- slide 5 UPDATE to change the entries in a table 41 | UPDATE MySchema.MyFriends 42 | SET initials = CONCAT( 43 | SUBSTRING(firstName, 1, 1), 44 | SUBSTRING(lastName, 1, 1) 45 | ) 46 | WHERE firstName IS NOT NULL 47 | AND lastName IS NOT NULL; 48 | GO 49 | 50 | -- see result 51 | SELECT * 52 | FROM MySchema.MyFriends; 53 | GO 54 | 55 | -- slide 6 note we could instead just alter the view 56 | CREATE OR ALTER VIEW MySchema.MyFriendsNames_view AS 57 | SELECT friendID, firstName, lastName, 58 | (CONCAT(SUBSTRING(firstName, 1, 1), 59 | SUBSTRING(lastName, 1, 1)) 60 | ) AS initials 61 | FROM Notes.Friends; 62 | GO 63 | 64 | -- see result 65 | SELECT * 66 | FROM MySchema.MyFriendsNames_view; 67 | 68 | -- slide 7 INSERT INTO to create a whole record 69 | INSERT INTO Notes.Friends 70 | (friendID, firstName, lastName, favColour) 71 | VALUES 72 | (997, NULL, NULL, NULL), 73 | (998, '', '', ''), 74 | (999, 'NA', 'NA', 'NA'); 75 | GO 76 | 77 | -- see result 78 | SELECT * 79 | FROM MySchema.MyFriendsNames_view; 80 | 81 | -- slide 8 But our table doesn't change 82 | SELECT * 83 | FROM MySchema.MyFriends; 84 | 85 | -- update stored table 86 | INSERT INTO MySchema.MyFriends 87 | (friendID, firstName, lastName) 88 | SELECT friendID, firstName, lastName 89 | FROM Notes.Friends 90 | WHERE friendID > 995 91 | GO 92 | 93 | -- see result 94 | SELECT * 95 | FROM MySchema.MyFriends; 96 | 97 | -- slide 9 the CASE clause 98 | UPDATE MySchema.MyFriends 99 | SET initials = (CASE 100 | WHEN (firstName IS NULL or lastName IS NULL) THEN 'none' 101 | WHEN (firstName = '' and lastName = '') THEN 'none' 102 | WHEN (firstName = 'NA' and lastName = 'NA') THEN 'none' 103 | ELSE (CONCAT(SUBSTRING(firstName, 1, 1), 104 | SUBSTRING(lastName, 1, 1))) 
105 | END); 106 | GO 107 | 108 | -- see result 109 | SELECT * 110 | FROM MySchema.MyFriends; 111 | 112 | -- cleaning up 113 | DROP TABLE IF EXISTS MySchema.MyFriends; 114 | DROP TABLE IF EXISTS MySchema.MyFriendsNames; 115 | DROP VIEW IF EXISTS MySchema.MyFriendsNames_view 116 | DROP SCHEMA IF EXISTS MySchema; 117 | DELETE FROM Notes.Friends WHERE Notes.Friends.friendID > 995; 118 | GO 119 | 120 | 121 | 122 | 123 | -------------------------------------------------------------------------------- /create-database/SQLFiddle/README.md: -------------------------------------------------------------------------------- 1 | ## Should you use SQL Fiddle? 2 | 3 | Many of the exercises in this course can be completed through a browser using [sqlfiddle.com](http://sqlfiddle.com/). 4 | 5 | The drawbacks are that you can't work with multiple schemas and databases at the same time, and you need to build each schema as you use it. 6 | 7 | The advantage is that you can use SQL Fiddle from any browser, without any installation steps. 8 | 9 | ## Using SQL Fiddle 10 | 11 | Navigate to [sqlfiddle.com](http://sqlfiddle.com/), select MS SQL Server 2017 from the drop-down menu at the top left of the page, and then build a schema by copying the schema code (linked below) into the schema panel, then pressing 'build schema'. 12 | 13 | Here are links to the schema code for the exercises: 14 | 15 | * [Notes schema](https://github.com/frycast/SQL_course/raw/master/create-database/SQLFiddle/T-SQL-notes-schema.sql) 16 | * [Ape schema](https://github.com/frycast/SQL_course/raw/master/create-database/SQLFiddle/T-SQL-ape-schema.sql) 17 | * [Analytics schema](https://github.com/frycast/SQL_course/raw/master/create-database/SQLFiddle/T-SQL-analytics-schema.sql) 18 | 19 | -------------------------------------------------------------------------------- /create-database/SQLFiddle/T-SQL-analytics-schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE Membership ( 2 | memberID INT not null, 3 | memberName VARCHAR(100), 4 | phone VARCHAR(20), 5 | joinDate DATE, 6 | PRIMARY KEY (memberID) 7 | ); 8 | 9 | 10 | INSERT INTO Membership 11 | VALUES 12 | (12231, 'Denali Dune', '+61 03 97229917', '2021-06-21'), 13 | (12688, 'Reilly Bierman', '+61 03 9269 1200', '2021-05-01'), 14 | (12233, 'Shiloh Henry', '+61 03 9479 6000', '2021-05-13'), 15 | (12565, 'Tristan Gaumond', '+61 03 9905 4000', '2021-05-04'), 16 | (12223, 'Rene Brassard', '+61 03 9903 2000', '2021-06-30'), 17 | (12668, 'Tanner Hubert', '+61 03 9035 5511', '2021-07-29'); 18 | 19 | 20 | CREATE TABLE SausageSizzle ( 21 | saleId INT NOT NULL, 22 | saleDate DATE, 23 | product VARCHAR(20), 24 | quantity INT, 25 | friendId INT, 26 | PRIMARY KEY (saleId) 27 | ); 28 | 29 | 30 | INSERT INTO SausageSizzle VALUES 31 | (1 , '1999-12-31', 'pork' , 1, NULL), 32 | (2 , '1999-12-31', 'veggie', 3, NULL), 33 | (3 , '1999-12-31', 'pork' , 2, 1 ), 34 | (4 , '2000-01-01', 'veggie', 4, NULL), 35 | (5 , '2000-01-01', 'veggie', 2, 2 ), 36 | (6 , '2000-01-01', 'pork' , 1, NULL), 37 | (7 , '2000-01-01', 'veggie', 1, NULL), 38 | (8 , '2000-01-01', 'pork' , 1, NULL), 39 | (9 , '2000-01-02', 'veggie', 1, 2 ), 40 | (10, '2000-01-02', 'pork' , 2, NULL), 41 | (11, '2000-01-02', 'veggie', 3, NULL), 42 | (12, '2000-01-02', 'pork' , 4, NULL), 43 | (13, '2000-01-02', 'veggie', 2, 3 ), 44 | (14, '2000-01-03', 'veggie', 1, NULL), 45 | (15, '2000-01-03', 'pork' , 6, NULL), 46 | (16, '2000-01-03', 'veggie', 1, 1 ), 47 | (17, '2000-01-04', 'pork', 1, 1 ), 48 | (18,
'2000-01-05', 'veggie', 5, 1 ); 49 | 50 | 51 | CREATE VIEW SausageSizzleSummary AS 52 | SELECT Product, SaleDate, SUM(Quantity) AS Sales 53 | FROM SausageSizzle 54 | GROUP BY Product, SaleDate; 55 | -------------------------------------------------------------------------------- /create-database/SQLFiddle/T-SQL-ape-schema.sql: -------------------------------------------------------------------------------- 1 | --- APE SCHEMA -- 2 | CREATE TABLE Colours ( 3 | ColourID int not null, 4 | ColourName varchar(20) not null, 5 | Comments varchar(100), -- I find this Colour strange etc. 6 | PRIMARY KEY (ColourID) 7 | ); 8 | GO 9 | CREATE TABLE Friends ( 10 | FriendID int not null, 11 | FirstName varchar(20), 12 | LastName varchar(20), 13 | FavColourID int, 14 | FOREIGN KEY (FavColourID) REFERENCES Colours (ColourID), 15 | PRIMARY KEY (FriendID) 16 | ); 17 | GO 18 | CREATE TABLE BananaTree ( 19 | TreeID int not null, 20 | Height decimal(5,2), 21 | YearPlanted int, 22 | MonthPlanted int, 23 | Width decimal(5,2), 24 | PRIMARY KEY (TreeID) 25 | ); 26 | GO 27 | CREATE TABLE EatingFrom ( 28 | FriendID int not null, 29 | TreeID int not null, 30 | FOREIGN KEY (FriendID) REFERENCES Friends (FriendID), 31 | FOREIGN KEY (TreeID) REFERENCES BananaTree (TreeID) 32 | ); 33 | GO 34 | CREATE TABLE Banana ( 35 | BananaID int not null, 36 | TasteRank int, -- from 1 to 5 37 | DatePicked date not null, 38 | DateEaten date not null, 39 | Ripe bit, 40 | TreeID int not null, 41 | Comments varchar(100), 42 | FOREIGN KEY (TreeID) REFERENCES BananaTree (TreeID), 43 | PRIMARY KEY (BananaID) 44 | ); 45 | GO 46 | 47 | INSERT INTO Colours VALUES 48 | (1 ,'blue' , 'similar to sky' ), 49 | (2 ,'green' , 'bad tasting bananas'), 50 | (3 ,'yellow' , 'my favourite because banana'), 51 | (4 ,'orange' , 'no connection to orangutan'), 52 | (5 ,'red' , 'generally a bad sign'), 53 | (6 ,'purple' , 'never seen this before'), 54 | (7 ,'brown' , 'many things are brown'), 55 | (8 ,'magenta' , 'one of the primary subtractive colours'), 56 | (9 ,'pink' , 'very manly'), 57 | (10,'lime' , 'almost green'), 58 | (11,'turquoise', 'not to be confused with tortoise'); 59 | GO 60 | 61 | INSERT INTO Friends VALUES 62 | (1 , 'Caesar' , 'Serkis', 3 ), 63 | (2 , 'Harambe' , 'Porter', 1 ), 64 | (3 , 'Aldo' , 'Atkins', 3 ), 65 | (4 , 'Cornelius', 'Porter', 1 ), 66 | (5 , 'Zira' , 'Porter', 4 ), 67 | (6 , 'Ishmael' , 'Serkis', 3 ), 68 | (7 , 'Monsieur' , 'Mallah', 3 ), 69 | (8 , 'Titano' , 'Atkins', 6 ), 70 | (9 , 'King' , 'Kong' , 3 ), 71 | (10, 'Bobo' , 'Kong' , 8 ), 72 | (11, 'Myster' , 'Ious' , NULL); 73 | GO 74 | 75 | INSERT INTO BananaTree VALUES 76 | (1, 5.5, 2018, 08, 31), 77 | (2, 4.3, 2018, 08, 27), 78 | (3, 4.7, 2018, 08, 36), 79 | (4, 3.8, 2018, 08, 20), 80 | (5, 6.2, 2018, 08, 40), 81 | (6, 6.4, 2016, 07, 23), 82 | (7, 5.4, 2016, 07, 32), 83 | (8, 5.2, 2016, 07, 31), 84 | (9, 4.8, 2016, 07, 19), 85 | (10, 4.5, 2015, 09, 28), 86 | (11, 5.8, 2015, 09, 35), 87 | (12, 7.5, 2015, 09, 45), 88 | (13, 6.4, 2015, 09, 30), 89 | (14, 5.0, 2015, 09, 24), 90 | (15, 5.1, 2014, 10, 34), 91 | (16, 4.2, 2014, 10, 23), 92 | (17, 5.4, 2014, 08, 39), 93 | (18, 5.2, 2014, 08, 28); 94 | GO 95 | 96 | INSERT INTO EatingFrom VALUES 97 | (3 ,1 ), 98 | (2 ,1 ), 99 | (1 ,1 ), 100 | (6 ,3 ), 101 | (10,5 ), 102 | (4 ,5 ), 103 | (9 ,5 ), 104 | (8 ,8 ), 105 | (7 ,8 ), 106 | (10,8 ), 107 | (5 ,8 ), 108 | (2 ,8 ), 109 | (3 ,8 ), 110 | (4 ,8 ), 111 | (6 ,2 ), 112 | (6 ,2 ), 113 | (6 ,2 ), 114 | (8 ,2 ), 115 | (9 ,1 ), 116 | (2 ,1 ), 117 | (1 ,1 ), 118 | (6 ,2 ), 119 | (6 ,2 ), 120 
| (10,8 ), 121 | (2 ,18), 122 | (6 ,15), 123 | (7 ,11), 124 | (2 ,14), 125 | (2 ,1 ); 126 | GO 127 | 128 | INSERT INTO Banana VALUES 129 | (1 , 2, '20181003', '20181004', 0, 1 , NULL), 130 | (2 , 4, '20181003', '20181004', 1, 2 , NULL), 131 | (3 , 4, '20181003', '20181004', 1, 2 , NULL), 132 | (4 , 5, '20181003', '20181006', 1, 1 , NULL), 133 | (5 , 5, '20181003', '20181006', 1, 2 , 'best banana ever'), 134 | (6 , 3, '20181003', '20181004', 1, 2 , NULL), 135 | (7 , 2, '20181002', '20181004', 0, 3 , NULL), 136 | (8 , 5, '20181002', '20181005', 1, 3 , 'smooth and delectable'), 137 | (9 , 3, '20181002', '20181003', 1, 4 , NULL), 138 | (10, 3, '20181002', '20181003', 1, 5 , NULL), 139 | (11, 2, '20181002', '20181003', 0, 5 , NULL), 140 | (12, 5, '20181002', '20181005', 1, 4 , NULL), 141 | (13, 1, '20181002', '20181002', 0, 9 , NULL), 142 | (14, 1, '20181001', '20181001', 0, 16, NULL), 143 | (15, 1, '20181001', '20181001', 0, 16, NULL), 144 | (16, 5, '20181001', '20181004', 1, 5 , 'a culinary delight'), 145 | (17, 5, '20181001', '20181004', 1, 6 , NULL), 146 | (18, 5, '20181001', '20181004', 1, 7 , NULL), 147 | (19, 5, '20181001', '20181004', 1, 8 , NULL), 148 | (20, 5, '20181001', '20181004', 1, 9 , 'soft with nutty undertones'), 149 | (21, 4, '20181001', '20181003', 1, 18, NULL), 150 | (22, 4, '20180930', '20181001', 1, 14, NULL), 151 | (23, 4, '20180930', '20181001', 1, 17, NULL), 152 | (24, 4, '20180930', '20181001', 1, 12, NULL), 153 | (25, 2, '20180930', '20181030', 1, 11, NULL), 154 | (26, 3, '20180930', '20181030', 0, 5 , 'good mid-range banana'), 155 | (27, 4, '20180930', '20181003', 1, 2 , NULL), 156 | (28, 5, '20180930', '20181003', 1, 10, NULL), 157 | (29, 5, '20180929', '20181003', 1, 11, NULL), 158 | (30, 1, '20180929', '20181001', 0, 4 , 'tasted like ape-shit'), 159 | (31, 1, '20180929', '20180929', 0, 7 , NULL), 160 | (32, 2, '20180929', '20181002', 0, 8 , NULL), 161 | (33, 5, '20180929', '20181002', 1, 12, NULL), 162 | (34, 4, '20180929', '20180930', 1, 2 , NULL), 163 | (35, 2, '20180929', '20180930', 0, 2 , NULL), 164 | (36, 3, '20180929', '20180930', 0, 18, NULL), 165 | (37, 3, '20180929', '20180930', 0, 13, NULL), 166 | (38, 4, '20180928', '20180929', 1, 15, NULL), 167 | (39, 5, '20180928', '20181001', 1, 13, NULL), 168 | (40, 2, '20180928', '20180930', 1, 12, NULL), 169 | (41, 1, '20180928', '20180928', 0, 15, NULL), 170 | (42, 1, '20180928', '20180928', 1, 12, 'had ants in it'), 171 | (43, 1, '20180928', '20180928', 0, 7 , NULL), 172 | (44, 4, '20180927', '20180929', 0, 7 , NULL), 173 | (45, 5, '20180927', '20180930', 1, 14, NULL), 174 | (46, 5, '20180927', '20180930', 1, 15, NULL), 175 | (47, 5, '20180927', '20180930', 1, 16, NULL), 176 | (48, 5, '20180927', '20180930', 1, 17, NULL), 177 | (49, 5, '20180927', '20180930', 1, 18, NULL), 178 | (50, 3, '20180927', '20180928', 1, 3 , NULL); 179 | GO -------------------------------------------------------------------------------- /create-database/SQLFiddle/T-SQL-notes-schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE Friends ( 2 | FriendID int not null, 3 | FirstName varchar(20), 4 | LastName varchar(20), 5 | FavColour varchar(20), 6 | PRIMARY KEY (FriendID) 7 | ); 8 | GO 9 | CREATE TABLE Pets ( 10 | PetID int not null, 11 | PetName varchar(20), 12 | PetDOB date, 13 | FriendID int not null, 14 | FOREIGN KEY (FriendID) REFERENCES Friends (FriendID), 15 | PRIMARY KEY (PetID) 16 | ); 17 | GO 18 | CREATE TABLE Scratched ( 19 | ScratcherID int not null, 20 | ScratchDate 
date, 21 | ScratchTime time, 22 | ScratcheeID int not null, 23 | FOREIGN KEY (ScratcherID) REFERENCES Friends (FriendID), 24 | FOREIGN KEY (ScratcheeID) REFERENCES Friends (FriendID), 25 | PRIMARY KEY (ScratcherID, ScratcheeID, ScratchDate, ScratchTime) 26 | ); 27 | GO 28 | CREATE TABLE PlayCount ( 29 | PetID int not null, 30 | PlayCounter int, 31 | FriendID int not null, 32 | FOREIGN KEY (PetID) REFERENCES Pets (PetID), 33 | FOREIGN KEY (FriendID) REFERENCES Friends (FriendID), 34 | PRIMARY KEY (PetID, FriendID) 35 | ); 36 | GO 37 | CREATE TABLE Passports ( 38 | PptNo varchar(5) not null, 39 | PptCountry varchar(20), 40 | PptExpiry date, 41 | FriendID int, 42 | FOREIGN KEY (FriendID) REFERENCES Friends (FriendID), 43 | PRIMARY KEY (PptNo) 44 | ); 45 | GO 46 | CREATE TABLE Table1 ( 47 | A int not null, 48 | B varchar(20), 49 | C varchar(20), 50 | PRIMARY KEY (A) 51 | ); 52 | GO 53 | CREATE TABLE Table2 ( 54 | D varchar(20), 55 | E int not null, 56 | A int not null, 57 | FOREIGN KEY (A) REFERENCES Table1 (A), 58 | PRIMARY KEY (E) 59 | ); 60 | GO 61 | CREATE TABLE Letters ( 62 | A char, 63 | B char, 64 | Num int not null, 65 | PRIMARY KEY (Num) 66 | ); 67 | GO 68 | CREATE TABLE RandomPeople ( 69 | PersonName varchar(20), 70 | Gender char(2), 71 | Age int 72 | ); 73 | GO 74 | CREATE TABLE Houses ( 75 | house_ID varchar(5) not null, 76 | house_owner varchar(50), 77 | house_address varchar(200), 78 | post_code varchar(4), 79 | house_price decimal(10,2), 80 | PRIMARY KEY (house_ID) 81 | ); 82 | GO 83 | CREATE TABLE Suburbs ( 84 | post_code varchar(5) not null, 85 | suburb_name varchar(100) not null, 86 | vaccination_rate decimal(3,2), 87 | PRIMARY KEY (post_code, suburb_name) 88 | ); 89 | GO 90 | 91 | CREATE TABLE Numbers ( 92 | Num int, 93 | NumString varchar(10) 94 | ); 95 | GO 96 | 97 | CREATE TABLE EduStudy ( 98 | Id varchar(6) not null, 99 | Income varchar(8), 100 | Education int, 101 | PRIMARY KEY (Id) 102 | ); 103 | GO 104 | 105 | CREATE TABLE Orders ( 106 | OrderID int not null, 107 | Item varchar(30), 108 | Price decimal(5,2), 109 | OrderDT datetime, 110 | PRIMARY KEY (OrderID) 111 | ); 112 | GO 113 | 114 | INSERT INTO Orders VALUES 115 | (1, 'Boiled leaves' , 2.99 , '2021-12-31 15:13:00'), 116 | (2, 'Bow wow' , 15 , '2021-12-31 15:34:00'), 117 | (3, 'Cackleberry stew', 32.55, '2022-01-01 09:32:00'), 118 | (4, 'Mug of murk' , 4.40 , '2022-01-01 10:16:00'); 119 | GO 120 | 121 | INSERT INTO EduStudy VALUES 122 | ('EI13', 'low', 5), 123 | ('EI122', 'low', 1), 124 | ('EI281', 'low-mid', 4), 125 | ('EI3332', 'middle', 3), 126 | ('EI4751', 'high-mid', 3), 127 | ('EI12', 'high', 2); 128 | GO 129 | 130 | INSERT INTO Numbers VALUES 131 | (111, '111'), 132 | (31, '31'), 133 | (32, '32'), 134 | (211, '211'); 135 | GO 136 | 137 | 138 | INSERT INTO Houses VALUES 139 | ('H0001', 'Millard Claassen' , '7235 East Van Dyke St' , '3128', 300000), 140 | ('H0002', 'Jamie Pew' , '8914 South Sunnyslope Dr', '3128', 150000), 141 | ('H0003', 'Bethel Viviano' , '87 South West Halifax St', '3142', 400000), 142 | ('H0004', 'Brandi Hovis' , '676 Ocean St' , '3142', 360000), 143 | ('H0005', 'Mei Colby' , '62 West Park Ave' , '3556', 220000), 144 | ('H0006', 'Marilu Munz' , '62 Elm Road' , '3083', 120000), 145 | ('H0007', 'Rhiannon Carwile' , '7005 Anderson Ave' , '3779', 500000), 146 | ('H0008', 'Joycelyn Hamburger' , '7410 Park Drive' , '3778', 550000), 147 | ('H0009', 'Leopoldo Flaherty' , '3 Dogwood Dr' , '3083', 1200000), 148 | ('H0010', 'Xavier Farrer' , '767 Rockville Street' , '3083', 100000), 149 | ('H0011', 
'Waldo Wingboard' , '8712 Thorne Street' , NULL, 640000), 150 | ('H0012', 'Jimmy Jenkins' , '32 Rosey Cres' , NULL, 70000); 151 | GO 152 | 153 | -- There is no primary / foreign key pair for Houses and Suburbs. 154 | -- The primary key of Suburbs is not what you might expect (it is not post_code alone). 155 | -- 3128 connects to 2 suburbs 156 | -- some houses have a NULL post_code, so they match no suburb 157 | -- 3142 has no corresponding suburb 158 | -- the data type for post_code in Suburbs is varchar(5) (Houses uses varchar(4)), and one suburb has postcode '33128' 159 | 160 | INSERT INTO Suburbs VALUES 161 | ('3128' , 'Erebor' , 0.8), 162 | ('33128', 'Erberor' , 0.8), 163 | ('3128' , 'Fangorn' , 0.2), 164 | ('3779' , 'Durin' , 0.4), 165 | ('3556' , 'Gondor' , 0.65), 166 | ('3083' , 'Isengaard', 0.35); 167 | GO 168 | 169 | INSERT INTO Friends VALUES 170 | (1, 'X', 'A', 'red'), 171 | (2, 'Y', 'B', 'blue'), 172 | (3, 'Z', 'C', NULL); 173 | GO 174 | INSERT INTO Pets VALUES 175 | (1, 'Chikin', '20160924', 2), 176 | (2, 'Cauchy', '20120301', 3), 177 | (3, 'Gauss', '20120301', 3); 178 | GO 179 | INSERT INTO Scratched VALUES 180 | (1, '20180905', '12:00', 2), 181 | (1, '20180905', '12:30', 3), 182 | (2, '20180906', '11:00', 1), 183 | (3, '20180907', '10:00', 1), 184 | (2, '20180801', '16:15', 1), 185 | (2, '20180801', '13:00', 3), 186 | (1, '20170214', '04:30', 2), 187 | (3, '20200128', '18:00', 1); 188 | GO 189 | INSERT INTO PlayCount VALUES 190 | (1, 3, 1), 191 | (1, 5, 2), 192 | (3, 4, 2); 193 | GO 194 | INSERT INTO Passports VALUES 195 | ('E1321', 'Australia', '20210312', NULL), 196 | ('LA123', 'New Zealand', '20320901', 2), 197 | ('S9876', 'Monaco', '20280619', 3); 198 | 199 | GO 200 | INSERT INTO Table1 ( 201 | A, B, C 202 | ) VALUES 203 | (1, 'Ignorance', 'is'), 204 | (2, 'War', 'is'), 205 | (3, 'Freedom', 'is'), 206 | (4, 'Friendship', 'is'); 207 | GO 208 | 209 | INSERT INTO Table2 ( 210 | D, E, A 211 | ) VALUES 212 | ('slavery.', 3, 1), 213 | ('weakness.', 4, 2), 214 | ('strength.', 1, 3), 215 | ('peace.', 2, 4); 216 | GO 217 | 218 | INSERT INTO Letters VALUES 219 | ('a', 'b', 1), 220 | ('a', 'c', 2), 221 | ('a', 'b', 3), 222 | ('a', 'c', 4); 223 | GO 224 | 225 | INSERT INTO RandomPeople VALUES 226 | ('Beyonce', 'F', 37), 227 | ('Laura Marling', 'F', 28), 228 | ('Darren Hayes', 'M', 46), 229 | ('Bret McKenzie', 'M', 42), 230 | ('Jack Monroe', 'NB', 30); -------------------------------------------------------------------------------- /create-database/T-SQL/.Rhistory: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frycast/SQL_course/13d5cf01fdd82b7cf8de3bb29846b5d595d07ab4/create-database/T-SQL/.Rhistory -------------------------------------------------------------------------------- /create-database/T-SQL/README.md: -------------------------------------------------------------------------------- 1 | # T-SQL create database guide 2 | 3 | This is a guide to installing a local SQL Server instance on Windows or Mac, so you can start using T-SQL. 4 | 5 | This will guide you through **installing the server and editor** and then **creating the databases**. The editor we will use is Azure Data Studio. 6 | 7 | ## Windows 8 | 9 | ### Windows: installing the server and editor 10 | 11 | 1. Navigate to [Azure Data Studio](https://docs.microsoft.com/en-us/sql/azure-data-studio/download-azure-data-studio) and download the 'User Installer' for Windows. 12 | 2. Run the installer, clicking through to accept all the default options. 13 | 3. Launch Azure Data Studio and click 'Deploy a Server'. 14 | 4. 
A window opens asking you to select deployment options. Choose 'SQL Server on Windows' and click 'Select'. 15 | 5. The deployment prerequisites window opens. Click 'Select'. This will cause SQL Server 2019 Developer Edition to download and begin installing. 16 | 6. In the SQL Server installer window, choose installation type 'Basic'. 17 | 7. Read and accept the terms and conditions. 18 | 8. The install location window opens. Leave the default install location. Click 'install'. 19 | 9. Once installation completes, click 'Close'. 20 | 10. In Azure Data Studio click 'Create a connection'. 21 | 11. The 'Connection Details' panel opens. Make sure the Connection type is 'Microsoft SQL Server' and the Authentication type is 'Windows Authentication'. For Server, type 'localhost'. Leave the username and password blank. Click 'Connect'. 22 | 12. Azure Data Studio is now connected to your SQL Server. 23 | 24 | ### Windows: creating the databases 25 | 26 | 13. **Right click** [this link to the T-SQL database script](https://raw.githubusercontent.com/frycast/SQL_course/master/create-database/T-SQL/T-SQL-database.sql) and click '**save link as**'. Save the file somewhere you can find it. The filename should be 'T-SQL-database.sql'. 27 | 14. Find 'T-SQL-database.sql' on your computer and click and drag it into the Azure Data Studio window. 28 | 15. Click the green play button to execute the script, or press F5. 29 | 16. The databases have now been created. 30 | 31 | ## Mac 32 | 33 | ### Mac: installing the server and editor 34 | 35 | 1. Navigate to [Azure Data Studio](https://docs.microsoft.com/en-us/sql/azure-data-studio/download-azure-data-studio) and download the '.zip file' for macOS. 36 | 2. Extract 'Azure Data Studio' (app file) from the zip file. This should happen automatically after your download. 37 | 3. Drag 'Azure Data Studio' (app file) to the Applications folder. 38 | 4. The remainder of the process involves installing Docker and using it to pull a SQL Server container. Follow [the guide here](https://database.guide/how-to-install-sql-server-on-a-mac/). 39 | 5. Once you have connected Azure Data Studio to a local SQL Server 2019 instance, you are done. 40 | 41 | ### Mac: creating the databases 42 | 43 | 6. **Control-click** [this link to the T-SQL database script](https://raw.githubusercontent.com/frycast/SQL_course/master/create-database/T-SQL/T-SQL-database.sql) and click '**save link as**'. Save the file somewhere you can find it. The filename should be 'T-SQL-database.sql'. 44 | 7. Find 'T-SQL-database.sql' on your computer and click and drag it into the Azure Data Studio window. 45 | 8. Click the green play button to execute the script, or press F5. 46 | 9. The databases have now been created. 47 | -------------------------------------------------------------------------------- /create-database/T-SQL/T-SQL-database.sql: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------- 2 | 3 | -- Practice data for the Intro to SQL Course by Daniel Fryer. 
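-- Optional sanity check, assuming the whole script below runs without errors:
-- after executing it you can list the practice databases it creates
-- (IDI_Clean, IDI_Metadata, Sandpit and Analytics) with the catalog view
-- sys.databases, for example
--
--   SELECT name
--   FROM sys.databases
--   WHERE name IN ('IDI_Clean', 'IDI_Metadata', 'Sandpit', 'Analytics');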
4 | 5 | ------------------------------------------------------------------- 6 | 7 | ------------------------------------------------------------------- 8 | -- CREATE IDI_CLEAN DATABASE ------------------------------------- 9 | ------------------------------------------------------------------- 10 | USE master; 11 | GO 12 | 13 | DROP DATABASE IF EXISTS IDI_Clean; 14 | GO 15 | 16 | CREATE DATABASE IDI_Clean; 17 | GO -- GO is a T-SQL batch terminator 18 | 19 | USE IDI_Clean; 20 | GO 21 | 22 | -- DIA (Department of Internal Affairs) 23 | CREATE SCHEMA DIA_clean; 24 | GO 25 | 26 | CREATE TABLE DIA_Clean.births ( 27 | snz_uid int not null UNIQUE, 28 | snz_dia_uid int not null UNIQUE, 29 | parent1_snz_uid int null, 30 | parent1_snz_dia_uid int null, 31 | dia_bir_parent1_sex_snz_code varchar(100) null, 32 | dia_bir_parent1_occupation_text varchar(60) null, 33 | parent2_snz_uid int null, 34 | parent2_snz_dia_uid int null, 35 | dia_bir_parent2_sex_snz_code varchar(100) null, 36 | dia_bir_parent2_occupation_text varchar(60) null, 37 | dia_bir_birth_month_nbr tinyint null, 38 | dia_bir_birth_year_nbr smallint null, 39 | dia_bir_sex_snz_code varchar(100) null, 40 | dia_bir_ethnic_grp1_snz_ind bit not null, -- European 41 | dia_bir_ethnic_grp2_snz_ind bit not null, -- Maori 42 | dia_bir_ethnic_grp3_snz_ind bit not null, -- Pacific 43 | dia_bir_ethnic_grp4_snz_ind bit not null, -- Asian 44 | dia_bir_ethnic_grp5_snz_ind bit not null, -- MELAA 45 | dia_bir_ethnic_grp6_snz_ind bit not null); -- Other 46 | GO 47 | CREATE TABLE DIA_Clean.deaths ( 48 | snz_uid int not null UNIQUE, 49 | snz_dia_uid int not null UNIQUE, 50 | dia_dth_death_month_nbr tinyint null, 51 | dia_dth_death_year_nbr smallint null, 52 | dia_dth_last_seen_month_nbr tinyint null, 53 | dia_dth_last_seen_year_nbr smallint null); 54 | GO 55 | CREATE TABLE DIA_Clean.marriages ( 56 | partnr1_snz_uid int not null UNIQUE, 57 | partnr1_snz_dia_uid int not null UNIQUE, 58 | partnr2_snz_uid int not null UNIQUE, 59 | partnr2_snz_dia_uid int not null UNIQUE, 60 | dia_mar_partnr1_birth_month_nbr tinyint null, 61 | dia_mar_partnr1_birth_year_nbr smallint null, 62 | dia_mar_partnr1_sex_snz_code varchar(100) null, 63 | dia_mar_partnr1_occupation_text varchar(60) null, 64 | dia_mar_partnr2_birth_month_nbr tinyint null, 65 | dia_mar_partnr2_birth_year_nbr smallint null, 66 | dia_mar_partnr2_sex_snz_code varchar(100) null, 67 | dia_mar_partnr2_occupation_text varchar(60) null); 68 | GO 69 | CREATE TABLE DIA_Clean.civil_unions ( 70 | partnr1_snz_uid int not null UNIQUE, 71 | partnr1_snz_dia_uid int not null UNIQUE, 72 | partnr2_snz_uid int not null UNIQUE, 73 | partnr2_snz_dia_uid int not null UNIQUE, 74 | dia_civ_partnr1_birth_month_nbr tinyint null, 75 | dia_civ_partnr1_birth_year_nbr smallint null, 76 | dia_civ_partnr1_sex_snz_code varchar(100) null, 77 | dia_civ_partnr1_occupation_text varchar(60) null, 78 | dia_civ_partnr2_birth_month_nbr tinyint null, 79 | dia_civ_partnr2_birth_year_nbr smallint null, 80 | dia_civ_partnr2_sex_snz_code varchar(100) null, 81 | dia_civ_partnr2_occupation_text varchar(60) null); 82 | GO 83 | 84 | CREATE SCHEMA [security]; 85 | GO 86 | GO 87 | 88 | CREATE TABLE security.concordance ( 89 | snz_uid int not null UNIQUE, 90 | snz_dia_uid int, 91 | snz_ird_uid int, 92 | snz_moh_uid int, 93 | snz_cen_uid int, 94 | snz_acc_uid int, 95 | snz_dol_uid int, 96 | snz_in_spine bit not null); 97 | GO 98 | 99 | CREATE SCHEMA [data]; 100 | GO 101 | 102 | -- Researchers may wish to exclude people who 103 | -- were overseas 104 | CREATE TABLE 
[data].person_overseas_spell ( 105 | snz_uid int not null UNIQUE, 106 | pos_applied_date date null, 107 | pos_ceased_date date null); 108 | GO 109 | 110 | -- The column with 0 or 1 indicates 111 | -- if an individual is in the spine 112 | CREATE TABLE [data].personal_detail ( 113 | snz_uid int not null UNIQUE, 114 | snz_in_spine int not null 115 | ); 116 | GO 117 | 118 | -- In this table there should be 119 | -- one row for each unique combination 120 | -- of snz_uid and year 121 | CREATE TABLE [data].snz_res_pop ( 122 | snz_uid int not null UNIQUE, 123 | year int not null 124 | ); 125 | GO 126 | 127 | -- I CREATED THESE EMPTY TABLES SO THE TABLE 128 | -- NAMES WOULD MATCH SOME OF THOSE IN THE 129 | -- DATA SCHEMA 130 | CREATE TABLE [data].source_ranked_ethnicity ( 131 | snz_uid int not null UNIQUE, 132 | dummy1 int, 133 | dummy2 int, 134 | dummy3 int); 135 | GO 136 | CREATE TABLE [data].income_tax_yr_summary ( 137 | snz_uid int not null UNIQUE, 138 | dummy1 int, 139 | dummy2 int, 140 | dummy3 int); 141 | GO 142 | CREATE TABLE [data].income_cal_yr_summary ( 143 | snz_uid int not null UNIQUE, 144 | dummy1 int, 145 | dummy2 int, 146 | dummy3 int); 147 | GO 148 | CREATE TABLE [data].address_notification ( 149 | snz_uid int not null UNIQUE, 150 | dummy1 int, 151 | dummy2 int, 152 | dummy3 int); 153 | GO 154 | 155 | CREATE SCHEMA ACC_Clean; 156 | GO 157 | 158 | CREATE TABLE ACC_Clean.Medical_Codes ( 159 | snz_acc_claim_uid int not null, 160 | acc_med_injury_count_nbr smallint, 161 | acc_med_read_code varchar(6), 162 | acc_med_read_code_text varchar(255), 163 | acc_med_injury_precedence_nbr int not null 164 | ); 165 | GO 166 | 167 | CREATE TABLE ACC_Clean.Serious_Injury ( 168 | snz_uid int not null, 169 | snz_acc_uid int not null, 170 | snz_employee_ird_uid int null, 171 | snz_employer_ird_uid int null, 172 | acc_cla_accident_date date null, 173 | acc_cla_claim_costs_to_date_ex_gst_amt decimal(13,2), 174 | acc_cla_ethnic_grp1_snz_uid bit not null, 175 | acc_cla_ethnic_grp2_snz_uid bit not null, 176 | acc_cla_ethnic_grp3_snz_uid bit not null, 177 | acc_cla_ethnic_grp4_snz_uid bit not null, 178 | acc_cla_ethnic_grp5_snz_uid bit not null, 179 | acc_cla_ethnic_grp6_snz_uid bit not null, 180 | snz_acc_claim_uid int not null, 181 | acc_cla_meshblock_code varchar(7) null 182 | ); 183 | GO 184 | 185 | INSERT INTO ACC_Clean.Medical_Codes 186 | ( snz_acc_claim_uid, 187 | acc_med_injury_count_nbr, 188 | acc_med_read_code, 189 | acc_med_read_code_text, 190 | acc_med_injury_precedence_nbr) 191 | VALUES 192 | (1 ,2, 'a', 'broken leg' ,1 ), 193 | (2 ,3, 'b', 'popped out eyeball',2 ), 194 | (3 ,1, 'a', 'broken leg' ,1 ), 195 | (4 ,2, 'a', 'broken leg' ,1 ), 196 | (5 ,3, 'b', 'popped out eyeball',1 ), 197 | (6 ,4, 'b', 'popped out eyeball',2 ), 198 | (7 ,2, 'a', 'broken leg' ,2 ), 199 | (8 ,2, 'c', 'exploded lung' ,3 ), 200 | (9 ,3, 'c', 'exploded lung' ,3 ), 201 | (10,3, 'c', 'exploded lung' ,3 ); 202 | GO 203 | 204 | INSERT INTO ACC_Clean.Serious_Injury 205 | ( snz_uid , 206 | snz_acc_uid , 207 | snz_employee_ird_uid , 208 | snz_employer_ird_uid , 209 | acc_cla_accident_date , 210 | acc_cla_claim_costs_to_date_ex_gst_amt , 211 | acc_cla_ethnic_grp1_snz_uid , 212 | acc_cla_ethnic_grp2_snz_uid , 213 | acc_cla_ethnic_grp3_snz_uid , 214 | acc_cla_ethnic_grp4_snz_uid , 215 | acc_cla_ethnic_grp5_snz_uid , 216 | acc_cla_ethnic_grp6_snz_uid , 217 | snz_acc_claim_uid , 218 | acc_cla_meshblock_code ) 219 | VALUES 220 | (10, 29 ,12 ,42 , '20160901', 15000 ,1 ,0 ,0 ,1 ,0 ,1 ,1 , 'MZ321' ), 221 | (2 , 23 ,14 ,32 , 
'20160912', 12000 ,1 ,0 ,0 ,0 ,0 ,0 ,2 , 'KL653' ), 222 | (1 , 22 ,17 ,32 , '20160913', 130000 ,0 ,1 ,0 ,0 ,0 ,0 ,3 , 'DF24' ), 223 | (4 , 21 ,18 ,54 , '20160923', 132000 ,0 ,1 ,0 ,0 ,0 ,0 ,4 , 'EW321' ), 224 | (7 , 26 ,12 ,65 , '20160902', 23000 ,0 ,1 ,0 ,0 ,0 ,0 ,5 , 'EW321' ), 225 | (9 , 25 ,19 ,65 , '20160921', 32000 ,0 ,0 ,0 ,1 ,0 ,0 ,6 , 'KL432' ), 226 | (5 , 24 ,19 ,23 , '20160918', 500 ,1 ,0 ,0 ,0 ,0 ,0 ,7 , 'EW234' ), 227 | (8 , 28 ,15 ,42 , '20160916', 120 ,0 ,0 ,0 ,0 ,1 ,0 ,8 , 'FD432' ), 228 | (6 , 27 ,14 ,42 , '20160918', 130 ,0 ,1 ,0 ,0 ,0 ,0 ,9 , 'HFD432'), 229 | (3 , 20 ,12 ,42 , '20160919', 45000 ,1 ,1 ,0 ,0 ,0 ,0 ,10 , 'FGV432'); 230 | GO 231 | 232 | INSERT INTO security.concordance ( 233 | snz_uid , 234 | snz_dia_uid , 235 | snz_ird_uid , 236 | snz_moh_uid , 237 | snz_cen_uid , 238 | snz_acc_uid , 239 | snz_dol_uid , 240 | snz_in_spine) 241 | VALUES 242 | (10 ,34 , NULL , NULL, NULL ,29 ,NULL, 1), 243 | (2 ,55 , NULL , NULL, NULL ,23 ,NULL,1), 244 | (1 ,32 , NULL , NULL, NULL ,22 ,NULL,1), 245 | (4 ,1 , NULL , NULL, NULL ,21 ,NULL,1), 246 | (7 ,67 , NULL , NULL, NULL ,26 ,NULL,1), 247 | (9 ,NULL , 3 , NULL, NULL ,25 ,NULL,0), 248 | (5 ,32 , 1 , NULL, NULL ,24 ,NULL,0), 249 | (8 ,43 , 2 , NULL, NULL ,28 ,NULL,1), 250 | (6 ,23 , 4 , NULL, NULL ,27 ,NULL,1), 251 | (3 ,123 , 6 , NULL, NULL ,20 ,NULL,1), 252 | (11 ,NULL , 7 , NULL, NULL ,NULL,NULL,1), 253 | (12 ,65 , 5 , NULL, NULL ,NULL,NULL,0), 254 | (13 ,NULL , 10 , NULL, NULL ,6 ,NULL,1), 255 | (14 ,NULL , 12 , NULL, NULL ,1 ,NULL,1), 256 | (15 ,NULL , 43 , NULL, NULL ,NULL,NULL,1), 257 | (16 ,765 , 44 , NULL, NULL ,NULL,NULL,0), 258 | (17 ,NULL , 34 , NULL, NULL ,7 ,NULL,1), 259 | (18 ,76 , 100 , NULL, NULL ,NULL,NULL,0), 260 | (19 ,NULL , 101 , NULL, 32 ,3 ,NULL,0); 261 | GO 262 | 263 | --CREATE TABLE dia_clean.births ( 264 | -- snz_uid int not null UNIQUE, 265 | -- snz_dia_uid int not null UNIQUE, 266 | -- parent1_snz_uid int null, 267 | -- parent1_snz_dia_uid int null, 268 | -- dia_bir_parent1_sex_snz_code varchar(100) null, 269 | -- dia_bir_parent1_occupation_text varchar(60) null, 270 | -- parent2_snz_uid int null, 271 | -- parent2_snz_dia_uid int null, 272 | -- dia_bir_parent2_sex_snz_code varchar(100) null, 273 | -- dia_bir_parent2_occupation_text varchar(60) null, 274 | -- dia_bir_birth_month_nbr tinyint null, 275 | -- dia_bir_birth_year_nbr smallint null, 276 | -- dia_bir_sex_snz_code varchar(100) null, 277 | -- dia_bir_ethnic_grp1_snz_ind bit not null, -- European 278 | -- dia_bir_ethnic_grp2_snz_ind bit not null, -- Maori 279 | -- dia_bir_ethnic_grp3_snz_ind bit not null, -- Pacific 280 | -- dia_bir_ethnic_grp4_snz_ind bit not null, -- Asian 281 | -- dia_bir_ethnic_grp5_snz_ind bit not null, -- MELAA 282 | -- dia_bir_ethnic_grp6_snz_ind bit not null); -- Other 283 | --GO 284 | --CREATE TABLE dia_clean.deaths ( 285 | -- snz_uid int not null UNIQUE, 286 | -- snz_dia_uid int not null UNIQUE, 287 | -- dia_dth_death_month_nbr tinyint null, 288 | -- dia_dth_death_year_nbr smallint null, 289 | -- dia_dth_last_seen_month_nbr tinyint null, 290 | -- dia_dth_last_seen_year_nbr smallint null,); 291 | --GO 292 | --CREATE TABLE dia_clean.marriages ( 293 | -- partnr1_snz_uid int not null UNIQUE, 294 | -- partnr1_snz_dia_uid int not null UNIQUE, 295 | -- partnr2_snz_uid int not null UNIQUE, 296 | -- partnr2_snz_dia_uid int not null UNIQUE, 297 | -- dia_mar_partnr1_birth_month_nbr tinyint null, 298 | -- dia_mar_partnr1_birth_year_nbr smallint null, 299 | -- dia_mar_partnr1_sex_snz_code varchar(100) null, 300 | -- 
dia_mar_partnr1_occupation_text varchar(60) null, 301 | -- dia_mar_partnr2_birth_month_nbr tinyint null, 302 | -- dia_mar_partnr2_birth_year_nbr smallint null, 303 | -- dia_mar_partnr2_sex_snz_code varchar(100) null, 304 | -- dia_mar_partnr2_occupation_text varchar(60) null); 305 | --GO 306 | --CREATE TABLE dia_clean.civil_unions ( 307 | -- partnr1_snz_uid int not null UNIQUE, 308 | -- partnr1_snz_dia_uid int not null UNIQUE, 309 | -- partnr2_snz_uid int not null UNIQUE, 310 | -- partnr2_snz_dia_uid int not null UNIQUE, 311 | -- dia_civ_partnr1_birth_month_nbr tinyint null, 312 | -- dia_civ_partnr1_birth_year_nbr smallint null, 313 | -- dia_civ_partnr1_sex_snz_code varchar(100) null, 314 | -- dia_civ_partnr1_occupation_text varchar(60) null, 315 | -- dia_civ_partnr2_birth_month_nbr tinyint null, 316 | -- dia_civ_partnr2_birth_year_nbr smallint null, 317 | -- dia_civ_partnr2_sex_snz_code varchar(100) null, 318 | -- dia_civ_partnr2_occupation_text varchar(60) null); 319 | --GO 320 | 321 | -- INSERT INTO DIA_Clean.civil_unions VALUES (10, 34, 6 , 23 , 1, 1975, 1, NULL, 1, 1976, 1, NULL); 322 | -- INSERT INTO DIA_Clean.civil_unions VALUES (2, 55, 3 , 123 , 2, 1966, 0, NULL, 6, 1969, 1, NULL); 323 | -- INSERT INTO DIA_Clean.civil_unions VALUES (1, 32, 12, 65 , 5, 1977, 0, NULL, 4, 1973, 1, NULL); 324 | -- INSERT INTO DIA_Clean.civil_unions VALUES (4, 1, 16, 765 , 5, 1988, 1, NULL, 4, 1989, 0, NULL); 325 | -- INSERT INTO DIA_Clean.civil_unions VALUES (7, 67, 18, 76 , 9, 1999, 0, NULL, 12, 1995, 0, NULL); 326 | -- GO 327 | 328 | ----------------------------------------------------------------- 329 | -- CREATE IDI_Metadata DATABASE --------------------------------- 330 | ----------------------------------------------------------------- 331 | USE master; 332 | GO 333 | 334 | DROP DATABASE IF EXISTS IDI_Metadata; 335 | GO 336 | 337 | CREATE DATABASE IDI_Metadata; 338 | GO 339 | 340 | USE IDI_Metadata; 341 | GO 342 | 343 | CREATE SCHEMA clean_read_CLASSIFICATIONS; 344 | GO 345 | 346 | -- I'm not sure what the table names or 347 | -- variable names are in here so these 348 | -- are just made up to simulate the functionality 349 | CREATE TABLE clean_read_CLASSIFICATIONS.ethnicity ( 350 | ethnic_grp int not null, --a number from 1 to 6 351 | description varchar(100) not null 352 | ); 353 | GO 354 | 355 | CREATE TABLE clean_read_CLASSIFICATIONS.post_codes ( 356 | post_code int not null, 357 | description varchar(100) 358 | ); 359 | GO 360 | 361 | INSERT INTO clean_read_CLASSIFICATIONS.ethnicity 362 | (ethnic_grp, description) 363 | VALUES 364 | (1, 'European'), 365 | (2, 'Maori'), 366 | (3, 'Pacific Peoples'), 367 | (4, 'Asian'), 368 | (5, 'Middle Eastern/Latin American/African'), 369 | (6, 'Other ethnicity'); 370 | 371 | ----------------------------------------------------------------- 372 | -- CREATE OLDER IDI_Clean REFRESH DATABASE ---------------------- 373 | ----------------------------------------------------------------- 374 | 375 | --INSERT INTO security.concordance ( 376 | -- snz_uid , 377 | -- snz_dia_uid , 378 | -- snz_ird_uid , 379 | -- snz_moh_uid , 380 | -- snz_cen_uid , 381 | -- snz_acc_uid , 382 | -- snz_dol_uid , 383 | -- snz_in_spine) 384 | --VALUES 385 | --(13, 34 , NULL , NULL, NULL ,NULL,NULL, 1), 386 | --(21 ,55 , NULL , NULL, NULL ,2 ,NULL,1), 387 | --(34 ,32 , NULL , NULL, NULL ,NULL,NULL,1), 388 | --(24 ,1 , NULL , NULL, NULL ,4 ,NULL,1), 389 | --(55 ,67 , NULL , NULL, NULL ,5 ,NULL,1), 390 | --(7 ,NULL , 3 , NULL, NULL ,NULL,NULL,0), 391 | --(17 ,32 , 1 , NULL, NULL 
,NULL,NULL,0), 392 | --(28 ,43 , 2 , NULL, NULL ,8 ,NULL,1), 393 | --(59 ,23 , 4 , NULL, NULL ,9 ,NULL,1), 394 | --(1237,123 , 6 , NULL, NULL ,10 ,NULL,1), 395 | --(121,NULL , 7 , NULL, NULL ,NULL,NULL,1), 396 | --(345,65 , 5 , NULL, NULL ,NULL,NULL,0), 397 | --(765, NULL , 10 , NULL, NULL ,6 ,NULL,1), 398 | --(1432, NULL , 12 , NULL, NULL ,1 ,NULL,1), 399 | --(873, NULL , 43 , NULL, NULL ,NULL,NULL,1), 400 | --(3,765 , 44 , NULL, NULL ,NULL,NULL,0), 401 | --(5,NULL , 34 , NULL, NULL ,7 ,NULL,1), 402 | --(78,76 , 100 , NULL, NULL ,NULL,NULL,0), 403 | --(1,NULL , 101 , NULL, 32 ,3 ,NULL,0); 404 | --GO 405 | 406 | ----------------------------------------------------------------- 407 | -- CREATE Sandpit DATABASE -------------------------------------- 408 | -------------------------------------------------------------- --- 409 | USE master; 410 | GO 411 | 412 | DROP DATABASE IF EXISTS Sandpit; 413 | GO 414 | 415 | CREATE DATABASE Sandpit; 416 | GO 417 | 418 | USE Sandpit; 419 | GO 420 | 421 | --- APE SCHEMA -- 422 | CREATE SCHEMA Ape; 423 | GO 424 | CREATE TABLE Ape.Colours ( 425 | ColourID int not null, 426 | ColourName varchar(20) not null, 427 | Comments varchar(100), -- I find this Colour strange etc. 428 | PRIMARY KEY (ColourID) 429 | ); 430 | GO 431 | CREATE TABLE Ape.Friends ( 432 | FriendID int not null, 433 | FirstName varchar(20), 434 | LastName varchar(20), 435 | FavColourID int, 436 | FOREIGN KEY (FavColourID) REFERENCES Ape.Colours (ColourID), 437 | PRIMARY KEY (FriendID) 438 | ); 439 | GO 440 | CREATE TABLE Ape.BananaTree ( 441 | TreeID int not null, 442 | Height decimal(5,2), 443 | YearPlanted int, 444 | MonthPlanted int, 445 | Width decimal(5,2), 446 | PRIMARY KEY (TreeID) 447 | ); 448 | GO 449 | CREATE TABLE Ape.EatingFrom ( 450 | FriendID int not null, 451 | TreeID int not null, 452 | FOREIGN KEY (FriendID) REFERENCES Ape.Friends (FriendID), 453 | FOREIGN KEY (TreeID) REFERENCES Ape.BananaTree (TreeID) 454 | ); 455 | GO 456 | CREATE TABLE Ape.Banana ( 457 | BananaID int not null, 458 | TasteRank int, -- from 1 to 5 459 | DatePicked date not null, 460 | DateEaten date not null, 461 | Ripe bit, 462 | TreeID int not null, 463 | Comments varchar(100), 464 | FOREIGN KEY (TreeID) REFERENCES Ape.BananaTree (TreeID), 465 | PRIMARY KEY (BananaID) 466 | ); 467 | GO 468 | 469 | INSERT INTO Ape.Colours VALUES 470 | (1 ,'blue' , 'similar to sky' ), 471 | (2 ,'green' , 'bad tasting bananas'), 472 | (3 ,'yellow' , 'my favourite because banana'), 473 | (4 ,'orange' , 'no connection to orangutan'), 474 | (5 ,'red' , 'generally a bad sign'), 475 | (6 ,'purple' , 'never seen this before'), 476 | (7 ,'brown' , 'many things are brown'), 477 | (8 ,'magenta' , 'one of the primary subtractive colours'), 478 | (9 ,'pink' , 'very manly'), 479 | (10,'lime' , 'almost green'), 480 | (11,'turquoise', 'not to be confused with tortoise'); 481 | GO 482 | 483 | INSERT INTO Ape.Friends VALUES 484 | (1 , 'Caesar' , 'Serkis', 3 ), 485 | (2 , 'Harambe' , 'Porter', 1 ), 486 | (3 , 'Aldo' , 'Atkins', 3 ), 487 | (4 , 'Cornelius', 'Porter', 1 ), 488 | (5 , 'Zira' , 'Porter', 4 ), 489 | (6 , 'Ishmael' , 'Serkis', 3 ), 490 | (7 , 'Monsieur' , 'Mallah', 3 ), 491 | (8 , 'Titano' , 'Atkins', 6 ), 492 | (9 , 'King' , 'Kong' , 3 ), 493 | (10, 'Bobo' , 'Kong' , 8 ), 494 | (11, 'Myster' , 'Ious' , NULL); 495 | GO 496 | 497 | INSERT INTO Ape.BananaTree VALUES 498 | (1, 5.5, 2018, 08, 31), 499 | (2, 4.3, 2018, 08, 27), 500 | (3, 4.7, 2018, 08, 36), 501 | (4, 3.8, 2018, 08, 20), 502 | (5, 6.2, 2018, 08, 40), 503 | (6, 6.4, 2016, 
07, 23), 504 | (7, 5.4, 2016, 07, 32), 505 | (8, 5.2, 2016, 07, 31), 506 | (9, 4.8, 2016, 07, 19), 507 | (10, 4.5, 2015, 09, 28), 508 | (11, 5.8, 2015, 09, 35), 509 | (12, 7.5, 2015, 09, 45), 510 | (13, 6.4, 2015, 09, 30), 511 | (14, 5.0, 2015, 09, 24), 512 | (15, 5.1, 2014, 10, 34), 513 | (16, 4.2, 2014, 10, 23), 514 | (17, 5.4, 2014, 08, 39), 515 | (18, 5.2, 2014, 08, 28); 516 | GO 517 | 518 | INSERT INTO Ape.EatingFrom VALUES 519 | (3 ,1 ), 520 | (2 ,1 ), 521 | (1 ,1 ), 522 | (6 ,3 ), 523 | (10,5 ), 524 | (4 ,5 ), 525 | (9 ,5 ), 526 | (8 ,8 ), 527 | (7 ,8 ), 528 | (10,8 ), 529 | (5 ,8 ), 530 | (2 ,8 ), 531 | (3 ,8 ), 532 | (4 ,8 ), 533 | (6 ,2 ), 534 | (6 ,2 ), 535 | (6 ,2 ), 536 | (8 ,2 ), 537 | (9 ,1 ), 538 | (2 ,1 ), 539 | (1 ,1 ), 540 | (6 ,2 ), 541 | (6 ,2 ), 542 | (10,8 ), 543 | (2 ,18), 544 | (6 ,15), 545 | (7 ,11), 546 | (2 ,14), 547 | (2 ,1 ); 548 | GO 549 | 550 | INSERT INTO Ape.Banana VALUES 551 | (1 , 2, '20181003', '20181004', 0, 1 , NULL), 552 | (2 , 4, '20181003', '20181004', 1, 2 , NULL), 553 | (3 , 4, '20181003', '20181004', 1, 2 , NULL), 554 | (4 , 5, '20181003', '20181006', 1, 1 , NULL), 555 | (5 , 5, '20181003', '20181006', 1, 2 , 'best banana ever'), 556 | (6 , 3, '20181003', '20181004', 1, 2 , NULL), 557 | (7 , 2, '20181002', '20181004', 0, 3 , NULL), 558 | (8 , 5, '20181002', '20181005', 1, 3 , 'smooth and delectable'), 559 | (9 , 3, '20181002', '20181003', 1, 4 , NULL), 560 | (10, 3, '20181002', '20181003', 1, 5 , NULL), 561 | (11, 2, '20181002', '20181003', 0, 5 , NULL), 562 | (12, 5, '20181002', '20181005', 1, 4 , NULL), 563 | (13, 1, '20181002', '20181002', 0, 9 , NULL), 564 | (14, 1, '20181001', '20181001', 0, 16, NULL), 565 | (15, 1, '20181001', '20181001', 0, 16, NULL), 566 | (16, 5, '20181001', '20181004', 1, 5 , 'a culinary delight'), 567 | (17, 5, '20181001', '20181004', 1, 6 , NULL), 568 | (18, 5, '20181001', '20181004', 1, 7 , NULL), 569 | (19, 5, '20181001', '20181004', 1, 8 , NULL), 570 | (20, 5, '20181001', '20181004', 1, 9 , 'soft with nutty undertones'), 571 | (21, 4, '20181001', '20181003', 1, 18, NULL), 572 | (22, 4, '20180930', '20181001', 1, 14, NULL), 573 | (23, 4, '20180930', '20181001', 1, 17, NULL), 574 | (24, 4, '20180930', '20181001', 1, 12, NULL), 575 | (25, 2, '20180930', '20181030', 1, 11, NULL), 576 | (26, 3, '20180930', '20181030', 0, 5 , 'good mid-range banana'), 577 | (27, 4, '20180930', '20181003', 1, 2 , NULL), 578 | (28, 5, '20180930', '20181003', 1, 10, NULL), 579 | (29, 5, '20180929', '20181003', 1, 11, NULL), 580 | (30, 1, '20180929', '20181001', 0, 4 , 'tasted like ape-shit'), 581 | (31, 1, '20180929', '20180929', 0, 7 , NULL), 582 | (32, 2, '20180929', '20181002', 0, 8 , NULL), 583 | (33, 5, '20180929', '20181002', 1, 12, NULL), 584 | (34, 4, '20180929', '20180930', 1, 2 , NULL), 585 | (35, 2, '20180929', '20180930', 0, 2 , NULL), 586 | (36, 3, '20180929', '20180930', 0, 18, NULL), 587 | (37, 3, '20180929', '20180930', 0, 13, NULL), 588 | (38, 4, '20180928', '20180929', 1, 15, NULL), 589 | (39, 5, '20180928', '20181001', 1, 13, NULL), 590 | (40, 2, '20180928', '20180930', 1, 12, NULL), 591 | (41, 1, '20180928', '20180928', 0, 15, NULL), 592 | (42, 1, '20180928', '20180928', 1, 12, 'had ants in it'), 593 | (43, 1, '20180928', '20180928', 0, 7 , NULL), 594 | (44, 4, '20180927', '20180929', 0, 7 , NULL), 595 | (45, 5, '20180927', '20180930', 1, 14, NULL), 596 | (46, 5, '20180927', '20180930', 1, 15, NULL), 597 | (47, 5, '20180927', '20180930', 1, 16, NULL), 598 | (48, 5, '20180927', '20180930', 1, 17, NULL), 599 | (49, 5, 
'20180927', '20180930', 1, 18, NULL), 600 | (50, 3, '20180927', '20180928', 1, 3 , NULL); 601 | GO 602 | 603 | --- NOTES SCHEMA -- 604 | CREATE SCHEMA Notes; 605 | GO 606 | CREATE TABLE Notes.Friends ( 607 | FriendID int not null, 608 | FirstName varchar(20), 609 | LastName varchar(20), 610 | FavColour varchar(20), 611 | PRIMARY KEY (FriendID) 612 | ); 613 | GO 614 | CREATE TABLE Notes.Pets ( 615 | PetID int not null, 616 | PetName varchar(20), 617 | PetDOB date, 618 | FriendID int not null, 619 | FOREIGN KEY (FriendID) REFERENCES Notes.Friends (FriendID), 620 | PRIMARY KEY (PetID) 621 | ); 622 | GO 623 | CREATE TABLE Notes.Scratched ( 624 | ScratcherID int not null, 625 | ScratchDate date, 626 | ScratchTime time, 627 | ScratcheeID int not null, 628 | FOREIGN KEY (ScratcherID) REFERENCES Notes.Friends (FriendID), 629 | FOREIGN KEY (ScratcheeID) REFERENCES Notes.Friends (FriendID), 630 | PRIMARY KEY (ScratcherID, ScratcheeID, ScratchDate, ScratchTime) 631 | ); 632 | GO 633 | CREATE TABLE Notes.PlayCount ( 634 | PetID int not null, 635 | PlayCounter int, 636 | FriendID int not null, 637 | FOREIGN KEY (PetID) REFERENCES Notes.Pets (PetID), 638 | FOREIGN KEY (FriendID) REFERENCES Notes.Friends (FriendID), 639 | PRIMARY KEY (PetID, FriendID) 640 | ); 641 | GO 642 | CREATE TABLE Notes.Passports ( 643 | PptNo varchar(5) not null, 644 | PptCountry varchar(20), 645 | PptExpiry date, 646 | FriendID int, 647 | FOREIGN KEY (FriendID) REFERENCES Notes.Friends (FriendID), 648 | PRIMARY KEY (PptNo) 649 | ); 650 | GO 651 | CREATE TABLE Notes.Table1 ( 652 | A int not null, 653 | B varchar(20), 654 | C varchar(20), 655 | PRIMARY KEY (A) 656 | ); 657 | GO 658 | CREATE TABLE Notes.Table2 ( 659 | D varchar(20), 660 | E int not null, 661 | A int not null, 662 | FOREIGN KEY (A) REFERENCES Notes.Table1 (A), 663 | PRIMARY KEY (E) 664 | ); 665 | GO 666 | CREATE TABLE Notes.Letters ( 667 | A char, 668 | B char, 669 | Num int not null, 670 | PRIMARY KEY (Num) 671 | ); 672 | GO 673 | CREATE TABLE Notes.RandomPeople ( 674 | PersonName varchar(20), 675 | Gender char(2), 676 | Age int 677 | ); 678 | GO 679 | CREATE TABLE Notes.Houses ( 680 | house_ID varchar(5) not null, 681 | house_owner varchar(50), 682 | house_address varchar(200), 683 | post_code varchar(4), 684 | house_price decimal(10,2), 685 | PRIMARY KEY (house_ID) 686 | ); 687 | GO 688 | CREATE TABLE Notes.Suburbs ( 689 | post_code varchar(5) not null, 690 | suburb_name varchar(100) not null, 691 | vaccination_rate decimal(3,2), 692 | PRIMARY KEY (post_code, suburb_name) 693 | ); 694 | GO 695 | 696 | CREATE TABLE Notes.Numbers ( 697 | Num int, 698 | NumString varchar(10) 699 | ); 700 | GO 701 | 702 | CREATE TABLE Notes.EduStudy ( 703 | Id varchar(6) not null, 704 | Income varchar(8), 705 | Education int, 706 | PRIMARY KEY (Id) 707 | ); 708 | GO 709 | 710 | CREATE TABLE Notes.Orders ( 711 | OrderID int not null, 712 | Item varchar(30), 713 | Price decimal(5,2), 714 | OrderDT datetime, 715 | PRIMARY KEY (OrderID) 716 | ); 717 | GO 718 | 719 | INSERT INTO Notes.Orders VALUES 720 | (1, 'Boiled leaves' , 2.99 , '2021-12-31 15:13:00'), 721 | (2, 'Bow wow' , 15 , '2021-12-31 15:34:00'), 722 | (3, 'Cackleberry stew', 32.55, '2022-01-01 09:32:00'), 723 | (4, 'Mug of murk' , 4.40 , '2022-01-01 10:16:00'); 724 | GO 725 | 726 | INSERT INTO Notes.EduStudy VALUES 727 | ('EI13', 'low', 5), 728 | ('EI122', 'low', 1), 729 | ('EI281', 'low-mid', 4), 730 | ('EI3332', 'middle', 3), 731 | ('EI4751', 'high-mid', 3), 732 | ('EI12', 'high', 2); 733 | GO 734 | 735 | INSERT INTO 
Notes.Numbers VALUES 736 | (111, '111'), 737 | (31, '31'), 738 | (32, '32'), 739 | (211, '211'); 740 | GO 741 | 742 | 743 | INSERT INTO Notes.Houses VALUES 744 | ('H0001', 'Millard Claassen' , '7235 East Van Dyke St' , '3128', 300000), 745 | ('H0002', 'Jamie Pew' , '8914 South Sunnyslope Dr', '3128', 150000), 746 | ('H0003', 'Bethel Viviano' , '87 South West Halifax St', '3142', 400000), 747 | ('H0004', 'Brandi Hovis' , '676 Ocean St' , '3142', 360000), 748 | ('H0005', 'Mei Colby' , '62 West Park Ave' , '3556', 220000), 749 | ('H0006', 'Marilu Munz' , '62 Elm Road' , '3083', 120000), 750 | ('H0007', 'Rhiannon Carwile' , '7005 Anderson Ave' , '3779', 500000), 751 | ('H0008', 'Joycelyn Hamburger' , '7410 Park Drive' , '3778', 550000), 752 | ('H0009', 'Leopoldo Flaherty' , '3 Dogwood Dr' , '3083', 1200000), 753 | ('H0010', 'Xavier Farrer' , '767 Rockville Street' , '3083', 100000), 754 | ('H0011', 'Waldo Wingboard' , '8712 Thorne Street' , NULL, 640000), 755 | ('H0012', 'Jimmy Jenkins' , '32 Rosey Cres' , NULL, 70000); 756 | GO 757 | 758 | -- There is no primary / foreign key pair for Houses and Suburbs. 759 | -- The primary key of Suburbs is not what you might expect (it is not post_code alone). 760 | -- 3128 connects to 2 suburbs 761 | -- some houses have a NULL post_code, so they match no suburb 762 | -- 3142 has no corresponding suburb 763 | -- the data type for post_code in Suburbs is varchar(5) (Houses uses varchar(4)), and one suburb has postcode '33128' (an illustrative LEFT JOIN further below shows the effect of these quirks) 764 | 765 | INSERT INTO Notes.Suburbs VALUES 766 | ('3128' , 'Erebor' , 0.8), 767 | ('33128', 'Erberor' , 0.8), 768 | ('3128' , 'Fangorn' , 0.2), 769 | ('3779' , 'Durin' , 0.4), 770 | ('3556' , 'Gondor' , 0.65), 771 | ('3083' , 'Isengaard', 0.35); 772 | GO 773 | 774 | INSERT INTO Notes.Friends VALUES 775 | (1, 'X', 'A', 'red'), 776 | (2, 'Y', 'B', 'blue'), 777 | (3, 'Z', 'C', NULL); 778 | GO 779 | INSERT INTO Notes.Pets VALUES 780 | (1, 'Chikin', '20160924', 2), 781 | (2, 'Cauchy', '20120301', 3), 782 | (3, 'Gauss', '20120301', 3); 783 | GO 784 | INSERT INTO Notes.Scratched VALUES 785 | (1, '20180905', '12:00', 2), 786 | (1, '20180905', '12:30', 3), 787 | (2, '20180906', '11:00', 1), 788 | (3, '20180907', '10:00', 1), 789 | (2, '20180801', '16:15', 1), 790 | (2, '20180801', '13:00', 3), 791 | (1, '20170214', '04:30', 2), 792 | (3, '20200128', '18:00', 1); 793 | GO 794 | INSERT INTO Notes.PlayCount VALUES 795 | (1, 3, 1), 796 | (1, 5, 2), 797 | (3, 4, 2); 798 | GO 799 | INSERT INTO Notes.Passports VALUES 800 | ('E1321', 'Australia', '20210312', NULL), 801 | ('LA123', 'New Zealand', '20320901', 2), 802 | ('S9876', 'Monaco', '20280619', 3); 803 | 804 | GO 805 | INSERT INTO Notes.Table1 ( 806 | A, B, C 807 | ) VALUES 808 | (1, 'Ignorance', 'is'), 809 | (2, 'War', 'is'), 810 | (3, 'Freedom', 'is'), 811 | (4, 'Friendship', 'is'); 812 | GO 813 | 814 | INSERT INTO Notes.Table2 ( 815 | D, E, A 816 | ) VALUES 817 | ('slavery.', 3, 1), 818 | ('weakness.', 4, 2), 819 | ('strength.', 1, 3), 820 | ('peace.', 2, 4); 821 | GO 822 | 823 | INSERT INTO Notes.Letters VALUES 824 | ('a', 'b', 1), 825 | ('a', 'c', 2), 826 | ('a', 'b', 3), 827 | ('a', 'c', 4); 828 | GO 829 | 830 | INSERT INTO Notes.RandomPeople VALUES 831 | ('Beyonce', 'F', 37), 832 | ('Laura Marling', 'F', 28), 833 | ('Darren Hayes', 'M', 46), 834 | ('Bret McKenzie', 'M', 42), 835 | ('Jack Monroe', 'NB', 30); 836 | 837 | 838 | -- --------------------------------------------------------------- 839 | -- CREATE Analytics DATABASE ------------------------------------- 840 | -- --------------------------------------------------------------- 841 | 842 | USE master; 843 | 
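-- Optional illustration of the Houses/Suburbs quirks noted above the
-- Notes.Suburbs insert (assumes the Sandpit database created above exists):
-- joining on post_code, 3128 matches two suburbs so those houses appear twice,
-- while 3142, 3778 and the NULL post codes match no suburb at all.
-- Three-part names let this run from the current (master) context.
SELECT H.house_ID, H.post_code, S.suburb_name, S.vaccination_rate
FROM Sandpit.Notes.Houses AS H
LEFT JOIN Sandpit.Notes.Suburbs AS S
  ON H.post_code = S.post_code;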
GO 844 | 845 | DROP DATABASE IF EXISTS Analytics; 846 | GO 847 | 848 | CREATE DATABASE Analytics; 849 | GO 850 | 851 | USE Analytics; 852 | GO 853 | 854 | CREATE TABLE Membership ( 855 | memberID INT not null, 856 | memberName VARCHAR(100), 857 | phone VARCHAR(20), 858 | joinDate DATE, 859 | PRIMARY KEY (memberID) 860 | ); 861 | GO 862 | 863 | INSERT INTO Membership 864 | VALUES 865 | (12231, 'Denali Dune', '+61 03 97229917', '2021-06-21'), 866 | (12688, 'Reilly Bierman', '+61 03 9269 1200', '2021-05-01'), 867 | (12233, 'Shiloh Henry', '+61 03 9479 6000', '2021-05-13'), 868 | (12565, 'Tristan Gaumond', '+61 03 9905 4000', '2021-05-04'), 869 | (12223, 'Rene Brassard', '+61 03 9903 2000', '2021-06-30'), 870 | (12668, 'Tanner Hubert', '+61 03 9035 5511', '2021-07-29'); 871 | GO 872 | 873 | CREATE TABLE SausageSizzle ( 874 | saleId INT NOT NULL, 875 | saleDate DATE, 876 | product VARCHAR(20), 877 | quantity INT, 878 | friendId INT, 879 | PRIMARY KEY (saleId) 880 | ); 881 | GO 882 | 883 | INSERT INTO SausageSizzle VALUES 884 | (1 , '1999-12-31', 'pork' , 1, NULL), 885 | (2 , '1999-12-31', 'veggie', 3, NULL), 886 | (3 , '1999-12-31', 'pork' , 2, 1 ), 887 | (4 , '2000-01-01', 'veggie', 4, NULL), 888 | (5 , '2000-01-01', 'veggie', 2, 2 ), 889 | (6 , '2000-01-01', 'pork' , 1, NULL), 890 | (7 , '2000-01-01', 'veggie', 1, NULL), 891 | (8 , '2000-01-01', 'pork' , 1, NULL), 892 | (9 , '2000-01-02', 'veggie', 1, 2 ), 893 | (10, '2000-01-02', 'pork' , 2, NULL), 894 | (11, '2000-01-02', 'veggie', 3, NULL), 895 | (12, '2000-01-02', 'pork' , 4, NULL), 896 | (13, '2000-01-02', 'veggie', 2, 3 ), 897 | (14, '2000-01-03', 'veggie', 1, NULL), 898 | (15, '2000-01-03', 'pork' , 6, NULL), 899 | (16, '2000-01-03', 'veggie', 1, 1 ), 900 | (17, '2000-01-04', 'pork', 1, 1 ), 901 | (18, '2000-01-05', 'veggie', 5, 1 ); 902 | GO 903 | 904 | CREATE VIEW SausageSizzleSummary AS 905 | SELECT Product, SaleDate, SUM(Quantity) AS Sales 906 | FROM SausageSizzle 907 | GROUP BY Product, SaleDate; 908 | GO -------------------------------------------------------------------------------- /other-languages.md: -------------------------------------------------------------------------------- 1 | ## Connecting from Other Languages 2 | 3 | This document aims to provide simple examples of ODBC connectivity with batch processing in a number of common statistical programming languages. 4 | 5 | A Data Source Name (DSN) is usually set up by your system administrator. If not, then there is usually a way to specify the connection settings yourself. 6 | 7 | ## R 8 | 9 | Here we establish a connection using a DSN provided by the system administrator. 10 | 11 | ```R 12 | install.packages("odbc") 13 | library(odbc) 14 | 15 | con <- odbc::dbConnect(odbc::odbc(), "insert_DSN_here") 16 | ``` 17 | 18 | If you have no DSN, then you'll need to specify the connection settings. Example: 19 | 20 | ```R 21 | install.packages("odbc") 22 | library(odbc) 23 | 24 | con <- odbc::dbConnect(odbc::odbc(), 25 | Driver = "SQL Server", 26 | Server = "write_server_address_here", 27 | Database = "write_database_name_here", 28 | UID = "write_user_name_here", 29 | PWD = "write_your_password_here") 30 | ``` 31 | 32 | Here is a more specific example of the above. 
Connecting to the master database from the course: 33 | 34 | ```R 35 | con <- odbc::dbConnect(odbc::odbc(), 36 | Driver = "SQL Server", 37 | Server = "nzssn.database.windows.net", 38 | Database = "master", 39 | UID = "Daniel", 40 | PWD = "insert_your_password") 41 | ``` 42 | 43 | If you are still unsuccessful, you may find more help on the RStudio website ([click here](https://db.rstudio.com/databases/microsoft-sql-server/)). 44 | 45 | Now let's send a basic query to the server and get the result. We will need to explicitly specify `PlayPen` as the database, since we are currently connected to `master`. Example: 46 | 47 | ```R 48 | myquery <- "SELECT * 49 | FROM PlayPen.Ape.Banana;" 50 | banana <- DBI::dbGetQuery(con, statement = myquery) 51 | ``` 52 | 53 | Now the result of the above query is stored in the variable named `banana`. 54 | 55 | Let's take a closer look at this variable `banana`: 56 | 57 | ```R 58 | class(banana) 59 | names(banana) 60 | head(banana) 61 | ``` 62 | 63 | We can also loop through a table (e.g., a very large table) and fetch only a few rows at a time. Example: 64 | 65 | ```R 66 | myquery2 <- "SELECT * 67 | FROM PlayPen.Ape.EatingFrom;" 68 | results <- DBI::dbSendQuery(con, statement = myquery2) 69 | results 70 | ``` 71 | 72 | We have sent the query, and now it's time to fetch rows from the result. Here we fetch the first 5 rows: 73 | 74 | ```R 75 | eating <- DBI::dbFetch(results, n = 5) 76 | eating 77 | ``` 78 | 79 | Now we fetch the next 5 rows and append them to the end of the `data.frame` named `eating`: 80 | 81 | ```R 82 | next5 <- DBI::dbFetch(results, n = 5) 83 | eating <- rbind(eating, next5) 84 | eating 85 | ``` 86 | 87 | For more info, the [RStudio guide is useful (click here)](https://db.rstudio.com/getting-started/connect-to-database). 88 | 89 | ## STATA 90 | 91 | **Disclaimer:** *I am unable to execute this code since I don't have a copy of STATA, and I've never used STATA before. This is just my best effort at including STATA for completeness. If someone could please verify that this works (or not) and email me, that would be great.* 92 | 93 | ```stata 94 | odbc load, exec( 95 | "SELECT TOP(100) * 96 | FROM Notes.Friends; 97 | ") 98 | dsn("ask_someone_for_the_DSN") lowercase sqlshow clear; 99 | ``` 100 | 101 | ```stata 102 | odbc load, exec( 103 | "SELECT * 104 | FROM Notes.Friends;") 105 | dsn("ask_someone_for_the_DSN") lowercase sqlshow clear; 106 | ``` 107 | 108 | 109 | 110 | ## SAS 111 | 112 | Use [implicit or explicit SQL passthrough](https://documentation.sas.com/?cdcId=pgmsascdc&cdcVersion=9.4_3.5&docsetId=spdsug&docsetTarget=n095mdd1wof6ogn1neyglan77ghc.htm&locale=en#!) to execute SQL code on the server (rather than locally). 
113 | 114 | Example: 115 | 116 | ```SAS 117 | proc sql; 118 | connect to odbc (dsn="ask_someone_for_the_DSN"); 119 | create table My_SAS_table_of_friends as 120 | select * from connection to odbc 121 | /* -------- Start of SQL code -------*/ 122 | (SELECT * 123 | FROM Notes.Friends); 124 | /* ----------------------------------*/ 125 | disconnect from odbc; 126 | quit; 127 | ``` 128 | 129 | Here's another example using tables and columns in the IDI 130 | 131 | ```SAS 132 | proc sql; 133 | connect to odbc(dsn="ask_someone_for_the_DSN"); 134 | create table work.census_address as 135 | select * from connection to odbc 136 | /* -------- Start of SQL code -------*/ 137 | (SELECT snz_uid 138 | ,snz_cen_uid 139 | ,address_type_code 140 | ,meshblock_code 141 | ,au_code 142 | ,geocode_source_code 143 | FROM cen_clean.census_address 144 | WHERE address_type_code='UR' 145 | ORDER BY meshblock_code, snz_uid); 146 | /* ----------------------------------*/ 147 | disconnect from odbc; 148 | quit; 149 | 150 | ``` 151 | 152 | For more IDI SAS examples [see the VHIN website](https://vhin.co.nz/guides/shared-code/) -------------------------------------------------------------------------------- /scripts-from-notes/README.md: -------------------------------------------------------------------------------- 1 | # Scripts from notes 2 | 3 | This folder contains some of the longer scripts from the course notes, making them easier to practice and experiment with. -------------------------------------------------------------------------------- /scripts-from-notes/bayes-vs-frequentist.sql: -------------------------------------------------------------------------------- 1 | -- This script is not intended to be run in one go. 2 | -- It is a code dump of a number of related examples from the notes. 3 | -- It is here for you to experiment with as you follow the notes. 
4 | 5 | CREATE DATABASE StackExchange; 6 | GO -- only use GO in T-SQL, remove GO for MySQL 7 | 8 | USE StackExchange; 9 | GO -- only use GO in T-SQL, remove GO for MySQL 10 | 11 | CREATE TABLE Posts ( 12 | Id INT NOT NULL, 13 | CreationDate DATETIME, 14 | Score INT, 15 | ViewCount INT, 16 | Body VARCHAR(100), 17 | PRIMARY KEY (Id) 18 | ); 19 | 20 | INSERT INTO Posts 21 | VALUES 22 | (1, '2020-01-01',1,200,'dummy text'), 23 | (2, '2020-02-01',1,200,'dummy frequentist'), 24 | (3, '2020-03-01',1,200,'dummy text'), 25 | (4, '2020-03-01',1,200,'dummy bayesian'); 26 | 27 | -- ---------------------------------- 28 | -- FASTBALL TESTING / WRITING THE QUERY 29 | -- ---------------------------------- 30 | 31 | -- step 1 32 | SELECT MONTH(CreationDate) AS CreationMonth, 33 | CASE WHEN Body LIKE '%frequentist%' THEN 'F' 34 | WHEN Body LIKE '%bayesian%' THEN 'B' 35 | END AS Category 36 | FROM Posts; 37 | 38 | 39 | -- ---------------------------------- 40 | 41 | -- step 2 42 | INSERT INTO Posts VALUES 43 | (5, '2021-01-01',1,200,'dummy frequentist'); 44 | 45 | SELECT MONTH(CreationDate) AS CreationMonth, 46 | CASE WHEN Body LIKE '%frequentist%' THEN 'F' 47 | WHEN Body LIKE '%bayesian%' THEN 'B' 48 | END AS Category 49 | FROM Posts 50 | WHERE YEAR(CreationDate) <= 2020 51 | AND Body LIKE '%bayesian%' OR Body LIKE '%frequentist%'; 52 | 53 | -- ---------------------------------- 54 | 55 | -- step 3 56 | INSERT INTO Posts VALUES 57 | (6, '2020-03-01',2,200,'dummy bayesian'); 58 | 59 | SELECT * FROM Posts; 60 | 61 | SELECT AVG(Score) AvgScore, 62 | MONTH(CreationDate) AS CreationMonth, 63 | CASE WHEN Body LIKE '%frequentist%' THEN 'F' 64 | WHEN Body LIKE '%bayesian%' THEN 'B' 65 | END AS Category 66 | FROM Posts 67 | WHERE YEAR(CreationDate) <= 2020 68 | AND (Body LIKE '%bayesian%' OR Body LIKE '%frequentist%') 69 | GROUP BY MONTH(CreationDate), 70 | CASE WHEN Body LIKE '%frequentist%' THEN 'F' 71 | WHEN Body LIKE '%bayesian%' THEN 'B' 72 | END; 73 | 74 | -- ---------------------------------- 75 | 76 | -- step 4 77 | SELECT AVG(CAST(Score AS DECIMAL)) AvgScore, 78 | AVG(CAST(ViewCount AS DECIMAL)) AS AvgViews, 79 | COUNT(*) AS NumPosts, 80 | MONTH(CreationDate) AS CreationMonth, 81 | CASE WHEN Body LIKE '%frequentist%' THEN 'F' 82 | WHEN Body LIKE '%bayesian%' THEN 'B' 83 | END AS Category 84 | FROM Posts 85 | WHERE YEAR(CreationDate) <= 2020 86 | AND (Body LIKE '%bayesian%' OR Body LIKE '%frequentist%') 87 | GROUP BY MONTH(CreationDate), 88 | CASE WHEN Body LIKE '%frequentist%' THEN 'F' 89 | WHEN Body LIKE '%bayesian%' THEN 'B' 90 | END; 91 | 92 | 93 | -- ---------------------------------- 94 | -- REDUCING REPETITION 95 | -- ---------------------------------- 96 | 97 | -- cleaning, step 1 98 | WITH PostCats AS ( 99 | SELECT MONTH(CreationDate) AS CreationMonth, 100 | CASE WHEN Body LIKE '%frequentist%' THEN 'F' 101 | WHEN Body LIKE '%bayesian%' THEN 'B' 102 | END AS Category 103 | FROM Posts 104 | WHERE YEAR(CreationDate) <= 2020 105 | ) 106 | SELECT * 107 | FROM PostCats; 108 | 109 | -- ----------------------------------- 110 | 111 | -- cleaning, step 2 112 | WITH PostCats AS ( 113 | SELECT MONTH(CreationDate) AS CreationMonth, 114 | CASE WHEN Body LIKE '%frequentist%' THEN 'F' 115 | WHEN Body LIKE '%bayesian%' THEN 'B' 116 | END AS Category 117 | FROM Posts 118 | WHERE YEAR(CreationDate) <= 2020 119 | ) 120 | SELECT CreationMonth, Category 121 | FROM PostCats 122 | WHERE Category IS NOT NULL 123 | GROUP BY CreationMonth, Category; 124 | 125 | -- ----------------------------------- 126 | 127 | -- cleaning, 
step 3 128 | WITH PostCats AS ( 129 | SELECT MONTH(CreationDate) AS CreationMonth, 130 | CASE WHEN Body LIKE '%frequentist%' THEN 'F' 131 | WHEN Body LIKE '%bayesian%' THEN 'B' 132 | END AS Category, 133 | CAST(Score AS DECIMAL) AS Score, 134 | CAST(ViewCount AS DECIMAL) AS ViewCount 135 | FROM Posts 136 | WHERE YEAR(CreationDate) <= 2020 137 | ) 138 | SELECT CreationMonth, Category, 139 | AVG(Score) AS AvgScore, 140 | AVG(ViewCount) AS AvgViews, 141 | COUNT(*) AS NumPosts 142 | FROM PostCats 143 | WHERE Category IS NOT NULL 144 | GROUP BY CreationMonth, Category; 145 | 146 | -- here is a link to the query, hosted on Stack Exchange: 147 | -- https://data.stackexchange.com/stats/query/1509266/score-and-views-for-bayesian-vs-frequentist 148 | 149 | 150 | -- ---------------------------------- 151 | -- VALIDITY TESTING 152 | -- ---------------------------------- 153 | 154 | -- example 1 155 | 156 | WITH Posts (Id, CreationDate, Score, ViewCount, Body) AS ( 157 | SELECT 1,'2019-01-01',1,200,'dummy frequentist' 158 | UNION ALL 159 | SELECT 2,'2020-01-01',1,200,'dummy frequentist' 160 | UNION ALL 161 | SELECT 3,'2021-01-01',1,200,'dummy frequentist' 162 | UNION ALL 163 | SELECT 4,NULL,1,200,'dummy frequentist' 164 | ), 165 | PostCats AS ( 166 | SELECT MONTH(CreationDate) AS CreationMonth, 167 | CASE WHEN Body LIKE '%frequentist%' THEN 'F' 168 | WHEN Body LIKE '%bayesian%' THEN 'B' 169 | END AS Category, 170 | CAST(Score AS DECIMAL) AS Score, 171 | CAST(ViewCount AS DECIMAL) AS ViewCount 172 | FROM Posts 173 | WHERE YEAR(CreationDate) <= 2020 174 | ) 175 | SELECT CreationMonth, Category, 176 | AVG(Score) AS AvgScore, 177 | AVG(ViewCount) AS AvgViews, 178 | COUNT(*) AS NumPosts 179 | FROM PostCats 180 | WHERE Category IS NOT NULL 181 | GROUP BY CreationMonth, Category; 182 | 183 | -- example 2 184 | 185 | WITH Posts (Id, CreationDate, Score, ViewCount, Body) AS ( 186 | SELECT 1,'2020-01-01',1,200,'dummy FREQUENTIST' 187 | UNION ALL 188 | SELECT 2,'2020-01-01',1,200,'dummy FREQUENTIST dummy' 189 | UNION ALL 190 | SELECT 3,'2020-01-01',1,200,'FREQUENTIST dummy' 191 | UNION ALL 192 | SELECT 4,'2020-01-01',1,200,NULL 193 | ), 194 | PostCats AS ( 195 | SELECT MONTH(CreationDate) AS CreationMonth, 196 | CASE WHEN Body LIKE '%frequentist%' THEN 'F' 197 | WHEN Body LIKE '%bayesian%' THEN 'B' 198 | END AS Category, 199 | CAST(Score AS DECIMAL) AS Score, 200 | CAST(ViewCount AS DECIMAL) AS ViewCount 201 | FROM Posts 202 | WHERE YEAR(CreationDate) <= 2020 203 | ) 204 | SELECT CreationMonth, Category, 205 | AVG(Score) AS AvgScore, 206 | AVG(ViewCount) AS AvgViews, 207 | COUNT(*) AS NumPosts 208 | FROM PostCats 209 | WHERE Category IS NOT NULL 210 | GROUP BY CreationMonth, Category; 211 | 212 | -- delete the database 213 | DROP DATABASE StackExchange; 214 | GO -------------------------------------------------------------------------------- /scripts-from-notes/manipulate-tables.sql: -------------------------------------------------------------------------------- 1 | -- This script is not intended to be run in one go. 2 | -- It is a code dump of a number of related examples from the notes. 3 | -- It is here for you to experiment with as you follow the notes. 
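-- Tip, assuming you are running the statements below one at a time: after any
-- of the ALTER TABLE statements you can inspect a table's current structure to
-- see the effect, for example
--
--   EXEC sp_help 'Friends';   -- T-SQL
--   DESCRIBE Friends;         -- MySQL equivalent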
4 | 5 | CREATE DATABASE MyExperiments; 6 | GO -- only use GO in T-SQL, remove GO for MySQL 7 | 8 | USE MyExperiments; 9 | GO -- only use GO in T-SQL, remove GO for MySQL 10 | 11 | -- create the Friends table 12 | CREATE TABLE Friends ( 13 | FriendID INT not null, 14 | FirstName VARCHAR(20), 15 | LastName VARCHAR(20), 16 | FavColour VARCHAR(20), 17 | PRIMARY KEY (FriendID) 18 | ); 19 | 20 | -- create the Pets table 21 | CREATE TABLE Pets ( 22 | PetID INT not null, 23 | PetName VARCHAR(20), 24 | PetDOB DATE, 25 | FriendID INT not null, 26 | FOREIGN KEY (FriendID) REFERENCES Friends (FriendID), 27 | PRIMARY KEY (PetID) 28 | ); 29 | 30 | ALTER TABLE Friends 31 | ADD StartDate DATE, 32 | ADD StartTime TIME; 33 | 34 | ALTER TABLE Friends 35 | DROP COLUMN StartDate; 36 | 37 | -- produces an error (foreign key) 38 | ALTER TABLE Pets 39 | DROP COLUMN FriendID; 40 | 41 | -- circumvent the error (delete foreign key) 42 | ALTER TABLE table_name 43 | DROP CONSTRAINT pets_ibfk_1; 44 | 45 | -- change a data type in MySQL (uses 'modify column') 46 | ALTER TABLE Friends 47 | MODIFY COLUMN FirstName VARCHAR(40); 48 | 49 | -- change a data type in T-SQL (uses 'alter column') 50 | ALTER TABLE Friends 51 | ALTER COLUMN FirstName VARCHAR(40); 52 | 53 | -- using the MySQL syntax 54 | ALTER TABLE Friends 55 | MODIFY COLUMN FavColour VARCHAR(3); 56 | 57 | -- produces an error (foreign key) 58 | DROP TABLE Friends; 59 | 60 | -- remove the foreign key from Pets 61 | ALTER TABLE Pets 62 | DROP CONSTRAINT pets_ibfk_1; 63 | 64 | -- delete the Friends table 65 | DROP TABLE Friends; 66 | 67 | -- ----------------------------------------------- 68 | 69 | -- create the Friends table 70 | CREATE TABLE Friends ( 71 | FriendID INT not null, 72 | FirstName VARCHAR(20), 73 | LastName VARCHAR(20), 74 | FavColour VARCHAR(20), 75 | PRIMARY KEY (FriendID) 76 | ); 77 | 78 | INSERT INTO Friends 79 | VALUES 80 | (1, 'X', 'A', 'red'), 81 | (2, 'Y', 'B', 'blue'), 82 | (3, 'Z', 'C', NULL), 83 | (4, 'Kimmy', 'Jenkins', 'yellow'), 84 | (5, 'Jimmy', 'Jenkins', NULL); 85 | 86 | 87 | INSERT INTO Friends 88 | (FriendID, FirstName) 89 | VALUES 90 | (6, 'Niko'), 91 | (7, 'Sage'); 92 | 93 | DROP TABLE Friends; 94 | 95 | -- ----------------------------------------------- 96 | 97 | -- first create the structure 98 | CREATE TABLE Friends ( 99 | FriendID INT not null, 100 | FirstName VARCHAR(20), 101 | LastName VARCHAR(20), 102 | FavColour VARCHAR(20), 103 | PRIMARY KEY (FriendID) 104 | ); 105 | 106 | -- then insert our 3 friends 107 | INSERT INTO Friends 108 | VALUES 109 | (1,'X','A','red'), 110 | (2,'Y','B','blue'), 111 | (3,'Z','C', NULL); 112 | 113 | -- now alter the table to add StartDate and StartTime 114 | ALTER TABLE Friends 115 | ADD StartDate DATE, 116 | ADD StartTime TIME; 117 | 118 | UPDATE Friends 119 | SET StartDate = '1999-12-30', StartTime = '16:30:00'; 120 | 121 | UPDATE Friends 122 | SET StartDate = '2000-01-03', StartTime = '08:00:00' 123 | WHERE FriendID = 1; 124 | 125 | DELETE FROM Friends 126 | WHERE FriendID = 999; 127 | 128 | -- delete the database 129 | DROP DATABASE MyExperiments; 130 | GO 131 | 132 | -- ------------------------------------------------ 133 | 134 | CREATE TABLE Membership ( 135 | memberID INT not null, 136 | memberName VARCHAR(100), 137 | phone VARCHAR(20), 138 | joinDate DATE, 139 | PRIMARY KEY (memberID) 140 | ); 141 | 142 | INSERT INTO Membership 143 | VALUES (12231, 'Denali Dune', '+61 03 97229917', '2021-06-21'); 144 | 145 | DELETE FROM Membership 146 | WHERE memberId = 12231; 147 | 148 | INSERT INTO 
Membership 149 | VALUES 150 | (12688, 'Reilly Bierman', '+61 03 9269 1200', '2021-05-01'), 151 | (12233, 'Shiloh Henry', '+61 03 9479 6000', '2021-05-13'), 152 | (12565, 'Tristan Gaumond', '+61 03 9905 4000', '2021-05-04'), 153 | (12223, 'Rene Brassard', '+61 03 9903 2000', '2021-06-30'), 154 | (12668, 'Tanner Hubert', '+61 03 9035 5511', '2021-07-29'); 155 | 156 | SELECT COUNT(*) AS MemberCount, SYSDATE() AS ExecutionDateTime 157 | FROM Membership; 158 | 159 | -- MySQL only: table creation and initial insert 160 | CREATE TABLE MemberCountHistory 161 | SELECT COUNT(*) AS MemberCount, SYSDATE() AS ExecutionDateTime 162 | FROM Membership; 163 | 164 | -- MySQL only: subsequent inserts (execute once per month) 165 | INSERT INTO MemberCountHistory 166 | SELECT COUNT(*) AS MemberCount, SYSDATE() AS ExecutionDateTime 167 | FROM Membership; 168 | 169 | -- T-SQL only: initial table creation (and execute once per month) 170 | SELECT COUNT(*) AS MemberCount, SYSDATETIME() AS ExecutionDateTime 171 | INTO MemberCountHistory 172 | FROM Membership; 173 | 174 | CREATE VIEW FriendsPets AS 175 | SELECT F.FirstName, P.PetName 176 | FROM Friends F JOIN Pets P ON F.FriendID = P.FriendID; 177 | 178 | SELECT * 179 | FROM FriendsPets; -------------------------------------------------------------------------------- /textbook-and-slides/README.md: -------------------------------------------------------------------------------- 1 | For the interactive version of the day 4 slides, hosted on RPubs, [click here](https://rpubs.com/frycast/sql-with-r). -------------------------------------------------------------------------------- /textbook-and-slides/SQL_Course_Slides_Day_1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frycast/SQL_course/13d5cf01fdd82b7cf8de3bb29846b5d595d07ab4/textbook-and-slides/SQL_Course_Slides_Day_1.pdf -------------------------------------------------------------------------------- /textbook-and-slides/SQL_Course_Slides_Day_2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frycast/SQL_course/13d5cf01fdd82b7cf8de3bb29846b5d595d07ab4/textbook-and-slides/SQL_Course_Slides_Day_2.pdf -------------------------------------------------------------------------------- /textbook-and-slides/SQL_Course_Slides_Day_3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frycast/SQL_course/13d5cf01fdd82b7cf8de3bb29846b5d595d07ab4/textbook-and-slides/SQL_Course_Slides_Day_3.pdf -------------------------------------------------------------------------------- /textbook-and-slides/SQL_Course_Slides_Day_4.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frycast/SQL_course/13d5cf01fdd82b7cf8de3bb29846b5d595d07ab4/textbook-and-slides/SQL_Course_Slides_Day_4.pdf -------------------------------------------------------------------------------- /textbook-and-slides/SQL_Course_Textbook.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frycast/SQL_course/13d5cf01fdd82b7cf8de3bb29846b5d595d07ab4/textbook-and-slides/SQL_Course_Textbook.pdf --------------------------------------------------------------------------------