├── .babelrc ├── .editorconfig ├── .env.example ├── .eslintrc.js ├── .gitignore ├── .htaccess ├── .nvmrc ├── .travis.yml ├── LICENSE-MIT ├── README.md ├── __mocks__ ├── .eslintrc.js ├── bunyan.js ├── fixtures │ ├── examData1.json │ ├── examData2.json │ ├── test1.pdf │ └── test2.pdf └── fs-extra.js ├── config.js ├── gulp-tasks ├── index.js ├── local │ ├── consolidateForSem.js │ ├── consolidateForYear.js │ ├── genReqTree │ │ ├── constants.js │ │ ├── index.js │ │ ├── normalizeString.js │ │ ├── normalizeString.test.js │ │ ├── parse+normalize.test.js │ │ ├── parseString.js │ │ └── parseString.test.js │ ├── mergeCorsBiddingStats.js │ ├── splitForSem.js │ └── splitForYear.js ├── remote │ ├── __snapshots__ │ │ └── examTimetable.test.js.snap │ ├── bulletinModules.js │ ├── cors.js │ ├── corsBiddingStats.js │ ├── examTimetable.js │ ├── examTimetable.test.js │ ├── ivle.js │ ├── moduleTimetableDelta.js │ └── venues.js └── utils │ ├── __snapshots__ │ └── timify.test.js.snap │ ├── gotCached.js │ ├── iterateSems.js │ ├── mergeModuleFields.js │ ├── mergeModuleFields.test.js │ ├── pdf.js │ ├── pdf.test.js │ ├── sortByKey.js │ ├── sortByKey.test.js │ ├── timify.js │ ├── timify.test.js │ └── titleize.js ├── gulpfile.babel.js ├── jest.config.js ├── jsonp.php ├── knexfile.js ├── migrations └── 20170101000000_initial.js ├── package.json ├── seeds └── schools.js ├── src ├── db.js ├── graphql │ ├── __snapshots__ │ │ └── index.test.js.snap │ ├── index.js │ ├── index.test.js │ └── jsonData.js ├── index.js ├── middleware │ ├── error.js │ └── request.js ├── scrapers │ ├── BaseTask.js │ ├── BaseTask.test.js │ ├── HttpService.js │ ├── HttpService.test.js │ ├── VenuesScraper.js │ └── VenuesScraper.test.js └── util │ ├── log.js │ ├── mapKeysDeep.js │ ├── mapKeysDeep.test.js │ ├── walkDir.js │ └── walkDir.test.js ├── webpack.config.babel.js └── yarn.lock /.babelrc: -------------------------------------------------------------------------------- 1 | { 2 | "presets": [ 3 | [ 4 | "env", 5 | { 6 | "targets": { "node": "current" }, 7 | "modules": false, 8 | "loose": true, 9 | "useBuiltIns": true 10 | } 11 | ], 12 | "flow", 13 | "bluebird" 14 | ], 15 | "env": { 16 | "development": { 17 | "plugins": ["transform-es2015-modules-commonjs"] 18 | }, 19 | "test": { 20 | "plugins": ["transform-es2015-modules-commonjs"] 21 | } 22 | }, 23 | "plugins": [["transform-object-rest-spread", { "useBuiltIns": true }]] 24 | } 25 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # http://editorconfig.org 2 | 3 | root = true 4 | 5 | [*] 6 | indent_style = space 7 | indent_size = 2 8 | 9 | charset = utf-8 10 | trim_trailing_whitespace = true 11 | insert_final_newline = true 12 | -------------------------------------------------------------------------------- /.env.example: -------------------------------------------------------------------------------- 1 | NODE_ENV=development 2 | IVLE_API_KEY= 3 | IVLE_API_TOKEN= 4 | DB_DEBUG=false 5 | -------------------------------------------------------------------------------- /.eslintrc.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | parser: 'babel-eslint', 3 | root: true, 4 | extends: [ 5 | 'airbnb-base', 6 | ], 7 | env: { 8 | node: true, 9 | }, 10 | plugins: [ 11 | 'import', 12 | ], 13 | settings: { 14 | 'import/resolver': 'node', 15 | }, 16 | overrides: [ 17 | { 18 | files: '**/*.test.{js,jsx}', 19 | env: { 20 | 
jest: true, 21 | }, 22 | rules: { 23 | // Much more lenient linting for tests 24 | 'max-len': ['error', 120, { 25 | ignoreComments: true, 26 | ignoreStrings: true, 27 | ignoreTemplateLiterals: true, 28 | }], 29 | }, 30 | }, 31 | ], 32 | rules: { 33 | // Consistent arrow parens 34 | 'arrow-parens': ['error', 'as-needed', { requireForBlockBody: true }], 35 | 'import/extensions': [ 36 | 'error', 37 | 'always', 38 | { 39 | js: 'never', 40 | }, 41 | ], 42 | 'max-len': ['error', 100, { ignoreComments: true }], 43 | // Let git handle the linebreaks instead 44 | 'linebreak-style': 'off', 45 | 'no-shadow': ['error', { allow: ['i'] }], 46 | }, 47 | }; 48 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | app 2 | cache 3 | node_modules 4 | coverage 5 | .env 6 | *.compiled.js 7 | *.sqlite3 8 | -------------------------------------------------------------------------------- /.htaccess: -------------------------------------------------------------------------------- 1 | Header set Access-Control-Allow-Origin "*" 2 | 3 | RewriteEngine On 4 | RewriteCond %{REQUEST_FILENAME} -f 5 | RewriteCond %{QUERY_STRING} callback= 6 | RewriteRule .+\.json$ jsonp.php [L] 7 | -------------------------------------------------------------------------------- /.nvmrc: -------------------------------------------------------------------------------- 1 | 8 2 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: node_js 2 | dist: trusty 3 | sudo: false 4 | addons: 5 | apt: 6 | sources: 7 | - travis-ci/sqlite3 8 | packages: 9 | - sqlite3 10 | cache: 11 | yarn: true 12 | directories: 13 | - node_modules 14 | script: npm test 15 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | Copyright (c) 2013 Eu Beng Hee 2 | 3 | Permission is hereby granted, free of charge, to any person 4 | obtaining a copy of this software and associated documentation 5 | files (the "Software"), to deal in the Software without 6 | restriction, including without limitation the rights to use, 7 | copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the 9 | Software is furnished to do so, subject to the following 10 | conditions: 11 | 12 | The above copyright notice and this permission notice shall be 13 | included in all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 17 | OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 19 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 20 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 
23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # NUSMods API [![Code Climate](http://img.shields.io/codeclimate/github/nusmodifications/nusmods-api.svg)](https://codeclimate.com/github/nusmodifications/nusmods-api) [![Dependency Status](http://img.shields.io/david/nusmodifications/nusmods-api.svg)](https://david-dm.org/nusmodifications/nusmods-api) 2 | 3 | NUSMods API consolidates and normalizes various bits of NUS module information 4 | from the following sources: 5 | 6 | - [CORS historical bidding statistics](http://www.nus.edu.sg/cors/archive.html) 7 | - [CORS module information listings](http://www.nus.edu.sg/cors/schedule.html#corsmodinfo) 8 | - [IVLE API](http://wiki.nus.edu.sg/display/ivlelapi/Home) 9 | - [NUS Bulletin](http://www.nus.edu.sg/registrar/nusbulletin/modulesearch.html) 10 | - [NUS Examination Time-Table](http://www.nus.edu.sg/registrar/event/examschedule-sem1.html) 11 | - [NUS Living Lab API](http://nuslivinglab.nus.edu.sg/) 12 | 13 | It is a purely JSON API, with CORS and JSONP support, and is statically 14 | generated - since the module information changes a few times a day at most, this 15 | allows the API to be as fast as possible, and be completely browsable at 16 | http://api.nusmods.com. 17 | 18 | The raw and intermediate processed JSON are also available, in addition to the 19 | fully processed and normalized JSON. JSON property names are UpperCamelCase and 20 | match the equivalent IVLE API property names where possible to adhere to the 21 | principle of least astonishment. 22 | 23 | To be respectful to NUS servers, and reduce waiting while developing, the 24 | crawlers use simple local filesystem caching and HTTP `if-modified-since` 25 | headers. They are modular, being written as separate gulp tasks, so if only a 26 | subset of information is required, it can be specified down to individual 27 | semesters. 28 | 29 | ## Contributing 30 | 31 | The API is still in its infancy, and there are plenty more endpoints that could 32 | be integrated, and other formats that might be useful, like CSV. Suggestions are 33 | very welcome, and if you have any particular needs for your app, feel free to 34 | open an issue or pull request, or simply contact me directly. I'd be happy to 35 | deploy any additional tasks to the live API site as well. 36 | 37 | ## Cross-Origin Resource Sharing (CORS) Support 38 | 39 | Cross-Origin Resource Sharing (CORS) is enabled, if supporting legacy 40 | browsers is not required. 41 | 42 | ### jQuery Example 43 | 44 | ```js 45 | $.getJSON('http://api.nusmods.com/2015-2016/1/moduleList.json', function (data) { 46 | console.log(data); 47 | }); 48 | ``` 49 | 50 | ## JSONP Support 51 | 52 | If supporting legacy browsers is required, JSONP can be used instead. 53 | 54 | ### jQuery Example 55 | 56 | ```js 57 | $.getJSON('http://api.nusmods.com/2015-2016/1/moduleList.json?callback=?', function (data) { 58 | console.log(data); 59 | }); 60 | ``` 61 | 62 | If the URL includes the string "callback=?", the request is treated as JSONP. 63 | The server will prepend the JSON data with the callback name to form a valid 64 | JSONP response. 65 | 66 | ## API Examples 67 | 68 | ### GET /<AcadYear>/<Semester>/moduleCodes.json 69 | 70 | http://api.nusmods.com/2015-2016/1/moduleCodes.json 71 | 72 | ```js 73 | [ 74 | "ACC1002", 75 | "ACC1002X", 76 | "ACC2002", 77 | ... 
78 | ] 79 | ``` 80 | 81 | ### GET /<AcadYear>/<Semester>/moduleList.json 82 | 83 | http://api.nusmods.com/2015-2016/1/moduleList.json 84 | 85 | ```js 86 | { 87 | "ACC1002": "Financial Accounting", 88 | "ACC1002X": "Financial Accounting", 89 | "ACC2002": "Managerial Accounting", 90 | ... 91 | } 92 | ``` 93 | 94 | ### GET /<AcadYear>/<Semester>/facultyDepartments.json 95 | 96 | http://api.nusmods.com/2015-2016/1/facultyDepartments.json 97 | 98 | ```js 99 | { 100 | "ARTS & SOCIAL SCIENCES": [ 101 | "CENTRE FOR LANGUAGE STUDIES", 102 | "CHINESE STUDIES", 103 | "COMMUNICATIONS AND NEW MEDIA", 104 | ... 105 | ], 106 | "DENTISTRY": [ 107 | "DENTISTRY", 108 | "DIVISION OF GRADUATE DENTAL STUDIES" 109 | ], 110 | "DUKE-NUS GRADUATE MEDICAL SCHOOL S'PORE": [ 111 | "DUKE-NUS GRADUATE MEDICAL SCHOOL S'PORE" 112 | ], 113 | ... 114 | } 115 | ``` 116 | 117 | ### GET /lessonTypes.json 118 | 119 | http://api.nusmods.com/lessonTypes.json 120 | 121 | ```js 122 | { 123 | "DESIGN LECTURE": "Tutorial", 124 | "LABORATORY": "Tutorial", 125 | "LECTURE": "Lecture", 126 | "PACKAGED LECTURE": "Lecture", 127 | "PACKAGED TUTORIAL": "Lecture", 128 | "RECITATION": "Tutorial", 129 | "SECTIONAL TEACHING": "Lecture", 130 | "SEMINAR-STYLE MODULE CLASS": "Lecture", 131 | "TUTORIAL": "Tutorial", 132 | "TUTORIAL TYPE 2": "Tutorial", 133 | "TUTORIAL TYPE 3": "Tutorial" 134 | } 135 | ``` 136 | 137 | ### GET /<AcadYear>/<Semester>/modules/<ModuleCode>.json 138 | 139 | `ExamDate` and `ExamDuration` are in ISO 8601 formats. 140 | 141 | http://api.nusmods.com/2014-2015/2/modules/FE5218.json 142 | 143 | ```js 144 | { 145 | "ModuleCode": "FE5218", 146 | "ModuleTitle": "Credit Risk", 147 | "Department": "RISK MANAGEMENT INSTITUTE", 148 | "ModuleDescription": "The course consists of two parts – (i) statistical credit rating models and (ii) credit derivatives. The first part would cover various statistical credit rating models including Altman’s Z-score, logistic regression, artificial neural network and intensity models. The second part will cover various models used to price credit derivative as well as tools used to manage credit risk. The topics covered would include real and risk neutral probability of default, RiskMetricsTM, CreditRisk+, default correlation, Copula, Basket default swap, CDOs etc.", 149 | "ModuleCredit": "4", 150 | "Workload": "3-0-0-0-7", 151 | "Prerequisite": "FE5101: Derivatives and Fixed Income", 152 | "Corequisite": "FE 5102: Quantitative Methods and Programming", 153 | "ExamDate": "2013-05-03T19:00+0800", 154 | "ExamOpenBook": true, 155 | "ExamDuration": "P2H30M", 156 | "ExamVenue": "LT31", 157 | "Timetable": [ 158 | { 159 | "ClassNo": "SL1", 160 | "LessonType": "LECTURE", 161 | "WeekText": "EVERY WEEK", 162 | "DayText": "WEDNESDAY", 163 | "StartTime": "1900", 164 | "EndTime": "2200", 165 | "Venue": "RMI-SR1" 166 | } 167 | ] 168 | } 169 | ``` 170 | 171 | ### GET /<AcadYear>/<Semester>/modules.json 172 | 173 | http://api.nusmods.com/2015-2016/1/modules.json 174 | 175 | ```js 176 | [ 177 | { 178 | "ModuleCode": "ACC1002", 179 | "ModuleTitle": "Financial Accounting", 180 | "Department": "ACCOUNTING", 181 | "ModuleDescription": "The course provides an introduction to financial accounting. It examines accounting from an external user's perspective: an external user being an investor or a creditor. Such users would need to understand financial accounting in order to make investing or lending decisions. However, to attain a good understanding, it is also necessary to be familiar with how the information is derived. 
Therefore, students would learn how to prepare the reports or statements resulting from financial accounting and how to use them for decision-making.", 182 | "ModuleCredit": "4", 183 | "Workload": "2-2-0-3-4", 184 | "Preclusion": "Students who have passed FNA1002 are not allowed to take ACC1002.", 185 | "ExamDate": "2015-11-25T13:00+0800", 186 | "Types": [ 187 | "Module" 188 | ], 189 | "Timetable": [ 190 | { 191 | "ClassNo": "V1", 192 | "LessonType": "LECTURE", 193 | "WeekText": "EVERY WEEK", 194 | "DayText": "WEDNESDAY", 195 | "StartTime": "1000", 196 | "EndTime": "1200", 197 | "Venue": "LT16" 198 | }, 199 | ... 200 | ] 201 | }, 202 | { 203 | "ModuleCode": "ACC1002X", 204 | "ModuleTitle": "Financial Accounting", 205 | "Department": "ACCOUNTING", 206 | "ModuleDescription": "The course provides an introduction to financial accounting. It examines accounting from an external user's perspective: an external user being an investor or a creditor. Such users would need to understand financial accounting in order to make investing or lending decisions. However, to attain a good understanding, it is also necessary to be familiar with how the information are derived. Therefore, students would learn how to prepare the reports or statements resulting from financial accounting and how to use them for decision-making.", 207 | "ModuleCredit": "4", 208 | "Preclusion": "Students who have passed CS1304 or EC3212 or BK1003 or BZ1002 or BH1002 or BZ1002E or BH1002E or FNA1002E or FNA1002X are not allowed to take ACC1002X.", 209 | "ExamDate": "2015-11-25T13:00+0800", 210 | "Types": [ 211 | "Module", 212 | "UEM" 213 | ], 214 | "Timetable": [ 215 | ... 216 | ] 217 | }, 218 | ... 219 | ] 220 | ``` 221 | 222 | ## Initial Setup 223 | 224 | Copy `.env.example` to a file named `.env`. 225 | 226 | Get an API key from [IVLE](http://ivle.nus.edu.sg/LAPI/default.aspx) and put it in `.env` under `IVLE_API_KEY`. 227 | 228 | Download and install [Node.js](http://nodejs.org), [npm](http://npmjs.org), [yarn](https://yarnpkg.com/en/docs/install) and [sqlite3](https://www.sqlite.org/download.html). 229 | 230 | 231 | Then run the following commands: 232 | 233 | ```bash 234 | $ yarn # install node dependencies 235 | $ npx knex migrate:latest # set up db tables 236 | $ npx knex seed:run # set up basic information 237 | ``` 238 | 239 | ## Updating Module Information 240 | 241 | The default gulp task is set to scrape the semester data in the upcoming month. The following commands are valid: 242 | 243 | ```bash 244 | yarn build:scraper && yarn scrape # production use 245 | yarn scrape:dev # development use 246 | ``` 247 | 248 | Invoking sub-tasks would involve calling the task by changing the commands in `package.json`, or through installing `gulp-cli` globally. For example, to run the `examTimetable` task specifically: 249 | 250 | ```bash 251 | $ gulp examTimetable 252 | ``` 253 | For a list of all tasks available run 254 | 255 | ```bash 256 | $ gulp --tasks 257 | ``` 258 | 259 | ## Task Configuration and Targets 260 | 261 | Many of the tasks have multiple targets, and can have more defined if necessary. In order to configure file-paths and runtime settings, take a look at `config.js`. 262 | 263 | If you want to parse a specific year or semester, take a look at `gulpfile.babel.js`. 
Each task will look something like below: 264 | 265 | ```js 266 | gulp.task('bulletinModules', () => { 267 | const subtasks = iterateSems({ 268 | from: 2017, // change this to year you want to start from 269 | to: 2018, // year to end parsing 270 | semesters: [1, 2, 3, 4], // sem 1, 2 and the 2 special semesters 271 | config: config.bulletinModules, // configuration as found in config.js 272 | }); 273 | 274 | const bulletinModules = R.map(tasks.bulletinModules, subtasks); 275 | return Promise.all(bulletinModules); 276 | }); 277 | ``` 278 | 279 | ## License 280 | 281 | Copyright (c) 2017 NUSModifications. Licensed under the MIT license. 282 | -------------------------------------------------------------------------------- /__mocks__/.eslintrc.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | env: { 3 | jest: true, 4 | }, 5 | rules: { 6 | // eslint thinks testing tools are dependencies 7 | 'import/no-extraneous-dependencies': 'off', 8 | }, 9 | }; 10 | -------------------------------------------------------------------------------- /__mocks__/bunyan.js: -------------------------------------------------------------------------------- 1 | const bunyan = require('bunyan'); 2 | 3 | const mockedBunyan = jest.genMockFromModule('bunyan'); 4 | 5 | // Only log out fatal logs to prevent testing from 6 | // outputing logs 7 | mockedBunyan.createLogger = ({ level, ...otherConfig }) => bunyan.createLogger({ 8 | ...otherConfig, 9 | level: bunyan.FATAL, 10 | }); 11 | 12 | module.exports = mockedBunyan; 13 | -------------------------------------------------------------------------------- /__mocks__/fixtures/examData1.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "Date": "29/9/2016", 4 | "Time": "4:30PM", 5 | "Faculty": "Yale‐NUS College", 6 | "ModuleCode": "YSC1206", 7 | "Title": "CONCEPTUAL CALCULUS" 8 | }, 9 | { 10 | "Date": "16/11/2016", 11 | "Time": "9:00AM", 12 | "Faculty": "Yale‐NUS College", 13 | "ModuleCode": "YLG2202", 14 | "Title": "INTERMEDIATE CLASSICAL GREEK" 15 | }, 16 | { 17 | "Date": "18/11/2016", 18 | "Time": "9:00AM", 19 | "Faculty": "Ctr for Engl Lang Comms", 20 | "ModuleCode": "ENV1202", 21 | "Title": "COMMUNICATIONS FOR ENVIRONMENTAL STUDIES" 22 | }, 23 | { 24 | "Date": "18/11/2016", 25 | "Time": "9:00AM", 26 | "Faculty": "Ctr for Engl Lang Comms", 27 | "ModuleCode": "ES1102", 28 | "Title": "ENGLISH FOR ACADEMIC PURPOSES" 29 | }, 30 | { 31 | "Date": "18/11/2016", 32 | "Time": "9:00AM", 33 | "Faculty": "Ctr for Engl Lang Comms", 34 | "ModuleCode": "ES1103", 35 | "Title": "ENGLISH FOR ACADEMIC PURPOSES" 36 | }, 37 | { 38 | "Date": "18/11/2016", 39 | "Time": "9:00AM", 40 | "Faculty": "Ctr for Engl Lang Comms", 41 | "ModuleCode": "ES5000", 42 | "Title": "GRADUATE ENGLISH COURSE (BASIC LEVEL)" 43 | }, 44 | { 45 | "Date": "18/11/2016", 46 | "Time": "9:00AM", 47 | "Faculty": "Ctr for Engl Lang Comms", 48 | "ModuleCode": "ES5001A", 49 | "Title": "GRADUATE ENGLISH COURSE (INTERMEDIATE LEVEL)" 50 | }, 51 | { 52 | "Date": "18/11/2016", 53 | "Time": "9:00AM", 54 | "Faculty": "Ctr for Engl Lang Comms", 55 | "ModuleCode": "ES1531", 56 | "Title": "CRITICAL THINKING AND WRITING" 57 | }, 58 | { 59 | "Date": "18/11/2016", 60 | "Time": "9:00AM", 61 | "Faculty": "Ctr for Engl Lang Comms", 62 | "ModuleCode": "GEK1549", 63 | "Title": "CRITICAL THINKING AND WRITING" 64 | }, 65 | { 66 | "Date": "18/11/2016", 67 | "Time": "9:00AM", 68 | "Faculty": "Ctr for Engl Lang Comms", 69 | "ModuleCode": 
"GET1021", 70 | "Title": "CRITICAL THINKING AND WRITING" 71 | }, 72 | { 73 | "Date": "19/11/2016", 74 | "Time": "9:00AM", 75 | "Faculty": "NUS Business School", 76 | "ModuleCode": "ACC2002", 77 | "Title": "MANAGERIAL ACCOUNTING" 78 | }, 79 | { 80 | "Date": "19/11/2016", 81 | "Time": "9:00AM", 82 | "Faculty": "Faculty of Engineering", 83 | "ModuleCode": "BN3201", 84 | "Title": "INTRODUCTION TO BIOMECHANICS" 85 | }, 86 | { 87 | "Date": "19/11/2016", 88 | "Time": "9:00AM", 89 | "Faculty": "Faculty of Science", 90 | "ModuleCode": "CM4242", 91 | "Title": "ADVANCED ANALYTICAL TECHNIQUES" 92 | }, 93 | { 94 | "Date": "19/11/2016", 95 | "Time": "9:00AM", 96 | "Faculty": "School of Computing", 97 | "ModuleCode": "CS2104", 98 | "Title": "PROGRAMMING LANGUAGE CONCEPTS" 99 | }, 100 | { 101 | "Date": "19/11/2016", 102 | "Time": "9:00AM", 103 | "Faculty": "School of Computing", 104 | "ModuleCode": "CS3244", 105 | "Title": "MACHINE LEARNING" 106 | }, 107 | { 108 | "Date": "19/11/2016", 109 | "Time": "9:00AM", 110 | "Faculty": "School of Computing", 111 | "ModuleCode": "CS4224", 112 | "Title": "DISTRIBUTED DATABASES" 113 | }, 114 | { 115 | "Date": "19/11/2016", 116 | "Time": "9:00AM", 117 | "Faculty": "Arts & Social Sciences", 118 | "ModuleCode": "EC2104", 119 | "Title": "QUANTITATIVE METHODS FOR ECONOMIC ANALYSIS" 120 | }, 121 | { 122 | "Date": "19/11/2016", 123 | "Time": "9:00AM", 124 | "Faculty": "Arts & Social Sciences", 125 | "ModuleCode": "EC3373", 126 | "Title": "ASEAN ECONOMIES" 127 | }, 128 | { 129 | "Date": "19/11/2016", 130 | "Time": "9:00AM", 131 | "Faculty": "Arts & Social Sciences", 132 | "ModuleCode": "EC4306", 133 | "Title": "APPLIED MICROECONOMIC ANALYSIS" 134 | }, 135 | { 136 | "Date": "19/11/2016", 137 | "Time": "9:00AM", 138 | "Faculty": "Arts & Social Sciences", 139 | "ModuleCode": "ECA5325", 140 | "Title": "PROJECT & POLICY EVALUATION" 141 | }, 142 | { 143 | "Date": "19/11/2016", 144 | "Time": "9:00AM", 145 | "Faculty": "Faculty of Engineering", 146 | "ModuleCode": "EE2011", 147 | "Title": "ENGINEERING ELECTROMAGNETICS" 148 | }, 149 | { 150 | "Date": "19/11/2016", 151 | "Time": "9:00AM", 152 | "Faculty": "Faculty of Engineering", 153 | "ModuleCode": "EE4218", 154 | "Title": "EMBEDDED HARDWARE SYSTEM DESIGN" 155 | }, 156 | { 157 | "Date": "19/11/2016", 158 | "Time": "9:00AM", 159 | "Faculty": "Arts & Social Sciences", 160 | "ModuleCode": "EL3203", 161 | "Title": "SEMANTICS AND PRAGMATICS" 162 | }, 163 | { 164 | "Date": "19/11/2016", 165 | "Time": "9:00AM", 166 | "Faculty": "Risk Management Institute", 167 | "ModuleCode": "FE5101", 168 | "Title": "DERIVATIVES AND FIXED INCOME" 169 | }, 170 | { 171 | "Date": "19/11/2016", 172 | "Time": "9:00AM", 173 | "Faculty": "Faculty of Engineering", 174 | "ModuleCode": "IE5301", 175 | "Title": "HUMAN FACTORS IN ENGINEERING AND DESIGN" 176 | }, 177 | { 178 | "Date": "19/11/2016", 179 | "Time": "9:00AM", 180 | "Faculty": "Faculty of Science", 181 | "ModuleCode": "LSM2102", 182 | "Title": "MOLECULAR BIOLOGY" 183 | }, 184 | { 185 | "Date": "19/11/2016", 186 | "Time": "9:00AM", 187 | "Faculty": "Faculty of Science", 188 | "ModuleCode": "MA5205", 189 | "Title": "GRADUATE ANALYSIS I" 190 | }, 191 | { 192 | "Date": "19/11/2016", 193 | "Time": "9:00AM", 194 | "Faculty": "Faculty of Engineering", 195 | "ModuleCode": "ME5304", 196 | "Title": "EXPERIMENTAL FLUID MECHANICS" 197 | }, 198 | { 199 | "Date": "19/11/2016", 200 | "Time": "9:00AM", 201 | "Faculty": "Arts & Social Sciences", 202 | "ModuleCode": "NM2220", 203 | "Title": "INTRODUCTION TO MEDIA WRITING" 
204 | }, 205 | { 206 | "Date": "19/11/2016", 207 | "Time": "9:00AM", 208 | "Faculty": "Arts & Social Sciences", 209 | "ModuleCode": "NM3224", 210 | "Title": "CULTURE INDUSTRIES" 211 | }, 212 | { 213 | "Date": "19/11/2016", 214 | "Time": "9:00AM", 215 | "Faculty": "Faculty of Engineering", 216 | "ModuleCode": "OT5102", 217 | "Title": "OIL & GAS TECHNOLOGY" 218 | }, 219 | { 220 | "Date": "19/11/2016", 221 | "Time": "9:00AM", 222 | "Faculty": "Faculty of Science", 223 | "ModuleCode": "PR5301", 224 | "Title": "FOOD AND DRUG LAWS" 225 | }, 226 | { 227 | "Date": "19/11/2016", 228 | "Time": "9:00AM", 229 | "Faculty": "Arts & Social Sciences", 230 | "ModuleCode": "SC3229", 231 | "Title": "COMPARING DEVIANCE: PERVERTS & SCANDALOUS IMPROPRIE" 232 | }, 233 | { 234 | "Date": "19/11/2016", 235 | "Time": "9:00AM", 236 | "Faculty": "Faculty of Science", 237 | "ModuleCode": "ST3242", 238 | "Title": "INTRODUCTION TO SURVIVAL ANALYSIS" 239 | }, 240 | { 241 | "Date": "19/11/2016", 242 | "Time": "9:00AM", 243 | "Faculty": "Faculty of Engineering", 244 | "ModuleCode": "TE2101", 245 | "Title": "PROGRAMMING METHODOLOGY" 246 | }, 247 | { 248 | "Date": "19/11/2016", 249 | "Time": "9:00AM", 250 | "Faculty": "Faculty of Engineering", 251 | "ModuleCode": "ME2151", 252 | "Title": "PRINCIPLES OF MECHANICAL ENG. MATERIALS" 253 | }, 254 | { 255 | "Date": "19/11/2016", 256 | "Time": "9:00AM", 257 | "Faculty": "Faculty of Engineering", 258 | "ModuleCode": "ME2151E", 259 | "Title": "PRINCIPLES OF MECHANICAL ENG. MATERIALS" 260 | }, 261 | { 262 | "Date": "19/11/2016", 263 | "Time": "9:00AM", 264 | "Faculty": "Arts & Social Sciences", 265 | "ModuleCode": "GEH1014", 266 | "Title": "SAMURAI, GEISHA, YAKUZA AS SELF OR OTHER" 267 | }, 268 | { 269 | "Date": "19/11/2016", 270 | "Time": "9:00AM", 271 | "Faculty": "Arts & Social Sciences", 272 | "ModuleCode": "GEK2022", 273 | "Title": "SAMURAI, GEISHA, YAKUZA AS SELF OR OTHER" 274 | }, 275 | { 276 | "Date": "19/11/2016", 277 | "Time": "9:00AM", 278 | "Faculty": "Faculty of Engineering", 279 | "ModuleCode": "EE3302", 280 | "Title": "INDUSTRIAL CONTROL SYSTEMS" 281 | }, 282 | { 283 | "Date": "19/11/2016", 284 | "Time": "9:00AM", 285 | "Faculty": "Faculty of Engineering", 286 | "ModuleCode": "EE3302E", 287 | "Title": "INDUSTRIAL CONTROL SYSTEMS" 288 | }, 289 | { 290 | "Date": "19/11/2016", 291 | "Time": "9:00AM", 292 | "Faculty": "Faculty of Engineering", 293 | "ModuleCode": "EE5107", 294 | "Title": "OPTIMAL CONTROL SYSTEMS" 295 | }, 296 | { 297 | "Date": "19/11/2016", 298 | "Time": "9:00AM", 299 | "Faculty": "Faculty of Engineering", 300 | "ModuleCode": "EE6107", 301 | "Title": "OPTIMAL CONTROL SYSTEMS (ADVANCED)" 302 | }, 303 | { 304 | "Date": "19/11/2016", 305 | "Time": "9:00AM", 306 | "Faculty": "Faculty of Science", 307 | "ModuleCode": "PR3144", 308 | "Title": "PRINCIPLES OF RESEARCH METHODS" 309 | }, 310 | { 311 | "Date": "19/11/2016", 312 | "Time": "9:00AM", 313 | "Faculty": "Faculty of Science", 314 | "ModuleCode": "PR4103", 315 | "Title": "RESEARCH METHODOLOGY" 316 | }, 317 | { 318 | "Date": "19/11/2016", 319 | "Time": "9:00AM", 320 | "Faculty": "Faculty of Engineering", 321 | "ModuleCode": "CN5162", 322 | "Title": "ADVANCED POLYMERIC MATERIALS" 323 | }, 324 | { 325 | "Date": "19/11/2016", 326 | "Time": "9:00AM", 327 | "Faculty": "Faculty of Engineering", 328 | "ModuleCode": "CN6162", 329 | "Title": "ADVANCED POLYMERIC MATERIALS" 330 | }, 331 | { 332 | "Date": "19/11/2016", 333 | "Time": "9:00AM", 334 | "Faculty": "Faculty of Engineering", 335 | "ModuleCode": "CN3121", 336 | 
"Title": "PROCESS DYNAMICS & CONTROL" 337 | }, 338 | { 339 | "Date": "19/11/2016", 340 | "Time": "9:00AM", 341 | "Faculty": "Faculty of Engineering", 342 | "ModuleCode": "CN3121E", 343 | "Title": "PROCESS DYNAMICS & CONTROL" 344 | }, 345 | { 346 | "Date": "19/11/2016", 347 | "Time": "1:00PM", 348 | "Faculty": "Faculty of Science", 349 | "ModuleCode": "BL5234", 350 | "Title": "THEORETICAL BIOLOGY" 351 | }, 352 | { 353 | "Date": "19/11/2016", 354 | "Time": "1:00PM", 355 | "Faculty": "School of Computing", 356 | "ModuleCode": "BT3102", 357 | "Title": "COMPUTATIONAL METHODS FOR BUSINESS ANALYTICS" 358 | }, 359 | { 360 | "Date": "19/11/2016", 361 | "Time": "1:00PM", 362 | "Faculty": "Arts & Social Sciences", 363 | "ModuleCode": "CH2295", 364 | "Title": "COMMERCE AND CULTURE IN CHINAS PAST (IN ENGLISH)" 365 | }, 366 | { 367 | "Date": "19/11/2016", 368 | "Time": "1:00PM", 369 | "Faculty": "Faculty of Science", 370 | "ModuleCode": "CM5268", 371 | "Title": "ADVANCED ORGANIC MATERIALS" 372 | }, 373 | { 374 | "Date": "19/11/2016", 375 | "Time": "1:00PM", 376 | "Faculty": "Arts & Social Sciences", 377 | "ModuleCode": "EC3342", 378 | "Title": "INTERNATIONAL TRADE I" 379 | }, 380 | { 381 | "Date": "19/11/2016", 382 | "Time": "1:00PM", 383 | "Faculty": "Arts & Social Sciences", 384 | "ModuleCode": "EC4371", 385 | "Title": "DEVELOPMENT ECONOMICS II" 386 | }, 387 | { 388 | "Date": "19/11/2016", 389 | "Time": "1:00PM", 390 | "Faculty": "Arts & Social Sciences", 391 | "ModuleCode": "EC5102", 392 | "Title": "MACROECONOMIC THEORY" 393 | }, 394 | { 395 | "Date": "19/11/2016", 396 | "Time": "1:00PM", 397 | "Faculty": "Faculty of Engineering", 398 | "ModuleCode": "EE3431C", 399 | "Title": "MICROELECTRONICS MATERIALS AND DEVICES" 400 | }, 401 | { 402 | "Date": "19/11/2016", 403 | "Time": "1:00PM", 404 | "Faculty": "Faculty of Engineering", 405 | "ModuleCode": "EE5431R", 406 | "Title": "FUNDAMENTALS OF NANOELECTRONICS" 407 | }, 408 | { 409 | "Date": "19/11/2016", 410 | "Time": "1:00PM", 411 | "Faculty": "Faculty of Engineering", 412 | "ModuleCode": "ESE2001", 413 | "Title": "ENVIRONMENTAL PROCESSES" 414 | }, 415 | { 416 | "Date": "19/11/2016", 417 | "Time": "1:00PM", 418 | "Faculty": "Faculty of Science", 419 | "ModuleCode": "FST3103", 420 | "Title": "ADVANCED FOOD ENGINEERING" 421 | }, 422 | { 423 | "Date": "19/11/2016", 424 | "Time": "1:00PM", 425 | "Faculty": "Arts & Social Sciences", 426 | "ModuleCode": "GE4207", 427 | "Title": "COASTAL MANAGEMENT" 428 | }, 429 | { 430 | "Date": "19/11/2016", 431 | "Time": "1:00PM", 432 | "Faculty": "Faculty of Science", 433 | "ModuleCode": "GER1000", 434 | "Title": "QUANTITATIVE REASONING" 435 | }, 436 | { 437 | "Date": "19/11/2016", 438 | "Time": "1:00PM", 439 | "Faculty": "School of Computing", 440 | "ModuleCode": "IS3243", 441 | "Title": "TECHNOLOGY STRATEGY AND MANAGEMENT" 442 | }, 443 | { 444 | "Date": "19/11/2016", 445 | "Time": "1:00PM", 446 | "Faculty": "Arts & Social Sciences", 447 | "ModuleCode": "JS2216", 448 | "Title": "POSTWAR JAPANESE FILM AND ANIME" 449 | }, 450 | { 451 | "Date": "19/11/2016", 452 | "Time": "1:00PM", 453 | "Faculty": "Yong Loo Lin School (Medicine)", 454 | "ModuleCode": "LSM3232", 455 | "Title": "MICROBIOLOGY" 456 | }, 457 | { 458 | "Date": "19/11/2016", 459 | "Time": "1:00PM", 460 | "Faculty": "Faculty of Science1", 461 | "ModuleCode": "LSM4254", 462 | "Title": "PRINCIPLES OF TAXONOMY AND SYSTEMATICS" 463 | } 464 | ] 465 | -------------------------------------------------------------------------------- /__mocks__/fixtures/examData2.json: 
-------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "Date": "19/06/2017", 4 | "Time": "9:00AM", 5 | "Faculty": "Risk Management Inst", 6 | "ModuleCode": "FE5112", 7 | "Title": "STOCHASTIC CALCULUS AND QUANTITATIVE METHODS" 8 | }, 9 | { 10 | "Date": "19/06/2017", 11 | "Time": "1:00PM", 12 | "Faculty": "Risk Management Inst", 13 | "ModuleCode": "FE5209", 14 | "Title": "FINANCIAL ECONOMETRICS" 15 | }, 16 | { 17 | "Date": "24/06/2017", 18 | "Time": "9:00AM", 19 | "Faculty": "Risk Management Inst", 20 | "ModuleCode": "FE5105", 21 | "Title": "CORPORATE FINANCING AND RISK" 22 | }, 23 | { 24 | "Date": "14/07/2017", 25 | "Time": "9:00AM", 26 | "Faculty": "Risk Management Inst", 27 | "ModuleCode": "FE5116", 28 | "Title": "PROGRAMMING AND ADVANCED NUMERICAL METHODS" 29 | }, 30 | { 31 | "Date": "14/07/2017", 32 | "Time": "2:30PM", 33 | "Faculty": "Faculty of Science", 34 | "ModuleCode": "FST2204", 35 | "Title": "SEAFOOD SUPPLY CHAINS IN JAPAN AND SINGAPORE" 36 | }, 37 | { 38 | "Date": "15/07/2017", 39 | "Time": "9:00AM", 40 | "Faculty": "Risk Management Inst", 41 | "ModuleCode": "FE5211", 42 | "Title": "SEMINAR IN FINANCIAL ENGINEERING" 43 | }, 44 | { 45 | "Date": "28/07/2017", 46 | "Time": "9:00AM", 47 | "Faculty": "School of Computing", 48 | "ModuleCode": "CS1020", 49 | "Title": "DATA STRUCTURES AND ALGORITHMS I" 50 | }, 51 | { 52 | "Date": "28/07/2017", 53 | "Time": "9:00AM", 54 | "Faculty": "School of Computing", 55 | "ModuleCode": "CS2010", 56 | "Title": "DATA STRUCTURES AND ALGORITHMS II" 57 | }, 58 | { 59 | "Date": "28/07/2017", 60 | "Time": "9:00AM", 61 | "Faculty": "Arts & Social Sciences", 62 | "ModuleCode": "EC3385", 63 | "Title": "MARITIME AND SHIPPING ECONOMICS" 64 | }, 65 | { 66 | "Date": "28/07/2017", 67 | "Time": "9:00AM", 68 | "Faculty": "Arts & Social Sciences", 69 | "ModuleCode": "ECA5884", 70 | "Title": "APPLIED BEHAVIOURAL ECONOMICS" 71 | }, 72 | { 73 | "Date": "28/07/2017", 74 | "Time": "9:00AM", 75 | "Faculty": "Faculty of Engineering", 76 | "ModuleCode": "EE2021", 77 | "Title": "DEVICES AND CIRCUITS" 78 | }, 79 | { 80 | "Date": "28/07/2017", 81 | "Time": "2:30PM", 82 | "Faculty": "School of Computing", 83 | "ModuleCode": "CS2103", 84 | "Title": "SOFTWARE ENGINEERING" 85 | }, 86 | { 87 | "Date": "28/07/2017", 88 | "Time": "2:30PM", 89 | "Faculty": "School of Computing", 90 | "ModuleCode": "CS2105", 91 | "Title": "INTRODUCTION TO COMPUTER NETWORKS" 92 | }, 93 | { 94 | "Date": "28/07/2017", 95 | "Time": "2:30PM", 96 | "Faculty": "Arts & Social Sciences", 97 | "ModuleCode": "ECA5335", 98 | "Title": "DERIVATIVE SECURITIES" 99 | }, 100 | { 101 | "Date": "28/07/2017", 102 | "Time": "2:30PM", 103 | "Faculty": "Arts & Social Sciences", 104 | "ModuleCode": "ECA5376", 105 | "Title": "AUCTIONS AND MARKET DESIGN" 106 | }, 107 | { 108 | "Date": "29/07/2017", 109 | "Time": "9:00AM", 110 | "Faculty": "Arts & Social Sciences", 111 | "ModuleCode": "EC5387", 112 | "Title": "ISSUES IN MARITIME AND SHIPPING ECONOMICS" 113 | }, 114 | { 115 | "Date": "29/07/2017", 116 | "Time": "9:00AM", 117 | "Faculty": "Arts & Social Sciences", 118 | "ModuleCode": "EC5387R", 119 | "Title": "ISSUES IN MARITIME AND SHIPPING ECONOMICS" 120 | }, 121 | { 122 | "Date": "29/07/2017", 123 | "Time": "9:00AM", 124 | "Faculty": "Faculty of Science", 125 | "ModuleCode": "QF5204", 126 | "Title": "NUMERICAL METHODS IN QUANTITATIVE FINANCE" 127 | } 128 | ] 129 | -------------------------------------------------------------------------------- /__mocks__/fixtures/test1.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/nusmodifications/nusmods-api/12c61d55ab295285a72d58b7056314446badf3f6/__mocks__/fixtures/test1.pdf -------------------------------------------------------------------------------- /__mocks__/fixtures/test2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nusmodifications/nusmods-api/12c61d55ab295285a72d58b7056314446badf3f6/__mocks__/fixtures/test2.pdf -------------------------------------------------------------------------------- /__mocks__/fs-extra.js: -------------------------------------------------------------------------------- 1 | import path from 'path'; 2 | import R from 'ramda'; 3 | 4 | const fs = jest.genMockFromModule('fs-extra'); 5 | 6 | // This is a custom function that our tests can use during setup to specify 7 | // what the files on the "mock" filesystem should look like when any of the 8 | // `fs` APIs are used. 9 | let mockFiles = {}; 10 | let mockFilesMeta = {}; 11 | fs.setMock = (mockFileSystem, mockFilesSystemMeta) => { 12 | mockFiles = mockFileSystem; 13 | mockFilesMeta = mockFilesSystemMeta; 14 | }; 15 | 16 | fs.readFileSync = filePath => mockFiles[filePath]; 17 | 18 | fs.readFile = async filePath => fs.readFileSync(filePath); 19 | 20 | // A custom version of `readdirSync` that reads from the special mocked out 21 | // file list set via setMock 22 | fs.readdirSync = (directoryPath) => { 23 | const pathArr = directoryPath.split(path.sep); 24 | return Object.keys(R.path(pathArr, mockFiles)) || []; 25 | }; 26 | 27 | // A custom version of `readdir` that reads from the special mocked out 28 | // file list set via setMock 29 | fs.readdir = async directoryPath => fs.readdirSync(directoryPath); 30 | 31 | // A custom version of `readJson` that reads from the special mocked out 32 | // file list set via setMock 33 | 34 | /** 35 | * A custom version of `readJson` that reads from the mocked out file system. 36 | * Reads json from string, error otherwise. 37 | */ 38 | fs.readJson = async (directoryPath) => { 39 | const pathArr = directoryPath.split(path.sep); 40 | try { 41 | return JSON.parse(R.path(pathArr, mockFiles)); 42 | } catch (error) { 43 | return Promise.reject(error); 44 | } 45 | }; 46 | 47 | /** 48 | * A custom version of `readJsonSync` that reads from the mocked out file system. 49 | * Reads json from string, error otherwise. 50 | */ 51 | fs.readJsonSync = (directoryPath) => { 52 | const pathArr = directoryPath.split(path.sep); 53 | return JSON.parse(R.path(pathArr, mockFiles)); 54 | }; 55 | 56 | /** 57 | * Mocks and fakes meta data for the file 58 | */ 59 | fs.stat = async (filePath) => { 60 | const meta = mockFilesMeta[filePath]; 61 | if (!meta) { 62 | throw new Error('No such file'); 63 | } 64 | return meta; 65 | }; 66 | 67 | module.exports = fs; 68 | -------------------------------------------------------------------------------- /config.js: -------------------------------------------------------------------------------- 1 | import 'dotenv/config'; 2 | 3 | const ivleApi = { 4 | baseUrl: 'https://ivle.nus.edu.sg/api/Lapi.svc/', 5 | key: process.env.IVLE_API_KEY, 6 | token: process.env.IVLE_API_TOKEN, 7 | }; 8 | 9 | const venuesApi = { 10 | baseUrl: 'http://nuslivinglab.nus.edu.sg/api_dev/api/', 11 | }; 12 | 13 | const defaults = { 14 | // Set which year of school data to scrape. `null` value will scrape 15 | // a month ahead of the most current school year's data. 
16 | year: null, 17 | cachePath: 'cache', 18 | // Maximum cache age in seconds. Can be set to 0 to force refresh every 19 | // time. If set to -1, cached files never expire and are always used. 20 | // By default, force refresh for dist build, cache for one day otherwise. 21 | maxCacheAge: process.env.NODE_ENV === 'production' ? 0 : 86400, 22 | destFolder: 'app/api', 23 | // Pretty-print JSON with '\t', uglify JSON with ''. 24 | // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/JSON/stringify#space_argument 25 | jsonSpace: process.env.NODE_ENV === 'production' ? '' : '\t', 26 | headers: {}, 27 | concurrency: 128, 28 | }; 29 | 30 | export default { 31 | defaults, 32 | bulletinModules: { 33 | cachePath: defaults.cachePath, 34 | maxCacheAge: defaults.maxCacheAge, 35 | destFolder: defaults.destFolder, 36 | jsonSpace: defaults.jsonSpace, 37 | destFileName: 'bulletinModulesRaw.json', 38 | destFacultyDepartments: 'facultyDepartments.json', 39 | ivleApi, 40 | venuesApi, 41 | }, 42 | cors: { 43 | cachePath: defaults.cachePath, 44 | maxCacheAge: defaults.maxCacheAge, 45 | destFolder: defaults.destFolder, 46 | jsonSpace: defaults.jsonSpace, 47 | concurrency: defaults.concurrency, 48 | destFileName: 'corsRaw.json', 49 | destLessonTypes: 'lessonTypes.json', 50 | }, 51 | corsBiddingStats: { 52 | cachePath: defaults.cachePath, 53 | maxCacheAge: defaults.maxCacheAge, 54 | destFolder: defaults.destFolder, 55 | jsonSpace: defaults.jsonSpace, 56 | destFileName: 'corsBiddingStatsRaw.json', 57 | }, 58 | examTimetable: { 59 | cachePath: defaults.cachePath, 60 | maxCacheAge: defaults.maxCacheAge, 61 | destFolder: defaults.destFolder, 62 | jsonSpace: defaults.jsonSpace, 63 | destFileName: 'examTimetableRaw.json', 64 | }, 65 | ivle: { 66 | cachePath: defaults.cachePath, 67 | maxCacheAge: defaults.maxCacheAge, 68 | srcFolder: defaults.destFolder, 69 | destFolder: defaults.destFolder, 70 | jsonSpace: defaults.jsonSpace, 71 | concurrency: defaults.concurrency, 72 | destFileName: 'ivleRaw.json', 73 | ivleApi, 74 | }, 75 | moduleTimetableDelta: { 76 | cachePath: defaults.cachePath, 77 | maxCacheAge: defaults.maxCacheAge, 78 | destFolder: defaults.destFolder, 79 | jsonSpace: defaults.jsonSpace, 80 | destFileName: 'moduleTimetableDeltaRaw.json', 81 | ivleApi, 82 | }, 83 | venues: { 84 | cachePath: defaults.cachePath, 85 | maxCacheAge: defaults.maxCacheAge, 86 | destFolder: defaults.destFolder, 87 | jsonSpace: defaults.jsonSpace, 88 | destFileName: 'venuesRaw.json', 89 | }, 90 | consolidate: { 91 | cachePath: defaults.cachePath, 92 | maxCacheAge: defaults.maxCacheAge, 93 | destFolder: defaults.destFolder, 94 | jsonSpace: defaults.jsonSpace, 95 | destFileName: 'modules.json', 96 | destVenues: 'venues.json', 97 | destConsolidated: 'consolidateRaw.json', 98 | }, 99 | split: { 100 | destFolder: defaults.destFolder, 101 | jsonSpace: defaults.jsonSpace, 102 | destSubfolder: 'modules', 103 | destModuleCodes: 'moduleCodes.json', 104 | destModuleList: 'moduleList.json', 105 | destModuleInformation: 'moduleInformation.json', 106 | destTimetableInformation: 'timetable.json', 107 | destVenueInformation: 'venueInformation.json', 108 | }, 109 | }; 110 | -------------------------------------------------------------------------------- /gulp-tasks/index.js: -------------------------------------------------------------------------------- 1 | import bulletinModules from './remote/bulletinModules'; 2 | import cors from './remote/cors'; 3 | import corsBiddingStats from './remote/corsBiddingStats'; 4 | import 
examTimetable from './remote/examTimetable'; 5 | import ivle from './remote/ivle'; 6 | import moduleTimetableDelta from './remote/moduleTimetableDelta'; 7 | import venues from './remote/venues'; 8 | import mergeCorsBiddingStats from './local/mergeCorsBiddingStats'; 9 | import consolidateForSem from './local/consolidateForSem'; 10 | import consolidateForYear from './local/consolidateForYear'; 11 | import splitForSem from './local/splitForSem'; 12 | import splitForYear from './local/splitForYear'; 13 | 14 | export default { 15 | bulletinModules, 16 | cors, 17 | corsBiddingStats, 18 | examTimetable, 19 | ivle, 20 | moduleTimetableDelta, 21 | venues, 22 | mergeCorsBiddingStats, 23 | consolidateForSem, 24 | splitForSem, 25 | consolidateForYear, 26 | splitForYear, 27 | }; 28 | -------------------------------------------------------------------------------- /gulp-tasks/local/consolidateForSem.js: -------------------------------------------------------------------------------- 1 | import path from 'path'; 2 | import fs from 'fs-extra'; 3 | import bunyan from 'bunyan'; 4 | import Promise from 'bluebird'; 5 | import R from 'ramda'; 6 | import moment from 'moment'; 7 | import clean from 'underscore.string/clean'; 8 | import { DATE_FORMAT as EXAM_DATE_FORMAT } from '../remote/examTimetable'; 9 | import { DATE_FORMAT as CORS_DATE_FORMAT } from '../remote/cors'; 10 | import mergeModuleFields from '../utils/mergeModuleFields'; 11 | import titleize from '../utils/titleize'; 12 | 13 | /** 14 | * Normalises then consolidates information for one semester. 15 | * By default outputs to: 16 | * - modules.json 17 | * - venues.json 18 | */ 19 | 20 | const MODULE_FIELDS = [ 21 | 'ModuleCode', 22 | 'ModuleTitle', 23 | 'Department', 24 | 'ModuleDescription', 25 | 'ModuleCredit', 26 | 'Workload', 27 | 'Types', 28 | 'CrossModule', 29 | 'Corequisite', 30 | 'Prerequisite', 31 | 'Preclusion', 32 | 'ExamDate', 33 | 'ExamDuration', 34 | 'ExamOpenBook', 35 | 'ExamVenue', 36 | 'Timetable', 37 | 'IVLE', 38 | 'LecturePeriods', 39 | 'Lecturers', 40 | 'TutorialPeriods', 41 | 'CorsBiddingStats', 42 | ]; 43 | 44 | const LESSON_FIELDS = [ 45 | 'LessonType', 46 | 'ClassNo', 47 | 'DayText', 48 | 'StartTime', 49 | 'EndTime', 50 | 'WeekText', 51 | 'Venue', 52 | ]; 53 | 54 | const log = bunyan.createLogger({ name: 'consolidateForSem' }); 55 | 56 | function normalize(data, subLog) { 57 | function normalizeSingleTimetable(timetable) { 58 | const toFourCharsTime = time => `000${time}`.slice(-4); 59 | const processLesson = R.evolve({ 60 | StartTime: toFourCharsTime, 61 | EndTime: toFourCharsTime, 62 | DayText: titleize, 63 | WeekText: titleize, 64 | LessonType: titleize, 65 | Venue: R.trim, 66 | }); 67 | return timetable.map(processLesson); 68 | } 69 | 70 | function normalizeSingleCors(cors) { 71 | function processExamDate(examDate) { 72 | if (examDate === 'No Exam Date.') { 73 | return ''; 74 | } 75 | const dateTime = examDate.split(' '); 76 | if (dateTime.length !== 2) { 77 | throw new Error(`ExamDate of cors should contain whitespace, found: ${examDate}`); 78 | } 79 | const date = moment.utc(R.head(dateTime), CORS_DATE_FORMAT); 80 | switch (R.last(dateTime)) { 81 | case 'AM': 82 | date.hour(9); 83 | break; 84 | case 'PM': 85 | // 2.30 PM on Friday afternoons 86 | if (date.day() === 5) { 87 | date.hour(14).minute(30); 88 | } else { 89 | date.hour(13); 90 | } 91 | break; 92 | case 'EVENING': 93 | date.hour(17); 94 | break; 95 | default: 96 | subLog.error(`Unexpected exam time '${examDate}'`); 97 | } 98 | return 
`${date.toISOString().slice(0, 16)}+0800`; 99 | } 100 | const processTimetable = R.pipe( 101 | R.map(R.evolve({ 102 | WeekText: R.replace(' ', ' '), 103 | Venue: R.replace(/(?:^null)?,$/, ''), 104 | })), 105 | normalizeSingleTimetable, 106 | ); 107 | const processCors = R.evolve({ 108 | ExamDate: processExamDate, 109 | Timetable: processTimetable, 110 | }); 111 | return processCors(cors); 112 | } 113 | 114 | function normalizeCors(cors) { 115 | const corsMods = {}; 116 | Object.values(cors).forEach((module) => { 117 | const mod = normalizeSingleCors(module); 118 | 119 | const codes = mod.ModuleCode.split(' / '); 120 | mod.Types = codes.map((code) => { 121 | if (/^GE[KM]\d/.test(code)) { 122 | return 'GEM'; 123 | } else if (/^SS[A-Z]\d/.test(code)) { 124 | return 'SSM'; 125 | } 126 | return mod.Type; 127 | }); 128 | codes.forEach((code) => { 129 | corsMods[code] = corsMods[code] || R.omit(['Type'], mod); 130 | corsMods[code].ModuleCode = code; 131 | corsMods[code].Types = R.union(corsMods[code].Types, mod.Types); 132 | }); 133 | }); 134 | return corsMods; 135 | } 136 | 137 | function normalizeSingleTimetableDelta(timetableDelta) { 138 | const sortByLastModified = R.sortBy(R.prop('LastModified')); 139 | const isRedundant = lesson => lesson.isDelete || lesson.DayCode === '7'; // Sundays seem to be dummy values 140 | const removeRedundant = R.pipe( 141 | sortByLastModified, 142 | R.reverse, 143 | R.uniqBy(R.props(LESSON_FIELDS)), // only keep the latest 144 | R.reject(isRedundant), 145 | R.map(R.pick(LESSON_FIELDS)), 146 | normalizeSingleTimetable, 147 | ); 148 | return removeRedundant(timetableDelta); 149 | } 150 | 151 | function normalizeSingleCorsBiddingStats(corsBiddingStats) { 152 | return corsBiddingStats.map(R.pipe( 153 | R.omit(['ModuleCode']), 154 | R.evolve({ 155 | Group: titleize, 156 | Faculty: titleize, 157 | StudentAcctType: R.replace('
', ''), 158 | }), 159 | )); 160 | } 161 | 162 | function normalizeSingleExam(exam) { 163 | const examMoment = moment.utc(exam.Date.slice(0, 11) + exam.Time, `${EXAM_DATE_FORMAT} h:mm a`); 164 | const examString = `${examMoment.toISOString().slice(0, 16)}+0800`; 165 | 166 | let duration; 167 | if (exam.Duration) { 168 | duration = `P${exam.Duration.replace(/\s/g, '').toUpperCase().slice(0, 5)}`; 169 | } 170 | return { 171 | ModuleCode: exam.ModuleCode, 172 | ExamDate: examString, 173 | ExamDuration: duration, 174 | ExamOpenBook: exam[''] ? exam[''] === '*' : undefined, 175 | ExamVenue: exam.Venue || undefined, 176 | }; 177 | } 178 | 179 | const normalizeData = R.evolve({ 180 | cors: normalizeCors, 181 | corsBiddingStats: R.map(normalizeSingleCorsBiddingStats), 182 | examTimetable: R.map(normalizeSingleExam), 183 | moduleTimetableDelta: R.map(normalizeSingleTimetableDelta), 184 | }); 185 | return normalizeData(data); 186 | } 187 | 188 | function consolidate(data, subLog) { 189 | const mainModuleCodes = [ 190 | ...Object.keys(data.bulletinModules), 191 | ...Object.keys(data.cors), 192 | ]; 193 | const auxiliaryModuleCodes = [ 194 | ...Object.keys(data.corsBiddingStats), 195 | ...Object.keys(data.examTimetable), 196 | ...Object.keys(data.moduleTimetableDelta), 197 | ...Object.keys(data.ivle), 198 | ]; 199 | const moduleCodesWithoutData = R.difference(auxiliaryModuleCodes, mainModuleCodes); 200 | // eslint-disable-next-line max-len 201 | subLog.warn(`${moduleCodesWithoutData.join(', ')} have no bulletin or cors data source and will be excluded.`); 202 | 203 | const allModuleCodes = mainModuleCodes; 204 | subLog.info(`parsing ${allModuleCodes.length} modules`); 205 | 206 | const dataTypes = Object.keys(data); 207 | const consolidated = R.fromPairs(allModuleCodes.map((moduleCode) => { 208 | const module = {}; 209 | dataTypes.forEach((type) => { 210 | module[type] = data[type][moduleCode]; 211 | }); 212 | return [moduleCode, module]; // fromPairs turns [key, val] to { key: val } 213 | })); 214 | return consolidated; 215 | } 216 | 217 | function parseModule(rawModule, lessonTypes) { 218 | function titleizeIfAllCaps(val) { 219 | return val === val.toUpperCase() ? titleize(val) : val; 220 | } 221 | function cleanIfString(val) { 222 | return typeof val === 'string' ? clean(val) : val; 223 | } 224 | function sortLessons(a, b) { 225 | const week = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday']; 226 | for (let i = 0; i < LESSON_FIELDS.length; i += 1) { 227 | const key = LESSON_FIELDS[i]; 228 | if (a[key] !== b[key]) { 229 | if (key === 'DayText') { 230 | return week.indexOf(a[key]) - week.indexOf(b[key]); 231 | } 232 | return a[key].localeCompare(b[key]); 233 | } 234 | } 235 | return 0; 236 | } 237 | const module = R.pipe( 238 | R.evolve({ 239 | Department: titleize, 240 | ModuleTitle: titleizeIfAllCaps, 241 | Timetable: R.sort(sortLessons), 242 | }), 243 | R.map(cleanIfString), 244 | )(rawModule); 245 | const lecturerNames = []; 246 | const lecturers = module.IVLE ? 
R.pluck('Lecturers', module.IVLE) : []; 247 | lecturers.forEach((lecturer) => { 248 | switch (lecturer.Role.trim()) { 249 | case 'Lecturer': 250 | case 'Co-Lecturer': 251 | case 'Visiting Professor': 252 | lecturerNames.push(lecturer.User.Name); 253 | break; 254 | default: 255 | log.warn(`${lecturer.Role.trim()} not recognised`); 256 | } 257 | }); 258 | module.Lecturers = lecturerNames; 259 | 260 | const periods = { Lecture: new Set(), Tutorial: new Set() }; 261 | module.Timetable.forEach((lesson) => { 262 | let period; 263 | if (lesson.StartTime < '1200') { 264 | period = 'Morning'; 265 | } else if (lesson.StartTime < '1800') { 266 | period = 'Afternoon'; 267 | } else { 268 | period = 'Evening'; 269 | } 270 | // Either 'Lecture' or 'Tutorial' 271 | const lessonType = lessonTypes[R.toUpper(lesson.LessonType)]; 272 | periods[lessonType].add(`${lesson.DayText} ${period}`); 273 | }); 274 | module.LecturePeriods = [...periods.Lecture]; 275 | module.TutorialPeriods = [...periods.Tutorial]; 276 | return R.pipe( 277 | R.pick(MODULE_FIELDS), 278 | R.pickBy(val => val && !R.isEmpty(val)), 279 | )(module); 280 | } 281 | 282 | /** 283 | * Merges in the following order, taking the second object's value 284 | * if the key exists in both objects. 285 | * No First object Second object 286 | * ==================================================== 287 | * 1) cors merge bulletinModules 288 | * 2) examTimetable merge Module 289 | * 3) Module merge ivle 290 | * 4) Module merge corsBiddingStats 291 | * 5) Module concat moduleTimetableDelta 292 | */ 293 | function merge(consolidated, lessonTypes, subLog) { 294 | const merged = Object.entries(consolidated).map(([moduleCode, module]) => { 295 | const mergeModule = mergeModuleFields(subLog, moduleCode); 296 | 297 | const base = mergeModule(module.cors || {}, module.bulletinModules || {}); 298 | const mergedModule = mergeModule(module.examTimetable || {}, base); 299 | mergedModule.IVLE = module.ivle; 300 | mergedModule.CorsBiddingStats = module.corsBiddingStats; 301 | mergedModule.Timetable = mergedModule.Timetable || module.moduleTimetableDelta || []; 302 | return parseModule(mergedModule, lessonTypes); 303 | }); 304 | return merged; 305 | } 306 | 307 | function parseVenues(modules) { 308 | const lessons = R.pipe( 309 | R.pluck('Timetable'), 310 | R.unnest, 311 | R.filter(R.identity), 312 | )(modules); 313 | const venuesSet = new Set(R.pluck('Venue', lessons)); 314 | return [...venuesSet]; 315 | } 316 | 317 | async function consolidateForSem(config) { 318 | const { year, semester } = config; 319 | const subLog = log.child({ year, semester }); 320 | 321 | const dataCategories = [ 322 | 'bulletinModules', 323 | 'cors', 324 | 'corsBiddingStats', 325 | 'examTimetable', 326 | 'moduleTimetableDelta', 327 | 'ivle', 328 | ]; 329 | 330 | const data = {}; 331 | const missingFiles = []; 332 | async function readFile(category) { 333 | let func = R.indexBy; // func to apply to data 334 | let filePath = path.join( 335 | config[category].destFolder, 336 | `${year}-${year + 1}`, 337 | `${semester}`, 338 | config[category].destFileName, 339 | ); 340 | if (category === 'corsBiddingStats') { 341 | filePath = path.join( 342 | config[category].destFolder, 343 | config[category].destFileName, 344 | ); 345 | func = R.groupBy; 346 | } else if (category === 'moduleTimetableDelta') { 347 | func = R.groupBy; 348 | } 349 | 350 | const catData = await fs.readJson(filePath).catch(() => { 351 | missingFiles.push(config[category].destFileName); 352 | return []; 353 | }); 354 | data[category] = 
func(R.prop('ModuleCode'), catData); 355 | } 356 | await Promise.all(R.map(readFile, dataCategories)); 357 | if (missingFiles.length > 0) { 358 | subLog.info(`${missingFiles.join(', ')} are not found, continuing with consolidating.`); 359 | } 360 | 361 | const lessonTypesPath = path.join(config.cors.destFolder, config.cors.destLessonTypes); 362 | const lessonTypes = await fs.readJson(lessonTypesPath).catch(() => { 363 | subLog.info(`${lessonTypesPath} is not found, continuing with consolidating.`); 364 | return {}; 365 | }); 366 | 367 | const normalizedData = normalize(data, subLog); 368 | const consolidated = consolidate(normalizedData, subLog); 369 | const modules = merge(consolidated, lessonTypes, subLog); 370 | 371 | const venuesList = parseVenues(modules); 372 | 373 | const thisConfig = config.consolidate; 374 | async function write(filePath, content) { 375 | const pathToWrite = path.join( 376 | thisConfig.destFolder, 377 | `${year}-${year + 1}`, 378 | `${semester}`, 379 | filePath, 380 | ); 381 | subLog.info(`saving to ${pathToWrite}`); 382 | await fs.outputJson(pathToWrite, content, { spaces: thisConfig.jsonSpace }); 383 | } 384 | 385 | await write(thisConfig.destConsolidated, consolidated); 386 | await write(thisConfig.destFileName, modules); 387 | await write(thisConfig.destVenues, venuesList); 388 | } 389 | 390 | export default consolidateForSem; 391 | -------------------------------------------------------------------------------- /gulp-tasks/local/consolidateForYear.js: -------------------------------------------------------------------------------- 1 | import path from 'path'; 2 | import fs from 'fs-extra'; 3 | import bunyan from 'bunyan'; 4 | import R from 'ramda'; 5 | import genReqTree from './genReqTree'; 6 | import mergeModuleFields from '../utils/mergeModuleFields'; 7 | 8 | /** 9 | * Consolidates all information and generates the 10 | * prerequisite/modmaven tree for one academic year. 11 | * By default outputs to the base path of the academic year. 12 | * - modules.json 13 | * See genReqTree for the generation of the tree. 
14 | */ 15 | 16 | const MODULE_GENERAL_KEYS = [ 17 | 'ModuleCode', 18 | 'ModuleTitle', 19 | 'AcadYear', 20 | 'Department', 21 | 'ModuleDescription', 22 | 'ModuleCredit', 23 | 'Workload', 24 | 'Types', 25 | 'CrossModule', 26 | 'Corequisite', 27 | 'Prerequisite', 28 | 'ParsedPrerequisite', 29 | 'Preclusion', 30 | 'ParsedPreclusion', 31 | 'ModmavenTree', 32 | 'LockedModules', 33 | 'CorsBiddingStats', 34 | 'History', 35 | ]; 36 | 37 | const SEMESTER_SPECIFIC_KEYS = [ 38 | 'Semester', 39 | 'ExamDate', 40 | 'Timetable', 41 | 'IVLE', 42 | 'Lecturers', 43 | 'LecturePeriods', 44 | 'TutorialPeriods', 45 | ]; 46 | 47 | const log = bunyan.createLogger({ name: 'consolidateForYear' }); 48 | 49 | async function consolidateForYear(config) { 50 | const { year } = config; 51 | const subLog = log.child({ year }); 52 | 53 | const acadYear = `${year}/${year + 1}`; 54 | const basePath = path.join( 55 | config.destFolder, 56 | acadYear.replace('/', '-'), 57 | ); 58 | 59 | const modules = {}; 60 | await Promise.all(R.range(1, 5).map(async (semester) => { 61 | const pathToRead = path.join( 62 | basePath, 63 | semester.toString(), 64 | config.destFileName, 65 | ); 66 | const listOfModules = await fs.readJson(pathToRead).catch(() => { 67 | subLog.info(`${pathToRead} does not exist, continuing with joining`); 68 | return []; 69 | }); 70 | listOfModules.forEach((mod) => { 71 | const module = { 72 | ...mod, 73 | AcadYear: acadYear, 74 | Semester: semester, 75 | }; 76 | const code = module.ModuleCode; 77 | modules[code] = modules[code] || {}; 78 | modules[code][acadYear + semester] = module; 79 | }); 80 | })); 81 | 82 | const joined = Object.entries(modules).map(([moduleCode, mods]) => { 83 | const mergeModule = mergeModuleFields(subLog, moduleCode); 84 | const modulesInAcadYear = Object.values(mods); 85 | let baseMod = {}; 86 | modulesInAcadYear.forEach((mod) => { 87 | baseMod = mergeModule(baseMod, R.pick(MODULE_GENERAL_KEYS, mod)); 88 | }); 89 | baseMod.History = modulesInAcadYear.map(R.pick(SEMESTER_SPECIFIC_KEYS)); 90 | return baseMod; 91 | }); 92 | 93 | const reqTree = await genReqTree(joined, config); 94 | const final = R.sortBy(R.prop('ModuleCode'), reqTree); 95 | 96 | const pathToWrite = path.join( 97 | basePath, 98 | config.destFileName, 99 | ); 100 | subLog.info(`saving to ${pathToWrite}`); 101 | await fs.outputJson(pathToWrite, final, { spaces: config.jsonSpace }); 102 | } 103 | 104 | export default consolidateForYear; 105 | -------------------------------------------------------------------------------- /gulp-tasks/local/genReqTree/constants.js: -------------------------------------------------------------------------------- 1 | const MODULE_REGEX = /[A-Z]{2,3}[0-9]{4}(?:[A-Z]|[A-Z]R)?/; 2 | 3 | const OPERATORS = { 4 | and: ' and ', 5 | or: ' or ', 6 | }; 7 | const AND_OR_REGEX = new RegExp(Object.keys(OPERATORS).join('|')); 8 | const OPERATORS_REGEX = new RegExp(AND_OR_REGEX, 'gi'); 9 | 10 | export { 11 | MODULE_REGEX, 12 | OPERATORS, 13 | AND_OR_REGEX, 14 | OPERATORS_REGEX, 15 | }; 16 | -------------------------------------------------------------------------------- /gulp-tasks/local/genReqTree/index.js: -------------------------------------------------------------------------------- 1 | import path from 'path'; 2 | import fs from 'fs-extra'; 3 | import bunyan from 'bunyan'; 4 | import R from 'ramda'; 5 | import parseString from './parseString'; 6 | import normalizeString from './normalizeString'; 7 | import { OPERATORS_REGEX, MODULE_REGEX } from './constants'; 8 | 9 | /** 10 | * Generate the following 
fields for modules: 11 | * ParsedPrerequisite: prerequisite in the form of a tree 12 | * ParsedPreclusion: preclusion in the form of a tree 13 | * LockedModules: modules that cannot be taken until this module is fulfilled 14 | * ModmavenTree: different format of ParsedPrerequisite 15 | */ 16 | 17 | const log = bunyan.createLogger({ 18 | name: 'genReqTree', 19 | level: process.env.NODE_ENV === 'development' ? 'debug' : 'info', 20 | }); 21 | 22 | // Add any key-words and reasons for which NO parsing should be done and 23 | // the entire pre-req string should be shown instead 24 | const RESTRICTED_KEYWORDS = [ 25 | // requirement to be USP students cannot be represented 26 | 'USP', 27 | // Yearly based modules cannot be represented 28 | 'Cohort', 29 | 'cohort', 30 | 'AY20', 31 | // no QET module 32 | 'Qualifying English Test', 33 | // requirement by grade cannot be represented 34 | 'grade', 35 | 'Grade', 36 | 'At least a B-', 37 | 'Honours eligibility requirements', 38 | // no A-level modules 39 | 'A-level', 40 | 'H1 ', 41 | 'H2 ', 42 | // requirement by mc cannot be represented 43 | 'MC', 44 | // 4 out of 5 requirement cannot be represented 45 | '4 out of the 5', 46 | '4 of the 5', 47 | ]; 48 | 49 | function parse(key, data, subLog) { 50 | const generateModulesToKeyMap = R.pipe( 51 | R.map(R.props(['ModuleCode', key])), 52 | R.fromPairs, // [key, val] => { key: val } 53 | R.filter(R.identity), 54 | ); 55 | const moduleCodeToData = generateModulesToKeyMap(data); 56 | 57 | const filterUnparseable = R.pipe( 58 | R.filter(str => !RESTRICTED_KEYWORDS.some(keyword => str.includes(keyword))), // remove restricted 59 | R.filter(R.test(MODULE_REGEX)), // remove those with no modules 60 | ); 61 | const parsable = filterUnparseable(moduleCodeToData); 62 | 63 | Object.keys(moduleCodeToData).forEach((moduleCode) => { 64 | if (!Object.prototype.hasOwnProperty.call(parsable, moduleCode)) { 65 | // log.debug(`${moduleCode}'s ${key} cannot be parsed: ${moduleCodeToData[moduleCode]}`); 66 | } 67 | }); 68 | 69 | Object.keys(parsable).forEach((moduleCode) => { 70 | const string = parsable[moduleCode]; 71 | const normalizedString = normalizeString(string, moduleCode); 72 | 73 | const moduleLog = subLog.child({ moduleCode }); 74 | const parsedString = parseString(normalizedString, moduleLog); 75 | parsable[moduleCode] = parsedString ? 
{ 76 | [key]: string, 77 | [`Parsed${key}`]: parsedString, 78 | } : null; 79 | }); 80 | const removeNull = R.filter(R.identity); 81 | return removeNull(parsable); 82 | } 83 | 84 | function generateRequirements(allModules, moduleCodes) { 85 | const modules = {}; 86 | 87 | // converts { key: val } turns into { name: key, children: val } 88 | function node(key, val) { 89 | return { name: key, children: val }; 90 | } 91 | function genModmavenTree(tree) { 92 | if (typeof tree === 'string') { 93 | return node(tree, []); 94 | } else if (Array.isArray(tree)) { 95 | return tree.map(genModmavenTree); 96 | } 97 | return Object.entries(tree).map(([key, val]) => { 98 | // recursively gen tree 99 | const children = genModmavenTree(val); 100 | return node(key, children); 101 | }); 102 | } 103 | Object.values(allModules).forEach((module) => { 104 | const moduleCode = module.ModuleCode; 105 | const parsedPrerequisite = genModmavenTree(module.ParsedPrerequisite || []); 106 | modules[moduleCode] = { 107 | ...module, 108 | ModmavenTree: node(moduleCode, parsedPrerequisite), 109 | }; 110 | }); 111 | 112 | // locked modules mean 'inverse prerequisite', or 113 | // 'if you have not taken this module, you cannot take the following' 114 | 115 | // inject 'LockedModules' key into every module as a set 116 | moduleCodes.forEach((moduleCode) => { 117 | modules[moduleCode].LockedModules = new Set(); 118 | }); 119 | 120 | function flattenTree(tree) { 121 | if (typeof tree === 'string') { 122 | return [tree]; 123 | } else if (Array.isArray(tree)) { 124 | return R.unnest(tree.map(flattenTree)); 125 | } 126 | return R.unnest(Object.values(tree).map(flattenTree)); 127 | } 128 | Object.values(modules).forEach((module) => { 129 | const thisModuleCode = module.ModuleCode; 130 | const parsedPrerequisite = module.ParsedPrerequisite || []; 131 | const flattenedPrerequisites = flattenTree(parsedPrerequisite); 132 | flattenedPrerequisites.forEach((moduleCode) => { 133 | if (Object.prototype.hasOwnProperty.call(modules, moduleCode)) { 134 | modules[moduleCode].LockedModules.add(thisModuleCode); 135 | } 136 | }); 137 | }); 138 | 139 | // convert set back to array 140 | moduleCodes.forEach((moduleCode) => { 141 | modules[moduleCode].LockedModules = [...modules[moduleCode].LockedModules]; 142 | }); 143 | return modules; 144 | } 145 | 146 | async function genReqTree(allModules, config) { 147 | const { year } = config; 148 | const subLog = log.child({ year }); 149 | 150 | // check that all modules match regex and no modules contain operators 151 | const moduleCodes = R.uniq(R.pluck('ModuleCode', allModules)); 152 | moduleCodes.forEach((moduleCode) => { 153 | const isModule = MODULE_REGEX.test(moduleCode); 154 | if (!isModule) { 155 | throw new Error(`Module ${moduleCode}'s module code does not match regex.`); 156 | } 157 | const hasOperators = OPERATORS_REGEX.test(moduleCode); 158 | if (hasOperators) { 159 | throw new Error(`Module ${moduleCode}'s module code contains operators.`); 160 | } 161 | }); 162 | 163 | const prerequisites = parse('Prerequisite', allModules, subLog); 164 | const preclusions = parse('Preclusion', allModules, subLog); 165 | const merged = allModules.map((data) => { 166 | const moduleCode = data.ModuleCode; 167 | const mergedPrerequisite = R.merge(data, prerequisites[moduleCode]); 168 | const mergedPreclusion = R.merge(mergedPrerequisite, preclusions[moduleCode]); 169 | return mergedPreclusion; 170 | }); 171 | const modules = generateRequirements(merged, moduleCodes); 172 | 173 | // for debugging usage 174 | if 
(process.env.NODE_ENV === 'development') { 175 | const debugOutput = R.map(R.pick([ 176 | 'Prerequisite', 177 | 'ParsedPrerequisite', 178 | 'Preclusion', 179 | 'ParsedPreclusion', 180 | 'ModmavenTree', 181 | 'LockedModules', 182 | ]), modules); 183 | 184 | const pathToWrite = path.join( 185 | config.destFolder, 186 | `${year}-${year + 1}`, 187 | 'reqTree.json', 188 | ); 189 | subLog.debug(`saving to ${pathToWrite}`); 190 | await fs.outputJson(pathToWrite, debugOutput, { spaces: config.jsonSpace }); 191 | } 192 | 193 | return Object.values(modules); 194 | } 195 | 196 | export default genReqTree; 197 | -------------------------------------------------------------------------------- /gulp-tasks/local/genReqTree/normalizeString.js: -------------------------------------------------------------------------------- 1 | import R from 'ramda'; 2 | import romanify from 'romanify'; 3 | 4 | import { OPERATORS, MODULE_REGEX, AND_OR_REGEX, OPERATORS_REGEX } from './constants'; 5 | 6 | /** 7 | * Normalizes different formats with same semantic meanings. 8 | * E.g. x & y -> x and y 9 | * 10 | * The following code depends heavily on regex and the replace function. 11 | * It is recommended that you read the following before proceeding: 12 | * https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/replace 13 | */ 14 | 15 | // converts `CS 1000` to `CS1000` 16 | function removeSpaceFromModule(string) { 17 | const moduleWithDelimiterRegex = /[A-Z]{2,3}\W[0-9]{4}(?:[A-Z]|[A-Z]R)?/g; 18 | return string.replace(moduleWithDelimiterRegex, R.replace(/\W/, '')); 19 | } 20 | 21 | // converts `CS1000and` to `CS1000 and` 22 | function fixOperatorTypos(string) { 23 | const leftConjoinedRegex = /\b([^\s()]{6,9})(and|or)\b/ig; 24 | const rightConjoinedRegex = /\b(and|or)([^\s()]{6,9})\b/ig; 25 | return string 26 | .replace(leftConjoinedRegex, (match, p1, p2) => { 27 | if (MODULE_REGEX.test(p1)) { 28 | return [p1, p2].join(' '); 29 | } 30 | return match; 31 | }) 32 | .replace(rightConjoinedRegex, (match, p1, p2) => { 33 | if (MODULE_REGEX.test(p2)) { 34 | return [p1, p2].join(' '); 35 | } 36 | return match; 37 | }); 38 | } 39 | 40 | // converts `CS1000/R` into `CS1000 or CS1000R` 41 | function insertPostFixAsStandalone(string) { 42 | const modulePostFixRegex = /([A-Z]{2,3}[0-9]{4})(\/[A-Z]|[A-Z]R)+\b/g; 43 | return string.replace(modulePostFixRegex, (match, module, ...args) => { 44 | const p = args.slice(0, -2); // last two are offset and string 45 | const modules = [ 46 | module, 47 | ...p.map(postfix => `${module}${postfix.slice(1)}`), // remove '/' sign 48 | ]; 49 | return modules.join(OPERATORS.or); 50 | }); 51 | } 52 | 53 | // people write 'x, y and z', meaning 'x and y and z' but 54 | // people write 'x, y, z' meaning 'x or y or z' 55 | function convertCommas(oxfordString) { 56 | // replace oxford comma 57 | const string = oxfordString.replace(/,\s*and\b/gi, OPERATORS.and); 58 | if (!string.includes(',')) { 59 | return string; 60 | } 61 | const hasAndOperators = string.includes(OPERATORS.and); 62 | const hasOrOperators = string.includes(OPERATORS.or); 63 | if (hasAndOperators && !hasOrOperators) { 64 | return string.replace(/,/g, OPERATORS.and); 65 | } 66 | return string.replace(/,/g, OPERATORS.or); 67 | } 68 | 69 | // converts roman numerals and alphabets into digits 70 | // e.g. 
(a) (b) (c) into (1) (2) (3) 71 | function convertToNumerals(number, string) { 72 | const alphabet = 'abcdefghijklmnopqrstuvwxyz'[(number - 1) % 26]; 73 | const roman = romanify(number).toLowerCase(); 74 | // detect roman numeral or alphabet in brackets 75 | // not case sensitive as '(Communications and Networks I)' and `(M&A)` would be false positives 76 | const romanNumberalRegex = new RegExp(`\\(?(?:${roman}|${alphabet})\\)`); 77 | if (romanNumberalRegex.test(string)) { 78 | const replaced = string.replace(romanNumberalRegex, `(${number})`); 79 | // recursively replace the next numeral 80 | return convertToNumerals(number + 1, replaced); 81 | } 82 | return string; 83 | } 84 | 85 | // converts `1) x 2) y` to `(1) x (2) y` 86 | function fixBrackets(string) { 87 | // check brackets aren't balanced before fixing 88 | if (R.match(/\(/g, string).length === R.match(/\)/g, string).length) { 89 | return string; 90 | } 91 | return string.replace(/(?:\(?\b(\d+)\))/g, (match, digit) => `(${digit})`); 92 | } 93 | 94 | // recursively remove module title containing operators, 95 | // given that neither is a module or operator, until no more changes 96 | function removeModuleTitles(string) { 97 | const moduleTitlesRegex = /([^\s()]+)\b[\s]+(?:and|or)[\s]+([^\s()]+)\b/g; 98 | const result = string.replace(moduleTitlesRegex, (match, p1, p2) => { 99 | if (AND_OR_REGEX.test(p1) || AND_OR_REGEX.test(p2) || 100 | MODULE_REGEX.test(p1) || MODULE_REGEX.test(p2)) { 101 | return match; 102 | } 103 | return ''; 104 | }); 105 | if (result !== string) { 106 | return removeModuleTitles(result); 107 | } 108 | return result; 109 | } 110 | 111 | const normalize = R.pipe( 112 | removeSpaceFromModule, 113 | fixOperatorTypos, 114 | insertPostFixAsStandalone, 115 | convertCommas, 116 | R.replace(/{|\[/g, '('), 117 | R.replace(/}|\]/g, ')'), 118 | R.curry(convertToNumerals)(1), 119 | fixBrackets, 120 | R.replace(/\|/g, OPERATORS.or), 121 | R.replace(/\//g, OPERATORS.or), 122 | R.replace(/;/g, OPERATORS.and), 123 | R.replace(/&/g, OPERATORS.and), 124 | R.replace(/ plus /g, OPERATORS.and), 125 | R.replace(OPERATORS_REGEX, R.toLower), 126 | removeModuleTitles, 127 | ); 128 | 129 | function normalizeString(string, moduleCode) { 130 | // remove own module code from string (e.g.
`CS1000R` would remove `CS1000R`, `CS1000`) 131 | const moduleWithoutPostfix = moduleCode.slice(0, R.findLastIndex(R.test(/\d/), moduleCode) + 1); 132 | const moduleRegex = new RegExp(`\\b${moduleWithoutPostfix}(?:[A-Z]|[A-Z]R)?\\b`, 'g'); 133 | const preprocessed = string.replace(moduleRegex, ''); 134 | return normalize(preprocessed); 135 | } 136 | 137 | export default normalizeString; 138 | export { normalize }; 139 | -------------------------------------------------------------------------------- /gulp-tasks/local/genReqTree/normalizeString.test.js: -------------------------------------------------------------------------------- 1 | import { normalize } from './normalizeString'; 2 | 3 | /* eslint-disable max-len */ 4 | describe('normalizeString', () => { 5 | it('converts commas to delimiter or', () => { 6 | const testString = 'ACC1002 Financial Accounting, BSP1004 Legal Environment of Business, FIN2004 Finance'; 7 | const expected = 'ACC1002 Financial Accounting or BSP1004 Legal Environment of Business or FIN2004 Finance'; 8 | expect(normalize(testString)).toBe(expected); 9 | }); 10 | 11 | it('converts commas to delimiter and when written like a sentence', () => { 12 | const testString = 'ACC1002 Financial Accounting, BSP1004 Legal Environment of Business, and FIN2004 Finance'; 13 | const expected = 'ACC1002 Financial Accounting and BSP1004 Legal Environment of Business and FIN2004 Finance'; 14 | expect(normalize(testString)).toBe(expected); 15 | }); 16 | 17 | it('splits conjoined operators', () => { 18 | const testString = 'MA1505and MA1506'; 19 | const expected = 'MA1505 and MA1506'; 20 | expect(normalize(testString)).toBe(expected); 21 | }); 22 | 23 | it('splits / module codes into respective modules', () => { 24 | const testString = 'CS2103/T'; 25 | const expected = 'CS2103 or CS2103T'; 26 | expect(normalize(testString)).toBe(expected); 27 | }); 28 | 29 | it('removes module titles that contains operators 1', () => { 30 | const testString = 'ACC3616 Corporate Governance and Risk Management or ACC3612 Risk Management and Internal Control'; 31 | const expected = 'ACC3616 Corporate Management or ACC3612 Risk Control'; 32 | expect(normalize(testString)).toBe(expected); 33 | }); 34 | 35 | it('removes module titles that contains operators 2', () => { 36 | const testString = '(Undergraduate physics and mathematics AND Electronics materials courses)'; 37 | const expected = '( materials courses)'; 38 | expect(normalize(testString)).toBe(expected); 39 | }); 40 | 41 | it('replaces synonyms', () => { 42 | const testString = '[(CM1121 or CM1501) plus (LSM1101 or LSM1401 or MLE1101)] or MLE3104'; 43 | const expected = '((CM1121 or CM1501) and (LSM1101 or LSM1401 or MLE1101)) or MLE3104'; 44 | expect(normalize(testString)).toBe(expected); 45 | }); 46 | 47 | it('lowercases operators', () => { 48 | const testString = '(1) Either BSP1005 or EC1301 AND (2) Either DSC2008 or EC2303'; 49 | const expected = '(1) Either BSP1005 or EC1301 and (2) Either DSC2008 or EC2303'; 50 | expect(normalize(testString)).toBe(expected); 51 | }); 52 | 53 | it('changes roman numerals to digits', () => { 54 | const testString = '(i) CS1000 (ii) CS1001'; 55 | const testString1 = 'i) CS1000 ii) CS1001'; 56 | const expected = '(1) CS1000 (2) CS1001'; 57 | expect(normalize(testString)).toBe(expected); 58 | expect(normalize(testString1)).toBe(expected); 59 | }); 60 | 61 | it('changes alphabets to digits', () => { 62 | const testString = '(a) CS1000 (b) CS1001'; 63 | const testString1 = 'a) CS1000 b) CS1001'; 64 | const expected 
= '(1) CS1000 (2) CS1001'; 65 | expect(normalize(testString)).toBe(expected); 66 | expect(normalize(testString1)).toBe(expected); 67 | expect(normalize('a) CS1000 or b) CS1001')).toBe('(1) CS1000 or (2) CS1001'); 68 | }); 69 | 70 | it('does not change modules or operators to digits', () => { 71 | const testString = '(CS1000)'; 72 | expect(normalize(testString)).toBe(testString); 73 | const testString1 = 'CS1000)'; 74 | expect(normalize(testString1)).toBe(testString1); 75 | const testString2 = '(or)'; 76 | expect(normalize(testString2)).toBe(testString2); 77 | }); 78 | 79 | it('fixes listing brackets', () => { 80 | const testString = '1) CS1000 2) CS1001'; 81 | const expected = '(1) CS1000 (2) CS1001'; 82 | expect(normalize(testString)).toBe(expected); 83 | }); 84 | 85 | it('changes listing into brackets given that an operator exists', () => { 86 | const testString = '1) CS1000 or 2) CS1001'; 87 | const expected = '(1) CS1000 or (2) CS1001'; 88 | expect(normalize(testString)).toBe(expected); 89 | }); 90 | }); 91 | -------------------------------------------------------------------------------- /gulp-tasks/local/genReqTree/parse+normalize.test.js: -------------------------------------------------------------------------------- 1 | import parseString from './parseString'; 2 | import { normalize } from './normalizeString'; 3 | 4 | /* eslint-disable max-len */ 5 | 6 | // integration tests, normalize + parse 7 | const parse = string => parseString(normalize(string)); 8 | 9 | describe('parse', () => { 10 | it('parses query `(1) either BSP1005 or EC1301 and (2) either DSC2008 or EC2303`)', () => { 11 | const result = { 12 | and: [ 13 | { 14 | or: [ 15 | 'BSP1005', 16 | 'EC1301', 17 | ], 18 | }, 19 | { 20 | or: [ 21 | 'DSC2008', 22 | 'EC2303', 23 | ], 24 | }, 25 | ], 26 | }; 27 | expect(parse('(1) either BSP1005 or EC1301 and (2) either DSC2008 or EC2303')).toEqual(result); 28 | }); 29 | 30 | it('parses query `CS1010 Programming Methodology or its equivalent, and BT1101`)', () => { 31 | const result = { 32 | and: [ 33 | 'CS1010', 34 | 'BT1101', 35 | ], 36 | }; 37 | expect(parse('CS1010 Programming Methodology or its equivalent, and BT1101')).toEqual(result); 38 | }); 39 | 40 | it('parses query `CE2112 or CE4 standing or higher`)', () => { 41 | expect(parse('CE2112 or CE4 standing or higher')).toEqual('CE2112'); 42 | }); 43 | 44 | it('parses query `(1) CE2112 or (2)CE4444 standing or higher`)', () => { 45 | const result = { 46 | or: [ 47 | 'CE2112', 48 | 'CE4444', 49 | ], 50 | }; 51 | expect(parse('(1) CE2112 or (2)CE4444 standing or higher')).toEqual(result); 52 | }); 53 | 54 | it('parses query `CM2101, CM2142 and CM2192`)', () => { 55 | const result = { 56 | and: [ 57 | 'CM2101', 58 | 'CM2142', 59 | 'CM2192', 60 | ], 61 | }; 62 | expect(parse('CM2101, CM2142 and CM2192')).toEqual(result); 63 | }); 64 | 65 | it('parses query `ES1000 and/or ES1102/ES1103`)', () => { 66 | const result = { 67 | or: [ 68 | 'ES1000', 69 | 'ES1102', 70 | 'ES1103', 71 | ], 72 | }; 73 | expect(parse('ES1000 and/or ES1102/ES1103')).toEqual(result); 74 | }); 75 | 76 | it('parses query `(Undergraduate physics and mathematics AND Electronics materials courses) OR EE2004: Semiconductor Devices OR EE3406: Microelectronic Materials OR EE3431C: Microelectronics Materials & Devices`)', () => { 77 | const result = { 78 | or: [ 79 | 'EE2004', 80 | 'EE3406', 81 | 'EE3431C', 82 | ], 83 | }; 84 | expect(parse('(Undergraduate physics and mathematics AND Electronics materials courses) OR EE2004: Semiconductor Devices OR EE3406: 
Microelectronic Materials OR EE3431C: Microelectronics Materials & Devices')).toEqual(result); 85 | }); 86 | 87 | it('parses query `(1) EN1101E or GEK1000, and (2) EN majors`)', () => { 88 | const result = { 89 | or: [ 90 | 'EN1101E', 91 | 'GEK1000', 92 | ], 93 | }; 94 | expect(parse('(1) EN1101E or GEK1000, and (2) EN majors')).toEqual(result); 95 | }); 96 | 97 | it('parses query `(IS2101 Business and Technical or CS2101 or their equivalents) and (CS2103/CS2103T or IS2150 E-Business Design and Implementation or BT2101 IT and Decision Making)`)', () => { 98 | const result = { 99 | and: [ 100 | { 101 | or: [ 102 | 'IS2101', 103 | 'CS2101', 104 | ], 105 | }, 106 | { 107 | or: [ 108 | 'CS2103', 109 | 'CS2103T', 110 | 'IS2150', 111 | 'BT2101', 112 | ], 113 | }, 114 | ], 115 | }; 116 | expect(parse('(IS2101 Business and Technical or CS2101 or their equivalents) and (CS2103/CS2103T or IS2150 E-Business Design and Implementation or BT2101 IT and Decision Making)')).toEqual(result); 117 | }); 118 | }); 119 | -------------------------------------------------------------------------------- /gulp-tasks/local/genReqTree/parseString.js: -------------------------------------------------------------------------------- 1 | import { 2 | Token, 3 | Lexer, 4 | Parser, 5 | } from 'chevrotain'; 6 | import R from 'ramda'; 7 | import { OPERATORS, MODULE_REGEX, AND_OR_REGEX } from './constants'; 8 | 9 | /** 10 | * Parses the string to build a tree of requirements for the module. 11 | * First it goes through a lexer to generate tokens, 12 | * then a parser to build the tree. 13 | * 14 | * Library used for lexing/parsing is chevrotain: 15 | * https://github.com/SAP/chevrotain 16 | */ 17 | class Module extends Token {} 18 | Module.PATTERN = MODULE_REGEX; 19 | class And extends Token {} 20 | And.PATTERN = 'and'; 21 | class Or extends Token {} 22 | Or.PATTERN = 'or'; 23 | 24 | class LeftBracket extends Token {} 25 | LeftBracket.PATTERN = /\(/; 26 | class RightBracket extends Token {} 27 | RightBracket.PATTERN = /\)/; 28 | 29 | class WhiteSpace extends Token {} 30 | WhiteSpace.PATTERN = /\s+/; 31 | WhiteSpace.GROUP = Lexer.SKIPPED; 32 | WhiteSpace.LINE_BREAKS = true; 33 | 34 | class IrrelevantWord extends Token {} 35 | IrrelevantWord.PATTERN = /[^\s()]+/; 36 | IrrelevantWord.GROUP = Lexer.SKIPPED; 37 | 38 | const allTokens = [ 39 | WhiteSpace, 40 | Module, 41 | And, 42 | Or, 43 | LeftBracket, 44 | RightBracket, 45 | IrrelevantWord, 46 | ]; 47 | const ReqTreeLexer = new Lexer(allTokens); 48 | 49 | function generateAndBranch(modules) { 50 | const children = R.uniq(modules); 51 | return { and: children }; 52 | } 53 | function generateOrBranch(modules) { 54 | const children = R.uniq(modules); 55 | return { or: children }; 56 | } 57 | 58 | /** 59 | * ReqTreeParser, works to parse string and tokenize the product. 60 | * The code is extremely similar to the following example: 61 | * @see https://github.com/SAP/chevrotain/blob/master/examples/grammars/calculator/calculator_embedded_actions.js 62 | */ 63 | class ReqTreeParser extends Parser { 64 | constructor(input) { 65 | super(input, allTokens, { recoveryEnabled: true }); 66 | this.RULE('parse', () => this.SUBRULE(this.andExpression)); 67 | 68 | // And has the lowest precedence thus it is first in the rule chain (think +- in math) 69 | // The precedence of binary expressions is determined by 70 | // how far down the Parse Tree the binary expression appears. 
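// As an illustration (mirroring the parseString tests further down):
// 'CS1000 and CS1001 or CS1002' parses to
//   { and: ['CS1000', { or: ['CS1001', 'CS1002'] }] }
// because the top-level andExpression splits on 'and' first, and each
// operand is then parsed as an orExpression.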
71 | this.RULE('andExpression', () => { 72 | const value = []; 73 | 74 | value.push(this.SUBRULE(this.orExpression)); 75 | this.MANY(() => { 76 | this.CONSUME(And); 77 | // the index "2" in SUBRULE2 is needed to 78 | // identify the unique position in the grammar during runtime 79 | value.push(this.SUBRULE2(this.orExpression)); 80 | }); 81 | if (value.length === 1) { 82 | return value[0]; 83 | } 84 | return generateAndBranch(value); 85 | }); 86 | 87 | // Or has the higher precedence (think */ in math) 88 | this.RULE('orExpression', () => { 89 | const value = []; 90 | 91 | value.push(this.SUBRULE(this.atomicExpression)); 92 | this.MANY(() => { 93 | this.CONSUME(Or); 94 | value.push(this.SUBRULE2(this.atomicExpression)); 95 | }); 96 | if (value.length === 1) { 97 | return value[0]; 98 | } 99 | return generateOrBranch(value); 100 | }); 101 | 102 | this.RULE('atomicExpression', () => this.OR([ 103 | { ALT: () => this.SUBRULE(this.parenthesisExpression) }, 104 | { ALT: () => this.CONSUME(Module).image }, 105 | ], 'a module or parenthesis expression')); 106 | 107 | // parenthesisExpression has the highest precedence and thus it appears 108 | // in the "lowest" leaf in the expression ParseTree. 109 | this.RULE('parenthesisExpression', () => { 110 | this.CONSUME(LeftBracket); 111 | const expValue = this.SUBRULE(this.parse); 112 | this.CONSUME(RightBracket); 113 | return expValue; 114 | }); 115 | 116 | // very important to call this after all the rules have been defined. 117 | // otherwise the parser may not work correctly as it will lack information 118 | // derived during the self analysis phase. 119 | Parser.performSelfAnalysis(this); 120 | } 121 | 122 | // avoids inserting module literals as these can have multiple(and infinite) semantic values 123 | canTokenTypeBeInsertedInRecovery(tokClass) { // eslint-disable-line class-methods-use-this 124 | return tokClass !== Module; 125 | } 126 | } 127 | 128 | // removes unneeded `or` and `and` operators, recursively while noting brackets 129 | function cleanOperators(tokens) { 130 | const output = []; 131 | let temp = []; 132 | let bracketsCount = 0; 133 | tokens.forEach((token) => { 134 | const image = token.image; 135 | if (bracketsCount === 0 && image !== '(' && image !== ')') { 136 | output.push(token); 137 | return; 138 | } 139 | 140 | temp.push(token); 141 | if (image === '(') { 142 | bracketsCount += 1; 143 | } else if (image === ')') { 144 | bracketsCount -= 1; 145 | if (bracketsCount === 0) { 146 | // recursive clean within parenthesis, unnests one layer 147 | const cleaned = cleanOperators(temp.slice(1, -1)); 148 | if (cleaned.length) { 149 | output.push(R.head(temp)); 150 | output.push(...cleaned); 151 | output.push(R.last(temp)); 152 | } 153 | temp = []; 154 | } 155 | } 156 | }); 157 | 158 | const findFirstRelevant = R.findIndex((token) => { 159 | const image = token.image; 160 | return MODULE_REGEX.test(image) || image === '('; 161 | }); 162 | const findLastRelevant = R.findLastIndex((token) => { 163 | const image = token.image; 164 | return MODULE_REGEX.test(image) || image === ')'; 165 | }); 166 | const processedTokens = output.slice(findFirstRelevant(output), findLastRelevant(output) + 1); 167 | 168 | const removedDuplicates = processedTokens.filter((item, pos, arr) => { 169 | // always keep the first and last element 170 | if (pos === 0 || pos === arr.length - 1) { 171 | return true; 172 | } 173 | const currentImage = item.image; 174 | const nextImage = arr[pos + 1].image; 175 | // then check if each element is different than the one 
before it 176 | return !(AND_OR_REGEX.test(currentImage) && AND_OR_REGEX.test(nextImage)); 177 | }); 178 | return removedDuplicates; 179 | } 180 | 181 | /** 182 | * Parses the prerequisite string to produce the tokenized form. 183 | * @see __tests__/genReqTree.test.js 184 | * @param {String} pre The prerequisite string 185 | * @param {bunyan} log The bunyan logger 186 | */ 187 | function parseString(pre, log) { 188 | const findModules = R.match(new RegExp(MODULE_REGEX, 'g')); 189 | const moduleMatches = findModules(pre); 190 | if (moduleMatches.length === 0) { 191 | return null; 192 | } else if (moduleMatches.length === 1) { 193 | // e.g. 'CS1010' or 'CS1010 Introduction to Computer Science' 194 | return moduleMatches[0]; 195 | } else if (!pre.includes(OPERATORS.or) && !pre.includes(OPERATORS.and)) { 196 | // e.g. 'CS1010 CS1231 Some module title' 197 | return generateOrBranch(moduleMatches); 198 | } else if (!pre.includes(OPERATORS.or)) { 199 | // e.g. 'CS1010 and CS1231' 200 | return generateAndBranch(moduleMatches); 201 | } else if (!pre.includes(OPERATORS.and)) { 202 | // e.g. 'CS1010 or CS1231' 203 | return generateOrBranch(moduleMatches); 204 | } 205 | 206 | // check that all brackets are fully enclosed 207 | if (R.match(/\(/g, pre).length !== R.match(/\)/g, pre).length) { 208 | log.error(`pre ${pre}'s brackets do not self enclose.`); 209 | } 210 | 211 | const lexingResult = ReqTreeLexer.tokenize(pre); 212 | const tokens = cleanOperators(lexingResult.tokens); 213 | // log.debug(tokens); 214 | 215 | const parser = new ReqTreeParser(tokens); 216 | const result = parser.parse(); 217 | if (parser.errors.length > 0) { 218 | log.error(`'${pre}' encoutered parsing errors:\n${parser.errors}`); 219 | // log.info(tokens) 220 | } 221 | return result; 222 | } 223 | 224 | export default parseString; 225 | export { cleanOperators }; 226 | -------------------------------------------------------------------------------- /gulp-tasks/local/genReqTree/parseString.test.js: -------------------------------------------------------------------------------- 1 | import parseString, { 2 | cleanOperators, 3 | } from './parseString'; 4 | 5 | describe('cleanOperators', () => { 6 | const andToken = { image: 'and' }; 7 | const orToken = { image: 'or' }; 8 | const moduleToken = { image: 'CS1000' }; 9 | 10 | const leftBracketToken = { image: '(' }; 11 | const rightBracketToken = { image: ')' }; 12 | 13 | it('cleans excess operators from simple strings', () => { 14 | const tokens = [ 15 | andToken, 16 | moduleToken, 17 | orToken, 18 | andToken, 19 | ]; 20 | expect(cleanOperators(tokens)).toEqual([moduleToken]); 21 | }); 22 | 23 | it('cleans excess operators within parenthesis', () => { 24 | const tokens = [ 25 | leftBracketToken, 26 | andToken, 27 | moduleToken, 28 | orToken, 29 | rightBracketToken, 30 | ]; 31 | expect(cleanOperators(tokens)).toEqual([leftBracketToken, moduleToken, rightBracketToken]); 32 | }); 33 | 34 | it('cleans excess operators outside and within parenthesis', () => { 35 | const tokens = [ 36 | orToken, 37 | leftBracketToken, 38 | andToken, 39 | moduleToken, 40 | orToken, 41 | rightBracketToken, 42 | andToken, 43 | ]; 44 | expect(cleanOperators(tokens)).toEqual([leftBracketToken, moduleToken, rightBracketToken]); 45 | }); 46 | 47 | it('cleans excess operators within nested parenthesis', () => { 48 | const tokens = [ 49 | leftBracketToken, 50 | orToken, 51 | leftBracketToken, 52 | andToken, 53 | moduleToken, 54 | orToken, 55 | rightBracketToken, 56 | andToken, 57 | rightBracketToken, 58 | ]; 59 | 
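// all four stray operator tokens are dropped while both levels of brackets
// around the module token are kept, as asserted below: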
expect(cleanOperators(tokens)).toEqual([ 60 | leftBracketToken, 61 | leftBracketToken, 62 | moduleToken, 63 | rightBracketToken, 64 | rightBracketToken, 65 | ]); 66 | }); 67 | 68 | it('cleans excess operators within nested parenthesis', () => { 69 | const tokens = [ 70 | leftBracketToken, 71 | leftBracketToken, 72 | andToken, 73 | moduleToken, 74 | orToken, 75 | rightBracketToken, 76 | rightBracketToken, 77 | ]; 78 | expect(cleanOperators(tokens)).toEqual([ 79 | leftBracketToken, 80 | leftBracketToken, 81 | moduleToken, 82 | rightBracketToken, 83 | rightBracketToken, 84 | ]); 85 | }); 86 | 87 | it('does not throw with empty parenthesis', () => { 88 | const tokens = [ 89 | leftBracketToken, 90 | rightBracketToken, 91 | ]; 92 | expect(cleanOperators(tokens)).toEqual([]); 93 | }); 94 | }); 95 | 96 | describe('parseString', () => { 97 | it('parses single module to a leaf', () => { 98 | expect(parseString('CS1000')).toEqual('CS1000'); 99 | }); 100 | 101 | it('parses simple strings in `or` form', () => { 102 | expect(parseString('CS1000 or CS1001')).toEqual({ 103 | or: [ 104 | 'CS1000', 105 | 'CS1001', 106 | ], 107 | }); 108 | }); 109 | 110 | it('parses simple strings in `and` form', () => { 111 | expect(parseString('CS1000 and CS1001')).toEqual({ 112 | and: [ 113 | 'CS1000', 114 | 'CS1001', 115 | ], 116 | }); 117 | }); 118 | 119 | it('parses left to right order for `CS1000 and CS1001 or CS1002`', () => { 120 | const result = { 121 | and: [ 122 | 'CS1000', 123 | { 124 | or: [ 125 | 'CS1001', 126 | 'CS1002', 127 | ], 128 | }, 129 | ], 130 | }; 131 | expect(parseString('CS1000 and CS1001 or CS1002')).toEqual(result); 132 | }); 133 | 134 | it('parses left to right order for `CS1000 or CS1001 and CS1002`', () => { 135 | const result = { 136 | and: [ 137 | { 138 | or: [ 139 | 'CS1000', 140 | 'CS1001', 141 | ], 142 | }, 143 | 'CS1002', 144 | ], 145 | }; 146 | expect(parseString('CS1000 or CS1001 and CS1002')).toEqual(result); 147 | }); 148 | 149 | it('parses left to right order for very complex queries multiple(`or` `and`)', () => { 150 | const result = { 151 | and: [ 152 | { 153 | or: [ 154 | 'CS1000', 155 | 'CS1001', 156 | ], 157 | }, 158 | { 159 | or: [ 160 | 'CS1002', 161 | 'CS1003', 162 | ], 163 | }, 164 | ], 165 | }; 166 | expect(parseString('CS1000 or CS1001 and CS1002 or CS1003')).toEqual(result); 167 | }); 168 | 169 | it('parses strings with excess `or` operator', () => { 170 | expect(parseString('or CS1000')).toEqual('CS1000'); 171 | expect(parseString('CS1000 or')).toEqual('CS1000'); 172 | }); 173 | 174 | it('parses strings with excess `and` operator', () => { 175 | expect(parseString('and CS1000')).toEqual('CS1000'); 176 | expect(parseString('CS1000 and')).toEqual('CS1000'); 177 | expect(parseString('(CS1000 and)')).toEqual('CS1000'); 178 | }); 179 | 180 | it('parses strings with duplicate `and` operator', () => { 181 | expect(parseString('CS1000 and and CS1001')).toEqual({ 182 | and: [ 183 | 'CS1000', 184 | 'CS1001', 185 | ], 186 | }); 187 | }); 188 | 189 | it('parses strings with duplicate `or` operator', () => { 190 | expect(parseString('CS1000 or or CS1001')).toEqual({ 191 | or: [ 192 | 'CS1000', 193 | 'CS1001', 194 | ], 195 | }); 196 | }); 197 | 198 | it('parses strings with parenthesis that have no modules in between', () => { 199 | expect(parseString('CS1000 ()')).toEqual('CS1000'); 200 | }); 201 | 202 | it('parses strings with operators that have no modules in between', () => { 203 | expect(parseString('CS1000 or and CS1001')).toEqual({ 204 | and: [ 205 | 'CS1000', 206 | 'CS1001', 
207 | ], 208 | }); 209 | }); 210 | }); 211 | -------------------------------------------------------------------------------- /gulp-tasks/local/mergeCorsBiddingStats.js: -------------------------------------------------------------------------------- 1 | import path from 'path'; 2 | import fs from 'fs-extra'; 3 | import bunyan from 'bunyan'; 4 | import R from 'ramda'; 5 | 6 | /** 7 | * Merges all years of corsBddingStats together. 8 | * By default outputs to: 9 | * - corsBiddingStatsRaw.json 10 | */ 11 | 12 | const log = bunyan.createLogger({ name: 'mergeCorsBiddingStats' }); 13 | 14 | async function mergeCorsBiddingStats(config) { 15 | const unavailableSems = []; 16 | const toRead = config.map(({ year, semester, destFolder, destFileName }) => { 17 | const acadYear = `${year}-${year + 1}`; 18 | const pathToRead = path.join( 19 | destFolder, 20 | acadYear, 21 | `${semester}`, 22 | destFileName, 23 | ); 24 | return fs.readJson(pathToRead).catch(() => { 25 | unavailableSems.push(`${acadYear} sem ${semester}`); 26 | return []; 27 | }); 28 | }); 29 | const data = await Promise.all(toRead); 30 | log.info(`${unavailableSems.join(', ')} data could not be found, continuing...`); 31 | 32 | const merge = R.pipe( 33 | R.filter(R.identity), 34 | R.unnest, 35 | ); 36 | const corsBddingStats = merge(data); 37 | 38 | const thisConfig = R.last(config); 39 | const pathToWrite = path.join( 40 | thisConfig.destFolder, 41 | thisConfig.destFileName, 42 | ); 43 | log.info(`saving to ${pathToWrite}`); 44 | await fs.outputJson(pathToWrite, corsBddingStats, { spaces: thisConfig.jsonSpace }); 45 | } 46 | 47 | export default mergeCorsBiddingStats; 48 | -------------------------------------------------------------------------------- /gulp-tasks/local/splitForSem.js: -------------------------------------------------------------------------------- 1 | import path from 'path'; 2 | import fs from 'fs-extra'; 3 | import bunyan from 'bunyan'; 4 | import R from 'ramda'; 5 | import timify from '../utils/timify'; 6 | 7 | /** 8 | * Splits semester data into different chunks. 9 | * By default outputs to: 10 | * - moduleCodes.json 11 | * - moduleList.json 12 | * - timetable.json 13 | * - moduleInformation.json 14 | * - venueInformation.json 15 | * - modules/XModule.json 16 | * - modules/XModule/CorsBiddingStats.json 17 | * - modules/XModule/ivle.json 18 | * - modules/XModule/timetable.json 19 | * - modules/XModule/index.json 20 | */ 21 | 22 | const SCHOOL_START_HOUR = '0600'; 23 | const SCHOOL_END_HOUR = '2400'; 24 | 25 | const log = bunyan.createLogger({ name: 'splitForSem' }); 26 | 27 | async function splitForSem(config) { 28 | const { year, semester } = config; 29 | const subLog = log.child({ year, semester }); 30 | 31 | const basePath = path.join( 32 | config.consolidate.destFolder, 33 | `${year}-${year + 1}`, 34 | `${semester}`, 35 | ); 36 | const pathToRead = path.join(basePath, config.consolidate.destFileName); 37 | const listOfModules = await fs.readJson(pathToRead); 38 | 39 | const thisConfig = config.split; 40 | 41 | function outputData(pathToWrite, data) { 42 | subLog.info(`saving to ${pathToWrite}`); 43 | fs.outputJson(pathToWrite, data, { spaces: thisConfig.jsonSpace }); 44 | } 45 | function write(destPath, func) { 46 | const pathToWrite = path.join( 47 | basePath, 48 | destPath, 49 | ); 50 | const data = func(listOfModules); 51 | return outputData(pathToWrite, data); 52 | } 53 | 54 | // moduleCodes.json 55 | // output: ['CS1010', ... 
] 56 | write( 57 | thisConfig.destModuleCodes, 58 | R.pluck('ModuleCode'), 59 | ); 60 | 61 | // moduleList.json 62 | // output: { 'CS1010': 'Introduction to Computer Science', ... } 63 | const collateModuleTitles = R.pipe( 64 | R.indexBy(R.prop('ModuleCode')), 65 | R.map(R.prop('ModuleTitle')), 66 | ); 67 | write( 68 | thisConfig.destModuleList, 69 | collateModuleTitles, 70 | ); 71 | 72 | // timetable.json 73 | write( 74 | thisConfig.destTimetableInformation, 75 | R.map(R.pick([ 76 | 'ModuleCode', 77 | 'ModuleTitle', 78 | 'Timetable', 79 | ])), 80 | ); 81 | 82 | // moduleInformation.json 83 | write( 84 | thisConfig.destModuleInformation, 85 | R.map(R.pick([ 86 | 'ModuleCode', 87 | 'ModuleTitle', 88 | 'Department', 89 | 'ModuleDescription', 90 | 'CrossModule', 91 | 'ModuleCredit', 92 | 'Workload', 93 | 'Prerequisite', 94 | 'Preclusion', 95 | 'Corequisite', 96 | 'ExamDate', 97 | 'Types', 98 | 'Lecturers', 99 | 'LecturePeriods', 100 | 'TutorialPeriods', 101 | ])), 102 | ); 103 | 104 | // venueInformation.json 105 | const getLessons = R.chain((module) => { 106 | const timetable = module.Timetable || []; 107 | // insert 'ModuleCode' key into lessons 108 | return timetable.map(R.assoc('ModuleCode', module.ModuleCode)); 109 | }); 110 | const processTimetables = R.map((venueTimetable) => { 111 | const schoolDays = timify.getSchoolDays(); 112 | // remove 'Venue' key from lessons 113 | const timetable = R.map(R.omit('Venue'), venueTimetable); 114 | return schoolDays.map((day) => { 115 | const lessons = R.filter(lesson => lesson.DayText === day, timetable); 116 | 117 | // Outputs the following: 118 | // availability: { 119 | // "0800": "vacant", 120 | // "0830": "vacant", 121 | // "0900": "occupied", 122 | // "0930": "occupied", 123 | // ... 124 | // "2330": "vacant" 125 | // } 126 | const availability = {}; 127 | timify.getTimeRange(SCHOOL_START_HOUR, SCHOOL_END_HOUR).forEach((time) => { 128 | availability[time] = 'vacant'; 129 | }); 130 | 131 | // for each time slot that contains lesson, label as occupied 132 | lessons.forEach((lesson) => { 133 | timify.getTimeRange(lesson.StartTime, lesson.EndTime).forEach((time) => { 134 | availability[time] = 'occupied'; 135 | }); 136 | }); 137 | 138 | return { 139 | Day: day, 140 | Classes: lessons, 141 | Availability: availability, 142 | }; 143 | }); 144 | }); 145 | 146 | const collateVenues = R.pipe( 147 | getLessons, 148 | R.groupBy(R.prop('Venue')), 149 | R.omit(''), // Delete empty venue string 150 | processTimetables, 151 | ); 152 | write( 153 | thisConfig.destVenueInformation, 154 | collateVenues, 155 | ); 156 | 157 | // modules/*.json 158 | // modules/*/CorsBiddingStats.json 159 | // modules/*/ivle.json 160 | // modules/*/timetable.json 161 | // modules/*/index.json 162 | function writeModule(module) { 163 | const subBasePath = path.join( 164 | basePath, 165 | thisConfig.destSubfolder, 166 | ); 167 | const fileNameToData = { 168 | '': module, 169 | index: module, 170 | corsbiddingstats: module.CorsBiddingStats || [], 171 | ivle: module.IVLE || [], 172 | timetable: module.Timetable || [], 173 | }; 174 | 175 | const moduleCode = module.ModuleCode; 176 | return Object.entries(fileNameToData).map(([fileName, data]) => { 177 | let pathToWrite = path.join(subBasePath, moduleCode, `${fileName}.json`); 178 | if (fileName === '') { // save to parent folder instead of module folder 179 | pathToWrite = path.join(subBasePath, `${moduleCode}.json`); 180 | } 181 | return outputData(pathToWrite, data); 182 | }); 183 | } 184 | await Promise.all(R.chain(writeModule, 
listOfModules)); 185 | } 186 | 187 | export default splitForSem; 188 | -------------------------------------------------------------------------------- /gulp-tasks/local/splitForYear.js: -------------------------------------------------------------------------------- 1 | import path from 'path'; 2 | import fs from 'fs-extra'; 3 | import bunyan from 'bunyan'; 4 | import R from 'ramda'; 5 | 6 | /** 7 | * Splits semester data into different chunks. 8 | * By default outputs to: 9 | * - moduleCodes.json 10 | * - moduleList.json 11 | * - timetable.json 12 | * - moduleInformation.json 13 | * - venueInformation.json 14 | * And indivually write each module's information to: 15 | * - modules/XModule.json 16 | * - modules/XModule/CorsBiddingStats.json 17 | * - modules/XModule/ivle.json 18 | * - modules/XModule/timetable.json 19 | * - modules/XModule/index.json 20 | */ 21 | 22 | const log = bunyan.createLogger({ name: 'splitForYear' }); 23 | 24 | async function splitForYear(config) { 25 | const { year } = config; 26 | const subLog = log.child({ year }); 27 | 28 | const acadYear = `${year}/${year + 1}`; 29 | const basePath = path.join( 30 | config.split.destFolder, 31 | acadYear.replace('/', '-'), 32 | ); 33 | 34 | const consolidatedPath = path.join( 35 | config.consolidate.destFolder, 36 | acadYear.replace('/', '-'), 37 | config.consolidate.destFileName, 38 | ); 39 | const modules = await fs.readJson(consolidatedPath); 40 | 41 | async function write(fileName, data) { 42 | const pathToWrite = path.join( 43 | basePath, 44 | fileName, 45 | ); 46 | subLog.info(`saving to ${pathToWrite}`); 47 | await fs.outputJson(pathToWrite, data, { spaces: config.split.jsonSpace }); 48 | } 49 | 50 | const moduleList = []; 51 | const moduleInformation = []; 52 | 53 | modules.forEach((mod) => { 54 | const pathToWrite = path.join( 55 | config.split.destSubfolder, 56 | `${mod.ModuleCode}.json`, 57 | ); 58 | write(pathToWrite, mod); 59 | 60 | const module = R.pick([ 61 | 'ModuleCode', 62 | 'ModuleTitle', 63 | ], mod); 64 | moduleList.push({ 65 | ...module, 66 | Semesters: R.pluck('Semester', mod.History), 67 | }); 68 | 69 | const info = R.pick([ 70 | 'ModuleCode', 71 | 'ModuleTitle', 72 | 'Department', 73 | 'ModuleDescription', 74 | 'CrossModule', 75 | 'ModuleCredit', 76 | 'Workload', 77 | 'Prerequisite', 78 | 'Preclusion', 79 | 'Corequisite', 80 | 'Types', 81 | ], mod); 82 | info.History = mod.History.map(R.omit(['Timetable', 'IVLE'])); 83 | moduleInformation.push(info); 84 | 85 | const pathToWriteInformation = path.join( 86 | config.split.destSubfolder, 87 | mod.ModuleCode, 88 | 'index.json', 89 | ); 90 | write(pathToWriteInformation, info); 91 | }); 92 | 93 | await Promise.all([ 94 | write(config.split.destModuleList, moduleList), 95 | write(config.split.destModuleInformation, moduleInformation), 96 | ]); 97 | } 98 | 99 | export default splitForYear; 100 | -------------------------------------------------------------------------------- /gulp-tasks/remote/bulletinModules.js: -------------------------------------------------------------------------------- 1 | import path from 'path'; 2 | import querystring from 'querystring'; 3 | import R from 'ramda'; 4 | import fs from 'fs-extra'; 5 | import bunyan from 'bunyan'; 6 | import gotCached from '../utils/gotCached'; 7 | import titleize from '../utils/titleize'; 8 | import sortByKey from '../utils/sortByKey'; 9 | 10 | /** 11 | * Outputs bulletin modules without changing any of the data, 12 | * and also outputs departments under each faculty. 
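 * The faculty/department output is keyed by titleized faculty name and maps to
 * a de-duplicated list of titleized department names (faculties sorted by key),
 * roughly of the shape (placeholder names only):
 *   { "Some Faculty": ["Some Department", "Another Department"], ... }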
13 | * By default outputs to: 14 | * - bulletinModulesRaw.json 15 | * - facultyDepartments.json 16 | */ 17 | 18 | const log = bunyan.createLogger({ name: 'bulletinModules' }); 19 | 20 | async function parseBulletinModules(config) { 21 | const rootUrl = config.ivleApi.baseUrl; 22 | const query = querystring.stringify({ 23 | APIKey: config.ivleApi.key, 24 | Semester: config.semester, 25 | TitleOnly: false, 26 | }); 27 | const url = `${rootUrl}Bulletin_Module_Search?${query}`; 28 | 29 | const fileData = await gotCached(url, config); 30 | const modules = JSON.parse(fileData).Results; 31 | const data = R.groupBy(R.prop('AcadYear'), modules); 32 | return data; 33 | } 34 | 35 | function parseFacultyDepartment(modules) { 36 | const facultyDepartments = {}; 37 | modules.forEach((module) => { 38 | const faculty = titleize(module.Faculty); 39 | const department = titleize(module.Department); 40 | facultyDepartments[faculty] = facultyDepartments[faculty] || []; 41 | facultyDepartments[faculty].push(department); 42 | }); 43 | const withoutDuplicateDepartments = R.map(R.uniq, facultyDepartments); 44 | return sortByKey(withoutDuplicateDepartments); 45 | } 46 | 47 | async function bulletinModules(config) { 48 | const bulletinData = await parseBulletinModules(config); 49 | const semester = config.semester; 50 | 51 | const toWrite = []; 52 | Object.entries(bulletinData).forEach(([academicYear, modules]) => { 53 | const subLog = log.child({ academicYear, semester }); 54 | const facultyDepartment = parseFacultyDepartment(modules); 55 | subLog.info(`parsed ${modules.length} bulletin modules`); 56 | 57 | async function write(fileName, data) { 58 | const pathToWrite = path.join( 59 | config.destFolder, 60 | academicYear.replace('/', '-'), 61 | `${semester}`, 62 | fileName, 63 | ); 64 | subLog.info(`saving to ${pathToWrite}`); 65 | await fs.outputJson(pathToWrite, data, { spaces: config.jsonSpace }); 66 | } 67 | toWrite.push(write(config.destFileName, modules)); 68 | toWrite.push(write(config.destFacultyDepartments, facultyDepartment)); 69 | }); 70 | await Promise.all(toWrite); 71 | } 72 | 73 | export default bulletinModules; 74 | -------------------------------------------------------------------------------- /gulp-tasks/remote/cors.js: -------------------------------------------------------------------------------- 1 | import path from 'path'; 2 | import fs from 'fs-extra'; 3 | import bunyan from 'bunyan'; 4 | import R from 'ramda'; 5 | import cheerio from 'cheerio'; 6 | import moment from 'moment'; 7 | import Promise from 'bluebird'; 8 | import gotCached from '../utils/gotCached'; 9 | import sortByKey from '../utils/sortByKey'; 10 | 11 | /** 12 | * Outputs cors data for regular sems (1 & 2) or 13 | * special sems (3 & 4). 14 | * Also outputs lesson types that are either 15 | * lectures or tutorials. 
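 * lessonTypes.json maps each lesson type label scraped from the report tables
 * to either 'Lecture' or 'Tutorial', roughly (placeholder keys only):
 *   { "Some Lesson Type": "Lecture", "Another Lesson Type": "Tutorial" }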
16 | * By default outputs to: 17 | * - corsRaw.json 18 | * - lessonTypes.json 19 | */ 20 | 21 | const REGULAR_SEMESTER = 'regular semester'; 22 | const SPECIAL_SEMESTER = 'special semester'; 23 | 24 | const TIMESTAMP_REGEX = /Correct as at ([^<]+)/; 25 | const ACADEMIC_YEAR_REGEX = /\d{4}\W\d{4}/; 26 | 27 | const DATE_FORMAT = 'DD-MM-YYYY'; 28 | 29 | const LESSON_TYPES = ['Lecture', 'Tutorial']; 30 | const ROOT_URLS = { 31 | [REGULAR_SEMESTER]: 'https://myaces.nus.edu.sg/cors/jsp/report/', 32 | [SPECIAL_SEMESTER]: 'https://myaces.nus.edu.sg/sts/jsp/report/', 33 | }; 34 | 35 | const MODULE_TYPES = [ 36 | 'Module', 37 | 'GEM2015', 38 | 'GEM', 39 | 'SSM', 40 | 'UEM', 41 | 'CFM', 42 | ]; 43 | 44 | const log = bunyan.createLogger({ name: 'cors' }); 45 | 46 | function processModulePage(webpage, moduleInfo) { 47 | const $ = cheerio.load(webpage); 48 | const timestamp = $('h2').text().match(TIMESTAMP_REGEX).pop(); 49 | 50 | // first table consist of details of the module 51 | const moduleDetails = $('.tableframe').first().find('tr td:nth-child(2)'); 52 | const timetable = []; 53 | 54 | // get the timetable info 55 | const timetableTables = $('.tableframe').find('tr table'); 56 | timetableTables.each((i, table) => { 57 | // remove inner header and empty rows 58 | const rows = $('tr', table) 59 | .slice(1) 60 | .filter((i, el) => $('td', el).length > 6); 61 | 62 | // get all the relevant information 63 | const timetableDetails = rows.map((i, el) => { 64 | const row = $('td', el); 65 | return { 66 | ClassNo: row.eq(0).text().trim(), 67 | LessonType: row.eq(1).text(), 68 | WeekText: row.eq(2).text().replace(/\u00a0/g, ' '), 69 | DayText: row.eq(3).text(), 70 | StartTime: row.eq(4).text(), 71 | EndTime: row.eq(5).text(), 72 | Venue: row.eq(6).text(), 73 | }; 74 | }).get(); 75 | 76 | timetable.push(...timetableDetails); 77 | }); 78 | 79 | const examText = moduleDetails.eq(4).text().trim(); 80 | if (examText !== 'No Exam Date.') { 81 | const date = R.head(examText.split(' ')); 82 | const examMoment = moment(date, DATE_FORMAT, true); 83 | if (!examMoment.isValid()) { 84 | throw new Error(`Module ${moduleInfo.moduleCode}'s date format is wrong: ${date}`); 85 | } 86 | } 87 | return { 88 | Type: moduleInfo.type, 89 | ModuleCode: moduleInfo.moduleCode, 90 | Department: moduleInfo.department, 91 | CorrectAsAt: timestamp, 92 | ModuleTitle: moduleDetails.eq(1).text(), 93 | ModuleDescription: moduleDetails.eq(2).text(), 94 | ExamDate: examText, 95 | ModuleCredit: moduleDetails.eq(5).text(), 96 | Prerequisite: moduleDetails.eq(6).text(), 97 | Preclusion: moduleDetails.eq(7).text(), 98 | Workload: moduleDetails.eq(8).text(), 99 | Timetable: timetable, 100 | }; 101 | } 102 | 103 | function processLessonTypes(webpage, lessonTypes) { 104 | const $ = cheerio.load(webpage); 105 | 106 | const tableframes = $('.tableframe').slice(1); 107 | tableframes.each((i, tableframe) => { 108 | const tables = $(tableframe).find('tr table'); 109 | if (tables.empty()) return; 110 | const table = tables[0]; 111 | 112 | const lessonType = LESSON_TYPES[i]; 113 | // remove inner header and empty rows 114 | const rows = $('tr', table) 115 | .slice(1) 116 | .filter((i, el) => $('td', el).length > 6); 117 | 118 | rows.each((i, el) => { 119 | const key = $('td', el).eq(1).text(); 120 | 121 | const originalVal = lessonTypes[key]; 122 | // throw if original value is different from the new one 123 | if (originalVal && originalVal !== lessonType) { 124 | throw new Error(`lessonTypes ${key} conflict: ${originalVal} vs ${lessonType}`); 125 | } 126 | 
lessonTypes[key] = lessonType; // eslint-disable-line no-param-reassign 127 | }); 128 | }); 129 | } 130 | 131 | async function processListings(rootUrl, type, lessonTypes, config) { 132 | const url = `${rootUrl}${type}InfoListing.jsp`; 133 | const webpage = await gotCached(url, config); 134 | const $ = cheerio.load(webpage); 135 | const listingInfo = $('h2').text().split(':'); 136 | 137 | const academicYear = listingInfo[1].match(ACADEMIC_YEAR_REGEX).shift(); 138 | const semester = listingInfo[2].match(/\d/).shift(); 139 | 140 | const listOfModuleInfo = $('tr[valign="top"]').toArray(); 141 | 142 | async function processModuleInfo(row) { 143 | const hyperlink = $('div > a', row); 144 | 145 | const urlStr = `${rootUrl}${hyperlink.prop('href')}`; 146 | const page = await gotCached(urlStr, config); 147 | 148 | processLessonTypes(page, lessonTypes); 149 | 150 | const moduleInfo = { 151 | type, 152 | moduleCode: hyperlink.html().trim(), 153 | department: $('td div', row).last().text().trim(), 154 | }; 155 | 156 | const moduleData = await processModulePage(page, moduleInfo); 157 | return moduleData; 158 | } 159 | const modules = await Promise.map(listOfModuleInfo, processModuleInfo, 160 | { concurrency: config.concurrency }); 161 | return { 162 | academicYear, 163 | semester, 164 | modules, 165 | }; 166 | } 167 | 168 | async function cors(config) { 169 | const semesterCategory = config.semester; 170 | const subLog = log.child({ semesterCategory }); 171 | 172 | const lessonTypesPath = path.join( 173 | config.destFolder, 174 | config.destLessonTypes, 175 | ); 176 | const lessonTypes = await fs.readJson(lessonTypesPath).catch(() => { 177 | subLog.warn(`Failed to read ${lessonTypesPath}, proceeding with empty object`); 178 | return {}; 179 | }); 180 | const url = ROOT_URLS[semesterCategory]; 181 | const modulesByTypes = MODULE_TYPES.map(type => processListings(url, type, lessonTypes, config)); 182 | const modulesByAcadYearAndSem = await Promise.all(modulesByTypes); 183 | 184 | function pluckSingle(property) { 185 | const props = R.uniq(R.pluck(property, modulesByAcadYearAndSem)); 186 | if (props.length > 1) { 187 | throw new Error(`${property} should only contain single piece of data, found ${props}`); 188 | } 189 | return R.head(props); 190 | } 191 | const academicYear = pluckSingle('academicYear'); 192 | const semester = pluckSingle('semester'); 193 | const modules = R.chain(R.prop('modules'), modulesByAcadYearAndSem); 194 | subLog.info(`parsed ${modules.length} cors modules`); 195 | 196 | subLog.info(`saving to ${lessonTypesPath}`); 197 | await fs.outputJson(lessonTypesPath, sortByKey(lessonTypes), { spaces: config.jsonSpace }); 198 | 199 | const pathToWrite = path.join( 200 | config.destFolder, 201 | academicYear.replace('/', '-'), 202 | semester, 203 | config.destFileName, 204 | ); 205 | subLog.info(`saving to ${pathToWrite}`); 206 | await fs.outputJson(pathToWrite, modules, { spaces: config.jsonSpace }); 207 | return modules; 208 | } 209 | 210 | export default cors; 211 | export { 212 | REGULAR_SEMESTER, 213 | SPECIAL_SEMESTER, 214 | DATE_FORMAT, 215 | }; 216 | -------------------------------------------------------------------------------- /gulp-tasks/remote/corsBiddingStats.js: -------------------------------------------------------------------------------- 1 | import path from 'path'; 2 | import fs from 'fs-extra'; 3 | import bunyan from 'bunyan'; 4 | import R from 'ramda'; 5 | import cheerio from 'cheerio'; 6 | import gotCached from '../utils/gotCached'; 7 | 8 | /** 9 | * Outputs cors 
bidding stats for one semester. 10 | * By default outputs to: 11 | * - corsBiddingStatsRaw.json 12 | */ 13 | 14 | const CORS_URL = 'http://www.nus.edu.sg/cors/'; 15 | const CORS_ARCHIVE_URL = `${CORS_URL}archive.html`; 16 | const BID_RESULTS_LINK_SELECTOR = 'a[href*="successbid"]'; 17 | const BID_RESULTS_ROW_SELECTOR = 'body > table > tr[valign=top]'; 18 | 19 | // eslint-disable-next-line max-len 20 | const biddingSummaryUrlPattern = /Archive\/(\d{4})\d{2}_Sem(\d)\/successbid_(\d[A-F])_\d{4,8}s\d\.html/; 21 | const statsKeys = [ 22 | 'Quota', 23 | 'Bidders', 24 | 'LowestBid', 25 | 'LowestSuccessfulBid', 26 | 'HighestBid', 27 | 'Faculty', 28 | 'StudentAcctType', 29 | ]; 30 | 31 | const log = bunyan.createLogger({ name: 'corsBiddingStats' }); 32 | 33 | async function processBiddingStats(href, config) { 34 | const url = href.startsWith('.') ? `${CORS_URL}${href}` : href; 35 | const urlMatch = biddingSummaryUrlPattern.exec(url); 36 | 37 | const webpage = await gotCached(url, config); 38 | const $ = cheerio.load(webpage); 39 | // some pages have 2 tables, we want the table that is a direct descendant of body 40 | // this selector get rids of all non-data tr (such as headers) 41 | // cors should really use th for headers... 42 | const trs = $(BID_RESULTS_ROW_SELECTOR); 43 | 44 | let moduleCode; 45 | let group; 46 | 47 | const biddingResults = trs.map((i, tr) => { 48 | const ps = $('p', tr); 49 | 50 | // there are 2 kinds of rows 51 | // 1. rows with module code (which has 9 p nodes) 52 | // 2. rows without belong to a previous row that has a module code (8 p nodes) 53 | // when we meet row of kind 1, we store the module and group info to be used 54 | // by rows of type 2 that follows it 55 | if (ps.length === 9) { 56 | moduleCode = $(ps[0]).text(); 57 | group = $(ps[1]).text(); 58 | } 59 | 60 | const statsArray = ps.slice(ps.length - 7).map((i, el) => $(el).text()); 61 | 62 | return { 63 | ...R.zipObj(statsKeys, statsArray), 64 | AcadYear: `${urlMatch[1]}/${parseInt(urlMatch[1], 10) + 1}`, 65 | Semester: urlMatch[2], 66 | Round: urlMatch[3], 67 | ModuleCode: moduleCode, 68 | Group: group, 69 | }; 70 | }); 71 | 72 | return biddingResults.get(); 73 | } 74 | 75 | async function corsBiddingStats(config) { 76 | const { year, semester } = config; 77 | const subLog = log.child({ year, semester }); 78 | 79 | const webpage = await gotCached(CORS_ARCHIVE_URL, config); 80 | const $ = cheerio.load(webpage); 81 | 82 | const urls = $(BID_RESULTS_LINK_SELECTOR).map((i, anchor) => $(anchor).prop('href')).get(); 83 | const wantedUrls = urls.filter(href => href.includes(`${year + 1}s${semester}`)); 84 | 85 | const statsByPhase = wantedUrls.map(href => processBiddingStats(href, config)); 86 | const biddingStats = R.unnest(await Promise.all(statsByPhase)); 87 | if (biddingStats.length === 0) { 88 | subLog.info('no bidding stats available, scrape ended.'); 89 | return; 90 | } 91 | subLog.info(`parsed ${biddingStats.length} bidding stats`); 92 | 93 | const pathToWrite = path.join( 94 | config.destFolder, 95 | `${year}-${year + 1}`, 96 | `${semester}`, 97 | config.destFileName, 98 | ); 99 | subLog.info(`saving to ${pathToWrite}`); 100 | await fs.outputJson(pathToWrite, biddingStats, { spaces: config.jsonSpace }); 101 | } 102 | 103 | export default corsBiddingStats; 104 | -------------------------------------------------------------------------------- /gulp-tasks/remote/examTimetable.js: -------------------------------------------------------------------------------- 1 | import path from 'path'; 2 | import fs from 
'fs-extra'; 3 | import moment from 'moment'; 4 | import R from 'ramda'; 5 | import bunyan from 'bunyan'; 6 | import gotCached from '../utils/gotCached'; 7 | import { getPagesTextFromPdf } from '../utils/pdf'; 8 | 9 | /** 10 | * Outputs exam data for one semester. 11 | * File type is a pdf so parsing is done entirely 12 | * with regex. 13 | * By default outputs to: 14 | * - examTimetableRaw.json 15 | */ 16 | 17 | const DATE_FORMAT = 'D/M/YYYY'; 18 | 19 | // matches dd/mm/yyyy or d/m/yy or d/m/yy 20 | const DATE_REGEX = /\d{1,2}\W\d{1,2}\W[20]{0,2}\d{2}/; 21 | // matches Mon or mon or any 3 letter words 22 | const DAY_REGEX = /\(\w{3}\)/; 23 | // matches 0900AM, 900PM or 9:00 PM 24 | const TIME_REGEX = /[0-2]?[1-9]\W?[0-5]\d\s?(?:AM|PM)?/; 25 | // matches 2 or 3 capital alphabets mixed with whitespace 26 | // followed by 4 numerics and 1 or 2 letters 27 | const CODE_REGEX = /[A-Z|\W]{2,4}[0-9]{4}(?:[A-Z]|[A-Z]R)?/; 28 | // matches multiple words in all caps with symbols and roman numerals I, V 29 | const TITLE_REGEX = /[^a-z]+[IV]*/; 30 | // matches rest of any non digit word(s) 31 | const FACULTY_REGEX = /[^\d]+/; 32 | 33 | // combined to give us this using capture groups and delimiter allowances 34 | const MODULE_REGEX = new RegExp( 35 | [ 36 | '(', 37 | DATE_REGEX.source, 38 | ')\\s?(?:', 39 | DAY_REGEX.source, 40 | ')?\\s*(', 41 | TIME_REGEX.source, 42 | ')\\s*(', 43 | CODE_REGEX.source, 44 | ')\\s*(', 45 | TITLE_REGEX.source, 46 | ')\\b(', 47 | FACULTY_REGEX.source, 48 | ')$', 49 | ].join(''), 50 | ); 51 | 52 | const log = bunyan.createLogger({ name: 'examTimetable' }); 53 | 54 | function parseModule(module, subLog) { 55 | const moduleArr = R.pipe( 56 | R.match(MODULE_REGEX), 57 | R.map(R.replace(/\s/g, ' ')), // normalize whitespace 58 | )(module); 59 | 60 | if (!moduleArr.length) { 61 | subLog.warn(`'${module}' is not a valid module`); 62 | return {}; 63 | } 64 | 65 | const date = moduleArr[1].replace(/\W/g, '/'); // replace delimiters to '/' 66 | const time = moduleArr[2].replace(' ', ''); // remove whitespace 67 | const code = moduleArr[3]; 68 | const title = moduleArr[4].trim(); 69 | const faculty = moduleArr[5]; 70 | 71 | if (!moment(date, DATE_FORMAT, true).isValid()) { 72 | throw new Error(`Module ${code}'s date format is wrong: ${date}`); 73 | } 74 | 75 | return { 76 | Date: date, 77 | Time: time, 78 | Faculty: faculty, 79 | ModuleCode: code, 80 | Title: title, 81 | }; 82 | } 83 | 84 | async function parseExamPdf(fileData, subLog) { 85 | function removeHeadersAndPageNum(pages) { 86 | return pages.map((page, index) => { 87 | const startOfData = R.findIndex(R.test(DATE_REGEX), page); 88 | const endOfData = R.findLastIndex(R.test(/[A-Za-z]+/), page) + 1; 89 | 90 | if (startOfData === -1 || endOfData === -1) { 91 | // eslint-disable-next-line max-len 92 | subLog.warn( 93 | `page ${index + 1} of pdf has no data, please visually check if this is correct`, 94 | ); 95 | return []; 96 | } 97 | return page.slice(startOfData, endOfData); 98 | }); 99 | } 100 | 101 | function modulesFromText(strings) { 102 | const modules = []; 103 | strings.forEach((str) => { 104 | if (DATE_REGEX.test(str)) { 105 | // create new module 106 | modules.push(str); 107 | } else { 108 | modules[modules.length - 1] += str; 109 | } 110 | }); 111 | return modules; 112 | } 113 | 114 | const modulesArrFromPages = R.pipe( 115 | removeHeadersAndPageNum, 116 | R.flatten, 117 | R.map(str => str.replace(/\s{2,}/g, ' ').replace(/ ‐/, '-')), 118 | modulesFromText, 119 | ); 120 | 121 | const pagesOfText = await 
getPagesTextFromPdf(fileData); 122 | const modulesArr = modulesArrFromPages(pagesOfText); 123 | const filterEmptyObject = R.reject(R.isEmpty); 124 | return filterEmptyObject(modulesArr.map(module => parseModule(module, subLog))); 125 | } 126 | 127 | async function examTimetable(config) { 128 | const { year, semester } = config; 129 | const subLog = log.child({ year, semester }); 130 | 131 | let url = `https://webrb.nus.edu.sg/examtt/Exam${year}`; 132 | if (semester < 3) { 133 | url += `/Semester ${semester}/Semester_${semester}_By_Date.pdf`; 134 | } else { 135 | const specialSem = semester - 2; 136 | url += `/Special Term Part ${specialSem}/Special_Term_Part${specialSem}_By_Date.pdf`; 137 | } 138 | let pdf; 139 | try { 140 | pdf = await gotCached(url, config); 141 | } catch (e) { 142 | log.error(e); 143 | log.info('Unable to download pdf file, continuing...'); 144 | return null; 145 | } 146 | const data = await parseExamPdf(pdf, subLog); 147 | 148 | subLog.info(`parsed ${data.length} exam timetables`); 149 | const pathToWrite = path.join( 150 | config.destFolder, 151 | `${year}-${year + 1}`, 152 | `${semester}`, 153 | config.destFileName, 154 | ); 155 | subLog.info(`saving to ${pathToWrite}`); 156 | await fs.outputJson(pathToWrite, data, { spaces: config.jsonSpace }); 157 | return data; 158 | } 159 | 160 | export default examTimetable; 161 | export { 162 | parseExamPdf, 163 | DATE_FORMAT, 164 | DATE_REGEX, 165 | TIME_REGEX, 166 | CODE_REGEX, 167 | TITLE_REGEX, 168 | FACULTY_REGEX, 169 | }; 170 | -------------------------------------------------------------------------------- /gulp-tasks/remote/examTimetable.test.js: -------------------------------------------------------------------------------- 1 | import fs from 'fs-extra'; 2 | import { 3 | parseExamPdf, 4 | DATE_REGEX, 5 | TIME_REGEX, 6 | CODE_REGEX, 7 | TITLE_REGEX, 8 | // Regex too simple to justify testing 9 | FACULTY_REGEX, // eslint-disable-line 10 | } from './examTimetable'; 11 | 12 | jest.unmock('fs-extra'); 13 | 14 | describe('parseExamPdf', () => { 15 | const sublog = { warn: jest.fn() }; 16 | function matchPdfOutput(filePath) { 17 | return fs 18 | .readFile(filePath) 19 | .then(fileContent => parseExamPdf(fileContent, sublog)) 20 | .then((result) => { 21 | expect(result).toMatchSnapshot(); 22 | }); 23 | } 24 | 25 | it('scrapes all the modules for 2016 sem 1', () => 26 | matchPdfOutput('__mocks__/fixtures/test1.pdf')); 27 | it('scrapes all the modules for 2017 sem 1', () => 28 | matchPdfOutput('__mocks__/fixtures/test2.pdf')); 29 | }); 30 | 31 | function passRegex(regex, str) { 32 | expect(regex.test(str)).toBe(true); 33 | } 34 | 35 | function failRegex(regex, str) { 36 | return () => expect(regex.test(str)).toBe(false); 37 | } 38 | 39 | describe('date regex', () => { 40 | it('captures date-like sequences', () => { 41 | passRegex(DATE_REGEX, '09-09-2016'); 42 | passRegex(DATE_REGEX, '9-9-2016'); 43 | passRegex(DATE_REGEX, '09/09/2016'); 44 | passRegex(DATE_REGEX, '09-09-16'); 45 | }); 46 | 47 | it('fails with wrong month', failRegex(DATE_REGEX, '00-000-2000')); 48 | it('fails with wrong date', failRegex(DATE_REGEX, '-00-2000')); 49 | it('fails with wrong year', failRegex(DATE_REGEX, '00-00-')); 50 | it('fails with wrong delimiters', failRegex(DATE_REGEX, '00x00x2000')); 51 | }); 52 | 53 | describe('time regex', () => { 54 | it('captures time-like sequences', () => { 55 | passRegex(TIME_REGEX, '0900AM'); 56 | passRegex(TIME_REGEX, '09:00AM'); 57 | passRegex(TIME_REGEX, '900 AM'); 58 | passRegex(TIME_REGEX, '1400PM'); 59 | 
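/*
 * (How the patterns under test combine.) In examTimetable.js the component regexes
 * above are glued into MODULE_REGEX with capture groups. A rough illustration on a
 * made-up, whitespace-normalised row, not taken from a real timetable:
 *
 *   '27/11/2017 (Mon) 9:00 AM CS1010 PROGRAMMING METHODOLOGY School of Computing'
 *
 * captures '27/11/2017' (date), '9:00 AM' (time), 'CS1010' (code),
 * 'PROGRAMMING METHODOLOGY ' (title, trimmed later) and 'School of Computing'
 * (faculty); the '(Mon)' day marker is matched but not captured. parseModule then
 * strips the space from the time and validates the date against DATE_FORMAT.
 */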
passRegex(TIME_REGEX, '0900'); 60 | }); 61 | 62 | it('matches only timing in case of wrong period', () => { 63 | const match = TIME_REGEX.exec('0900BM')[0]; 64 | expect(match).toBe('0900'); 65 | }); 66 | it('fails with no digits', failRegex(TIME_REGEX, '00PM')); 67 | it('fails with wrong digits', failRegex(TIME_REGEX, '0000PM')); 68 | }); 69 | 70 | describe('code regex', () => { 71 | it('captures code-like sequences', () => { 72 | passRegex(CODE_REGEX, 'CS1010'); 73 | passRegex(CODE_REGEX, 'CS1010S'); 74 | passRegex(CODE_REGEX, 'GER1010'); 75 | }); 76 | 77 | it('fails with wrong digits', failRegex(CODE_REGEX, 'CS10')); 78 | it('fails with no digits', failRegex(CODE_REGEX, 'CS')); 79 | it('fails with no prefix', failRegex(CODE_REGEX, '1010')); 80 | }); 81 | 82 | describe('title regex', () => { 83 | it('captures title-like sequences', () => { 84 | passRegex(TITLE_REGEX, 'TEST()'); 85 | passRegex(TITLE_REGEX, 'TEST[]'); 86 | passRegex(TITLE_REGEX, '1TEST:'); 87 | passRegex(TITLE_REGEX, 'TEST'); 88 | }); 89 | 90 | it('fails with no uppercase', failRegex(TITLE_REGEX, 'test')); 91 | it('fails with empty string', failRegex(TITLE_REGEX, '')); 92 | }); 93 | 94 | describe('title regex', () => { 95 | it('captures title-like sequences', () => { 96 | passRegex(TITLE_REGEX, 'TEST()'); 97 | passRegex(TITLE_REGEX, 'TEST[]'); 98 | passRegex(TITLE_REGEX, '1TEST:'); 99 | passRegex(TITLE_REGEX, 'TEST'); 100 | }); 101 | 102 | it('fails with no uppercase', failRegex(TITLE_REGEX, 'test')); 103 | it('fails with empty string', failRegex(TITLE_REGEX, '')); 104 | }); 105 | -------------------------------------------------------------------------------- /gulp-tasks/remote/ivle.js: -------------------------------------------------------------------------------- 1 | import path from 'path'; 2 | import querystring from 'querystring'; 3 | import fs from 'fs-extra'; 4 | import bunyan from 'bunyan'; 5 | import R from 'ramda'; 6 | import Promise from 'bluebird'; 7 | import gotCached from '../utils/gotCached'; 8 | 9 | /** 10 | * Deprecated: ivle api seems to have not worked last year. 11 | * 12 | * Outputs ivle data for one semester. 13 | * Fetches all modules individually, thus requiring 14 | * module codes from all other data sources. 15 | * 16 | * Changes `CourseCode` field to `ModuleCode` 17 | * to be consistent with all other data sources. 18 | * By default outputs to: 19 | * - ivleRaw.json 20 | * Requires ivle token and key. 
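 *
 * Each module is fetched through an IVLE Modules_Search call shaped roughly like
 * the following (the key, token, year and module code are placeholders):
 *   {ivleApi.baseUrl}Modules_Search?APIKey=...&AcadYear=2016%2F2017
 *     &IncludeAllInfo=true&ModuleCode=CS1010&Semester=Semester%201&AuthToken=...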
21 | */ 22 | 23 | const log = bunyan.createLogger({ name: 'ivle' }); 24 | 25 | async function ivle(config) { 26 | const { year, semester } = config; 27 | const subLog = log.child({ year, semester }); 28 | const thisConfig = config.ivle; 29 | 30 | const basePath = path.join( 31 | thisConfig.srcFolder, 32 | `${year}-${year + 1}`, 33 | `${semester}`, 34 | ); 35 | const bulletinModulesPath = path.join( 36 | basePath, 37 | config.bulletinModules.destFileName, 38 | ); 39 | const corsPath = path.join( 40 | basePath, 41 | config.cors.destFileName, 42 | ); 43 | const examTimetablePath = path.join( 44 | basePath, 45 | config.examTimetable.destFileName, 46 | ); 47 | const moduleTimetableDeltaPath = path.join( 48 | basePath, 49 | config.moduleTimetableDelta.destFileName, 50 | ); 51 | 52 | // Get module codes from all preceding tasks 53 | let moduleCodes = []; 54 | async function populateModuleCodes(jsonPath, keyOrFunc) { 55 | try { 56 | const data = await fs.readJson(jsonPath); 57 | let mods = []; 58 | if (typeof keyOrFunc === 'string') { 59 | mods = R.pluck(keyOrFunc, data); 60 | } else { 61 | mods = R.chain(keyOrFunc, data); 62 | } 63 | moduleCodes.push(...mods); 64 | } catch (error) { 65 | log.debug(`${jsonPath} file not present, continuing with parsing.`); 66 | } 67 | } 68 | 69 | await populateModuleCodes(bulletinModulesPath, 'ModuleCode'); 70 | await populateModuleCodes(corsPath, mod => mod.ModuleCode.split(' / ')); 71 | await populateModuleCodes(examTimetablePath, 'ModuleCode'); 72 | await populateModuleCodes(moduleTimetableDeltaPath, 'ModuleCode'); 73 | 74 | moduleCodes = R.uniq(moduleCodes); 75 | subLog.info(`found ${moduleCodes.length} modules`); 76 | 77 | async function processModule(moduleCode) { 78 | const query = querystring.stringify({ 79 | APIKey: thisConfig.ivleApi.key, 80 | AcadYear: `${year}/${year + 1}`, 81 | IncludeAllInfo: true, 82 | ModuleCode: moduleCode, 83 | Semester: `Semester ${semester}`, 84 | AuthToken: thisConfig.ivleApi.token, 85 | }); 86 | const url = `${thisConfig.ivleApi.baseUrl}Modules_Search?${query}`; 87 | 88 | let results = []; 89 | try { 90 | const fileData = await gotCached(url, thisConfig); 91 | results = JSON.parse(fileData).Results; 92 | } catch (err) { 93 | log.error(moduleCode); 94 | } 95 | 96 | const modules = []; 97 | results.forEach((result) => { 98 | if (result.CourseCode === moduleCode) { 99 | const module = R.omit(['CourseCode'], result); 100 | module.ModuleCode = moduleCode; 101 | modules.push(module); 102 | } 103 | }); 104 | return modules; 105 | } 106 | const ivleModules = await Promise.map(moduleCodes, processModule, 107 | { concurrency: thisConfig.concurrency }); 108 | subLog.info(`parsed ${ivleModules.length} bidding stats`); 109 | 110 | const pathToWrite = path.join( 111 | thisConfig.destFolder, 112 | `${year}-${year + 1}`, 113 | `${semester}`, 114 | thisConfig.destFileName, 115 | ); 116 | subLog.info(`saving to ${pathToWrite}`); 117 | await fs.outputJson(pathToWrite, ivleModules, { spaces: thisConfig.jsonSpace }); 118 | } 119 | 120 | export default ivle; 121 | -------------------------------------------------------------------------------- /gulp-tasks/remote/moduleTimetableDelta.js: -------------------------------------------------------------------------------- 1 | import path from 'path'; 2 | import fs from 'fs-extra'; 3 | import bunyan from 'bunyan'; 4 | import R from 'ramda'; 5 | import moment from 'moment'; 6 | import querystring from 'querystring'; 7 | import gotCached from '../utils/gotCached'; 8 | 9 | /** 10 | * Outputs module 
timetable delta data for one semester. 11 | * Module timetable delta are all lesson changes made 12 | * since the LastModified field. 13 | * By default outputs to: 14 | * - moduleTimetableDeltaRaw.json 15 | */ 16 | 17 | const log = bunyan.createLogger({ name: 'moduleTimetableDelta' }); 18 | 19 | async function moduleTimetableDelta(config) { 20 | let moduleTimetableDeltas = []; 21 | let secondsSinceLastModified = 365 * 24 * 60 * 60; // retrieve changes at most a year back 22 | 23 | const destPath = path.join(config.destFolder, config.destFileName); 24 | try { 25 | moduleTimetableDeltas = await fs.readJson(destPath); 26 | const moduleLastModified = parseInt(R.last(moduleTimetableDeltas).LastModified.substr(6), 10); 27 | secondsSinceLastModified = Math.floor((Date.now() - moduleLastModified) / 1000) - 1; 28 | } catch (error) { 29 | log.warn(`Failed to read ${destPath}, proceeding with empty array`); 30 | } 31 | const readableAgo = moment.duration(secondsSinceLastModified, 'seconds').humanize(); 32 | log.info(`retrieving changes to modules since ${readableAgo} ago.`); 33 | 34 | const query = querystring.stringify({ 35 | APIKey: config.ivleApi.key, 36 | LastModified: secondsSinceLastModified, 37 | }); 38 | const url = `${config.ivleApi.baseUrl}Delta_ModuleTimeTable?${query}`; 39 | const delta = await gotCached(url, config); 40 | const deltasSinceLastModified = JSON.parse(delta); 41 | 42 | // If it encounters an exception, IVLE API does not seem to indicate 43 | // an error via HTTP status codes but still returns data, 44 | // with a stack trace as the value for ModuleCode and 45 | // default values for the rest of the fields. 46 | const isException = R.whereEq({ LastModified: '/Date(-62135596800000)/' }); 47 | const elementsWithException = R.filter(isException, deltasSinceLastModified); 48 | if (elementsWithException.length) { 49 | throw new Error(`Encounted exceptions with IVLE API: ${elementsWithException}`); 50 | } 51 | moduleTimetableDeltas = moduleTimetableDeltas.concat(deltasSinceLastModified); 52 | log.info(`parsed ${moduleTimetableDeltas.length} module timetable delta`); 53 | 54 | function write(pathToWrite, data) { 55 | log.info(`saving to ${pathToWrite}`); 56 | return fs.outputJson(pathToWrite, data, { spaces: config.jsonSpace }); 57 | } 58 | 59 | write(destPath, moduleTimetableDeltas); 60 | 61 | const groupByAcadYear = R.groupBy(R.prop('AcadYear')); 62 | const groupBySemester = R.groupBy(R.prop('Semester')); 63 | const toWrite = []; 64 | Object.entries(groupByAcadYear(moduleTimetableDeltas)).forEach(([academicYear, deltaForAY]) => { 65 | Object.entries(groupBySemester(deltaForAY)).forEach(([semester, deltaForSem]) => { 66 | const pathToWrite = path.join( 67 | config.destFolder, 68 | academicYear.replace('/', '-'), 69 | semester, 70 | config.destFileName, 71 | ); 72 | toWrite.push(write(pathToWrite, deltaForSem)); 73 | }); 74 | }); 75 | await Promise.all(toWrite); 76 | } 77 | 78 | export default moduleTimetableDelta; 79 | -------------------------------------------------------------------------------- /gulp-tasks/remote/venues.js: -------------------------------------------------------------------------------- 1 | import path from 'path'; 2 | import fs from 'fs-extra'; 3 | import bunyan from 'bunyan'; 4 | import querystring from 'querystring'; 5 | import gotCached from '../utils/gotCached'; 6 | 7 | /** 8 | * Outputs venue data for the school for one acad year. 
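 * The single request made is built from the constants below:
 *   http://nuslivinglab.nus.edu.sg/api_dev/api/Dept?name=&output=json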
9 | * By default outputs to: 10 | * - venuesRaw.json 11 | */ 12 | 13 | const NUS_API_URL = 'http://nuslivinglab.nus.edu.sg/api_dev/api/'; 14 | 15 | const log = bunyan.createLogger({ name: 'venues' }); 16 | 17 | async function venues(config) { 18 | const query = querystring.stringify({ 19 | name: '', 20 | output: 'json', 21 | }); 22 | const url = `${NUS_API_URL}Dept?${query}`; 23 | const locations = JSON.parse(await gotCached(url, config)); 24 | log.info(`parsed ${locations.length} venues`); 25 | 26 | const pathToWrite = path.join(config.destFolder, config.destFileName); 27 | log.info(`saving to ${pathToWrite}`); 28 | await fs.outputJson(pathToWrite, locations, { spaces: config.jsonSpace }); 29 | } 30 | 31 | export default venues; 32 | -------------------------------------------------------------------------------- /gulp-tasks/utils/__snapshots__/timify.test.js.snap: -------------------------------------------------------------------------------- 1 | // Jest Snapshot v1, https://goo.gl/fbAQLP 2 | 3 | exports[`getAllDays should get array of only school days 1`] = ` 4 | Array [ 5 | "Monday", 6 | "Tuesday", 7 | "Wednesday", 8 | "Thursday", 9 | "Friday", 10 | "Saturday", 11 | "Sunday", 12 | ] 13 | `; 14 | 15 | exports[`getSchoolDays should get array of only school days 1`] = ` 16 | Array [ 17 | "Monday", 18 | "Tuesday", 19 | "Wednesday", 20 | "Thursday", 21 | "Friday", 22 | "Saturday", 23 | ] 24 | `; 25 | 26 | exports[`getWeekdays should get array of only weekdays 1`] = ` 27 | Array [ 28 | "Monday", 29 | "Tuesday", 30 | "Wednesday", 31 | "Thursday", 32 | "Friday", 33 | ] 34 | `; 35 | -------------------------------------------------------------------------------- /gulp-tasks/utils/gotCached.js: -------------------------------------------------------------------------------- 1 | import path from 'path'; 2 | import fs from 'fs-extra'; 3 | import url from 'url'; 4 | import got from 'got'; 5 | import parse5 from 'parse5'; 6 | import isBinaryPath from 'is-binary-path'; 7 | import bunyan from 'bunyan'; 8 | 9 | const log = bunyan.createLogger({ name: 'gotCached' }); 10 | 11 | /** 12 | * Converts URL to equivalent valid filename. 13 | */ 14 | function getCachePath(urlStr, cachePath) { 15 | const fileUrl = url.parse(urlStr); 16 | const pathAndHash = fileUrl.path + (fileUrl.hash ? fileUrl.hash : ''); 17 | const hostname = encodeURIComponent(fileUrl.hostname); 18 | const restOfPath = encodeURIComponent(pathAndHash); 19 | return path.join(cachePath, hostname, restOfPath); 20 | } 21 | 22 | /** 23 | * Gets the time the file was last modified if it exists, null otherwise. 
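 * The mtime is later compared against maxCacheAge to decide whether the cached copy
 * is still fresh, and doubles as the If-Modified-Since value sent when it is not.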
24 | */ 25 | async function getFileModifiedTime(cachedPath, urlStr) { 26 | try { 27 | const stats = await fs.stat(cachedPath); 28 | if (stats.isFile()) { 29 | return stats.mtime; 30 | } 31 | log.warn(`${cachedPath} is not a file`); 32 | } catch (err) { 33 | log.info(`no cached file for ${urlStr}`); 34 | } 35 | return null; 36 | } 37 | 38 | async function gotCached(urlStr, config) { 39 | const cachedPath = getCachePath(urlStr, config.cachePath); 40 | function returnCached() { 41 | log.info(`returning cached file for ${urlStr}`); 42 | return fs.readFile(cachedPath); 43 | } 44 | 45 | const modifiedTime = await getFileModifiedTime(cachedPath, urlStr); 46 | const maxCacheAge = config.maxCacheAge; 47 | const isCachedFileValid = modifiedTime && (modifiedTime > Date.now() - (maxCacheAge * 1000)); 48 | if (maxCacheAge === -1 || isCachedFileValid) { 49 | return returnCached(); 50 | } 51 | 52 | const options = { 53 | url: urlStr, 54 | // returns body as a buffer instead of string if its a binary file 55 | encoding: isBinaryPath(urlStr) ? null : 'utf-8', 56 | }; 57 | if (modifiedTime) { 58 | options.headers = config.headers || {}; 59 | const modifedTimeString = (new Date(modifiedTime)).toUTCString(); 60 | options.headers['if-modified-since'] = modifedTimeString; 61 | } 62 | 63 | try { 64 | const response = await got(urlStr, options); 65 | let body = response.body; 66 | if (response.headers['content-type'] === 'text/html') { 67 | // Serializes the parsed document 68 | const doc = parse5.parse(body); 69 | body = parse5.serialize(doc); 70 | } 71 | await fs.outputFile(cachedPath, body); 72 | return body; 73 | } catch (error) { 74 | if (error.statusCode === 304) { 75 | return returnCached(); 76 | } 77 | if (error.statusCode) { 78 | throw new Error(`got http ${error.statusCode} while fetching ${urlStr}`); 79 | } 80 | throw error; 81 | } 82 | } 83 | 84 | export default gotCached; 85 | -------------------------------------------------------------------------------- /gulp-tasks/utils/iterateSems.js: -------------------------------------------------------------------------------- 1 | import R from 'ramda'; 2 | 3 | /** 4 | * Generates an array of configuration for gulp tasks use. 
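 *
 * For example (made-up values), iterateSems({ from: 2016, to: 2017, semesters: [1, 2],
 * config: { destFolder: 'api' } }) yields
 *   [{ destFolder: 'api', year: 2016, semester: 1 },
 *    { destFolder: 'api', year: 2016, semester: 2 }]
 * Note that `to` is exclusive, since R.range is used for the year span.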
5 | */ 6 | export default function iterateSems(obj) { 7 | const yearStart = obj.from; 8 | const yearEnd = obj.to; 9 | 10 | const semesters = obj.semesters; 11 | 12 | const years = R.range(yearStart, yearEnd); 13 | const config = obj.config; 14 | 15 | // eslint-disable-next-line 16 | const injectConfig = R.map(([year, semester]) => { 17 | return { 18 | ...config, 19 | year, 20 | semester, 21 | }; 22 | }); 23 | 24 | const allSems = R.xprod(years, semesters); 25 | return injectConfig(allSems); 26 | } 27 | -------------------------------------------------------------------------------- /gulp-tasks/utils/mergeModuleFields.js: -------------------------------------------------------------------------------- 1 | import R from 'ramda'; 2 | import { diffWords, diffJson } from 'diff'; 3 | import prune from 'underscore.string/prune'; 4 | 5 | // values that are recognised as null 6 | const NULL_REGEX = /^(^$|--|n[/.]?a\.?|nil|none\.?|null)$/i; 7 | 8 | // fields that will cause problems if differences in values arises 9 | const CRITICAL_FIELDS = [ 10 | 'Department', 11 | 'CrossModule', 12 | 'ModuleCredit', 13 | 'ExamDate', 14 | 'ExamOpenBook', 15 | 'ExamDuration', 16 | 'ExamVenue', 17 | ]; 18 | 19 | // arbitrary char length to truncate for module description 20 | const PRUNE_LIMIT = 100; 21 | 22 | /** 23 | * Merges modules' by their fields, more critical fields are logged as warnings, 24 | * whereas less critical fields are logged as info. 25 | * Curried function to be easily reused. 26 | * @param {bunyan log} log - The log to be used for output. 27 | * @param {string} moduleCode - The moduleCode to be used for logging. 28 | * @param {string} module - The module to be merged. 29 | * @param {string} anotherModule - The module to be merged, whose field will be used in case of conflict. 30 | * @returns {Object} output - The merged module. 31 | */ 32 | function mergeModuleFields(log, moduleCode, thisModule, anotherModule) { 33 | const differentModuleError = new Error('Different modules cannot be merged.'); 34 | if (thisModule.ModuleCode && thisModule.ModuleCode !== moduleCode) { 35 | throw differentModuleError; 36 | } 37 | if (anotherModule.ModuleCode && anotherModule.ModuleCode !== moduleCode) { 38 | throw differentModuleError; 39 | } 40 | return R.mergeWithKey((key, x, y) => { 41 | // return whichever side that has data 42 | const xIsNullData = NULL_REGEX.test(x); 43 | const yIsNullData = NULL_REGEX.test(y); 44 | if (xIsNullData && yIsNullData) { 45 | return ''; 46 | } else if (yIsNullData) { 47 | return x; 48 | } else if (xIsNullData) { 49 | return y; 50 | } 51 | if (x === y) { 52 | return y; 53 | } 54 | // diff and return whichever side that has strictly more data 55 | const diffFunc = typeof x === 'string' ? diffWords : diffJson; 56 | const diffs = diffFunc(x, y); 57 | if (diffs.filter(diff => diff.removed).length === 0) { 58 | return y; 59 | } else if (diffs.filter(diff => diff.added).length === 0) { 60 | return x; 61 | } 62 | const level = CRITICAL_FIELDS.includes(key) ? 'warn' : 'info'; 63 | const strX = key === 'ModuleDescription' ? prune(x, PRUNE_LIMIT) : x; 64 | const strY = key === 'ModuleDescription' ? 
prune(y, PRUNE_LIMIT) : y; 65 | log[level](`module ${moduleCode}'s ${key} is not the same, got:\n1) '${strX}'\n2) '${strY}'`); 66 | return y; 67 | }, thisModule, anotherModule); 68 | } 69 | 70 | export default R.curry(mergeModuleFields); 71 | -------------------------------------------------------------------------------- /gulp-tasks/utils/mergeModuleFields.test.js: -------------------------------------------------------------------------------- 1 | import mergeModuleFields from './mergeModuleFields'; 2 | 3 | describe('mergeModuleFields', () => { 4 | const mockLog = { 5 | info: jest.fn(), 6 | warn: jest.fn(), 7 | }; 8 | const testModuleCode = 'test'; 9 | const testMergeModule = mergeModuleFields(mockLog, testModuleCode); 10 | 11 | beforeEach(() => { 12 | Object.values(mockLog).forEach((mock) => { 13 | mock.mockReset(); 14 | }); 15 | }); 16 | 17 | it('throws when module codes are incorrect', () => { 18 | expect(() => testMergeModule( 19 | { ModuleCode: testModuleCode }, 20 | { ModuleCode: 'test1' }, 21 | )).toThrow(); 22 | expect(() => testMergeModule( 23 | { ModuleCode: 'test1' }, 24 | { ModuleCode: testModuleCode }, 25 | )).toThrow(); 26 | }); 27 | 28 | it('does not throws when module codes are the correct', () => { 29 | expect(() => testMergeModule( 30 | { ModuleCode: testModuleCode }, 31 | { ModuleCode: testModuleCode }, 32 | )).not.toThrow(); 33 | }); 34 | 35 | it('merges whichever side that has data', () => { 36 | const testModule = { Data: 'testData' }; 37 | expect(testMergeModule( 38 | testModule, 39 | { Data: '' }, 40 | )).toEqual(testModule); 41 | expect(testMergeModule( 42 | { Data: '' }, 43 | testModule, 44 | )).toEqual(testModule); 45 | }); 46 | 47 | it('removes null data', () => { 48 | const testModules = [ 49 | { Data: 'nil' }, 50 | { Data: 'n.a.' 
}, 51 | { Data: 'none' }, 52 | { Data: 'null' }, 53 | ]; 54 | const relevantModule = { Data: 'relevant' }; 55 | testModules.forEach((mod) => { 56 | expect(testMergeModule( 57 | mod, 58 | relevantModule, 59 | )).toEqual(relevantModule); 60 | expect(testMergeModule( 61 | relevantModule, 62 | mod, 63 | )).toEqual(relevantModule); 64 | }); 65 | }); 66 | 67 | it('merges whichever side that has strictly more words', () => { 68 | const testModule = { Data: 'more data' }; 69 | const testModule1 = { Data: 'data' }; 70 | expect(testMergeModule( 71 | testModule, 72 | testModule1, 73 | )).toEqual(testModule); 74 | expect(testMergeModule( 75 | testModule1, 76 | testModule, 77 | )).toEqual(testModule); 78 | }); 79 | 80 | it('merges whichever side that has a larger array', () => { 81 | const testModule = { Data: ['more', 'data'] }; 82 | const testModule1 = { Data: ['data'] }; 83 | expect(testMergeModule( 84 | testModule, 85 | testModule1, 86 | )).toEqual(testModule); 87 | expect(testMergeModule( 88 | testModule1, 89 | testModule, 90 | )).toEqual(testModule); 91 | }); 92 | 93 | it('merges whichever side that has larger object', () => { 94 | const testModule = { Data: { field1: 'data', field2: 'more data' } }; 95 | const testModule1 = { Data: { field1: 'data' } }; 96 | expect(testMergeModule( 97 | testModule, 98 | testModule1, 99 | )).toEqual(testModule); 100 | expect(testMergeModule( 101 | testModule1, 102 | testModule, 103 | )).toEqual(testModule); 104 | }); 105 | 106 | it('logs the difference when unable to differentiate [info level]', () => { 107 | const testModule = { Data: 'this has data' }; 108 | const testModule1 = { Data: 'also has data, but different' }; 109 | expect(testMergeModule( 110 | testModule, 111 | testModule1, 112 | )).toEqual(testModule1); 113 | expect(mockLog.info).toHaveBeenCalled(); 114 | }); 115 | 116 | it('logs the difference when unable to differentiate [warn level]', () => { 117 | const testModule = { ExamDate: 'this has data' }; 118 | const testModule1 = { ExamDate: 'also has data, but different' }; 119 | expect(testMergeModule( 120 | testModule, 121 | testModule1, 122 | )).toEqual(testModule1); 123 | expect(mockLog.warn).toHaveBeenCalled(); 124 | }); 125 | }); 126 | -------------------------------------------------------------------------------- /gulp-tasks/utils/pdf.js: -------------------------------------------------------------------------------- 1 | import pdfjs from 'pdfjs-dist'; 2 | import R from 'ramda'; 3 | 4 | async function getPagesFromPdf(fileData) { 5 | const file = new Uint8Array(fileData); 6 | const pdf = await pdfjs.getDocument(file); 7 | const pagesRange = R.range(1, pdf.numPages + 1); 8 | const pages = Promise.all(pagesRange.map(num => pdf.getPage(num))); 9 | return pages; 10 | } 11 | 12 | async function getTextFromPages(pages) { 13 | function joinStrings(arrayOfObj) { 14 | let yPos = arrayOfObj[0].width; 15 | return arrayOfObj.reduce((str, obj) => { 16 | const previousYPos = yPos; 17 | yPos = obj.transform[4] - obj.width; 18 | 19 | // don't add space if they're too close 20 | const separator = (yPos - previousYPos <= 0) ? 
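/*
 * (A note on the pdf text extraction here.) Each item that pdfjs-dist returns from
 * getTextContent() looks roughly like
 *   { str: 'CS1010', width: 34.1, transform: [9, 0, 0, 9, 71.9, 698.8] }
 * (values made up), where transform[4] and transform[5] are the x and y positions of
 * the text run. Items sharing the same transform[5] are grouped into one line by
 * R.groupWith below, and joinStrings walks each line left to right, inserting a space
 * only when there is a visible horizontal gap; despite its name, yPos is tracking a
 * horizontal offset here.
 */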
'' : ' '; 21 | return str + separator + obj.str; 22 | }, ''); 23 | } 24 | 25 | const getTextFromPage = R.pipeP( 26 | page => page.getTextContent(), 27 | R.prop('items'), 28 | R.groupWith((a, b) => a.transform[5] === b.transform[5]), 29 | R.map(joinStrings), 30 | ); 31 | 32 | return Promise.all(R.map(getTextFromPage, pages)); 33 | } 34 | 35 | const getPagesTextFromPdf = R.pipeP(getPagesFromPdf, getTextFromPages); 36 | 37 | export { getPagesFromPdf, getPagesTextFromPdf }; 38 | -------------------------------------------------------------------------------- /gulp-tasks/utils/pdf.test.js: -------------------------------------------------------------------------------- 1 | import fs from 'fs-extra'; 2 | import { getPagesFromPdf, getPagesTextFromPdf } from './pdf'; 3 | 4 | jest.unmock('fs-extra'); 5 | 6 | /** 7 | * More files should be included as time passes to maintain compatability 8 | * with previous years' pdfs. 9 | */ 10 | const fileData = fs.readFileSync('__mocks__/fixtures/test1.pdf'); 11 | 12 | describe('pdf', () => { 13 | it('getPagesFromPdf gets pages from pdf', async () => { 14 | const pages = await getPagesFromPdf(fileData); 15 | expect(pages.length).toBe(1); 16 | }); 17 | 18 | it('getPagesTextFromPdf gets pages\' text from pdf', async () => { 19 | const textPages = await getPagesTextFromPdf(fileData); 20 | expect(textPages.length).toBe(1); 21 | expect(textPages[0].length).toBe(77); 22 | }); 23 | }); 24 | -------------------------------------------------------------------------------- /gulp-tasks/utils/sortByKey.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Reorders the object by the sorting order of the keys. 3 | */ 4 | export default function sortByKey(object) { 5 | const sortedObject = {}; 6 | Object.keys(object).sort().forEach((key) => { 7 | sortedObject[key] = object[key]; 8 | }); 9 | return sortedObject; 10 | } 11 | -------------------------------------------------------------------------------- /gulp-tasks/utils/sortByKey.test.js: -------------------------------------------------------------------------------- 1 | import sortByKey from './sortByKey'; 2 | 3 | describe('sortByKey', () => { 4 | it('sorts object in ascending order', () => { 5 | expect(sortByKey({ b: 1, a: 1 })).toEqual({ a: 1, b: 1 }); 6 | }); 7 | }); 8 | -------------------------------------------------------------------------------- /gulp-tasks/utils/timify.js: -------------------------------------------------------------------------------- 1 | import R from 'ramda'; 2 | 3 | const DAYS = [ 4 | 'Monday', 5 | 'Tuesday', 6 | 'Wednesday', 7 | 'Thursday', 8 | 'Friday', 9 | 'Saturday', 10 | 'Sunday', 11 | ]; 12 | 13 | /** 14 | * Converts a 24-hour format time string to an index. 15 | * @param {string} time - 24-hour format time to convert to index 16 | * @example 0000 -> 0, 0030 -> 1, 0100 -> 2, ... 17 | * @returns {number} index - integer representing array index 18 | */ 19 | export function convertTimeToIndex(time) { 20 | const hour = parseInt(time.substring(0, 2), 10); 21 | const minute = time.substring(2); 22 | /* eslint-disable quote-props */ 23 | return (hour * 2) + { '00': 0, '30': 1, '59': 2 }[minute]; 24 | } 25 | 26 | /** 27 | * Reverse of convertTimeToIndex - converts an index to 24-hour format time string. 28 | * @param {number} index - index to convert to 24-hour format time 29 | * @example 0 -> 0000, 1 -> 0030, 2 -> 0100, ... 
30 | * @returns {string} time - 24-hour format time 31 | */ 32 | export function convertIndexToTime(index) { 33 | const hour = parseInt(index / 2, 10); 34 | const minute = (index % 2) === 0 ? '00' : '30'; 35 | return (hour < 10 ? `0${hour}` : hour.toString()) + minute; 36 | } 37 | 38 | /** 39 | * Returns a range of 24-hour format time string, each 30 minutes apart. 40 | * @param {string} startTime - 24-hour format time to start from (inclusive) 41 | * @param {string} endTime - 24-hour format time to end (exclusive) 42 | * @example getTimeRange('0900', '2400') -> ['0900', '0930', ..., '2330'] 43 | * @returns {Array} listOfTime - 24-hour format time each 30 minutes apart. 44 | */ 45 | export function getTimeRange(startTime, endTime) { 46 | const timeRange = R.range( 47 | convertTimeToIndex(startTime), 48 | convertTimeToIndex(endTime), 49 | ); 50 | return timeRange.map(convertIndexToTime); 51 | } 52 | 53 | export function getAllDays() { 54 | return DAYS.slice(); 55 | } 56 | 57 | /** 58 | * List of all days in a school days, 59 | * currently means Sunday is not a school day. 60 | */ 61 | export function getSchoolDays() { 62 | return DAYS.slice(0, -1); 63 | } 64 | 65 | export function getWeekdays() { 66 | return DAYS.slice(0, -2); 67 | } 68 | 69 | export default { 70 | convertTimeToIndex, 71 | convertIndexToTime, 72 | getTimeRange, 73 | getAllDays, 74 | getSchoolDays, 75 | getWeekdays, 76 | }; 77 | -------------------------------------------------------------------------------- /gulp-tasks/utils/timify.test.js: -------------------------------------------------------------------------------- 1 | import { 2 | convertTimeToIndex, 3 | convertIndexToTime, 4 | getTimeRange, 5 | getAllDays, 6 | getSchoolDays, 7 | getWeekdays, 8 | } from './timify'; 9 | 10 | describe('convertTimeToIndex', () => { 11 | it('should convert time string to index', () => { 12 | for (let hour = 0; hour < 24; hour += 1) { 13 | const doubleDigitTime = `0${hour % 24}`.slice(-2); 14 | expect(convertTimeToIndex(`${doubleDigitTime}00`)).toBe(hour * 2); 15 | expect(convertTimeToIndex(`${doubleDigitTime}30`)).toBe((hour * 2) + 1); 16 | } 17 | }); 18 | 19 | it('should convert non-half hour string to index', () => { 20 | const actual = convertTimeToIndex('2359'); 21 | const expected = 48; 22 | expect(actual).toBe(expected); 23 | }); 24 | }); 25 | 26 | describe('convertIndexToTime', () => { 27 | it('should convert time index to string', () => { 28 | for (let hour = 0; hour < 24; hour += 1) { 29 | const doubleDigitTime = `0${hour % 24}`.slice(-2); 30 | expect(convertIndexToTime(hour * 2)).toBe(`${doubleDigitTime}00`); 31 | expect(convertIndexToTime((hour * 2) + 1)).toBe(`${doubleDigitTime}30`); 32 | } 33 | }); 34 | }); 35 | 36 | describe('getTimeRange', () => { 37 | it('should convert time range to array in intervals of 30', () => { 38 | expect(getTimeRange('0900', '1000')).toEqual(['0900', '0930']); 39 | }); 40 | 41 | it('should convert time range with strings after 1200', () => { 42 | expect(getTimeRange('1300', '1330')).toEqual(['1300']); 43 | }); 44 | }); 45 | 46 | describe('getAllDays', () => { 47 | it('should get array of only school days', () => { 48 | const schoolDays = getAllDays(); 49 | expect(schoolDays).toHaveLength(7); 50 | expect(schoolDays).toMatchSnapshot(); 51 | }); 52 | }); 53 | 54 | describe('getSchoolDays', () => { 55 | it('should get array of only school days', () => { 56 | const schoolDays = getSchoolDays(); 57 | expect(schoolDays).toHaveLength(6); 58 | expect(schoolDays).toMatchSnapshot(); 59 | }); 60 | }); 61 | 62 
| describe('getWeekdays', () => { 63 | it('should get array of only weekdays', () => { 64 | expect(getWeekdays()).toMatchSnapshot(); 65 | }); 66 | }); 67 | -------------------------------------------------------------------------------- /gulp-tasks/utils/titleize.js: -------------------------------------------------------------------------------- 1 | /** 2 | * NUS specific title case function that accounts for school names, etc. 3 | */ 4 | export default function titleize(str) { 5 | return str.toLowerCase() 6 | .replace(/(?:^|\s\(?|-|\/)\S/g, string => string.toUpperCase()) // http://stackoverflow.com/a/7592235 7 | .replace(/\bIp\b/, 'IP') 8 | .replace(/\bMit\b/, 'MIT') 9 | .replace(/^Na$/, 'NA') 10 | .replace(/\bNus\b/, 'NUS'); 11 | } 12 | -------------------------------------------------------------------------------- /gulpfile.babel.js: -------------------------------------------------------------------------------- 1 | import 'babel-polyfill'; 2 | import gulp from 'gulp'; 3 | import Promise from 'bluebird'; 4 | import bunyan from 'bunyan'; 5 | import moment from 'moment'; 6 | import nusmoderator from 'nusmoderator'; 7 | import R from 'ramda'; 8 | import config from './config'; 9 | import tasks from './gulp-tasks'; 10 | import iterateSems from './gulp-tasks/utils/iterateSems'; 11 | import { REGULAR_SEMESTER, SPECIAL_SEMESTER } from './gulp-tasks/remote/cors'; 12 | 13 | const monthsAhead = moment().add(2, 'months'); 14 | const acadObj = nusmoderator.academicCalendar.getAcadYear(monthsAhead.toDate()); 15 | const schoolSem = nusmoderator.academicCalendar.getAcadSem(monthsAhead.week()); 16 | const schoolYear = 2000 + parseInt(acadObj.year.substr(0, 2), 10); 17 | 18 | const yearStart = config.defaults.year || schoolYear; 19 | const yearEnd = yearStart + 1; 20 | 21 | const log = bunyan.createLogger({ 22 | name: 'gulpfile', 23 | level: process.env.NODE_ENV === 'development' ? 
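/*
 * (A note on titleize above.) Two illustrative inputs, values made up:
 *   titleize('nus high school of mathematics and science')
 *     returns 'NUS High School Of Mathematics And Science'
 *   titleize('yale-nus college') returns 'Yale-NUS College'
 */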
'debug' : 'info', 24 | }); 25 | log.info(`gulp process started: parsing AY ${yearStart}/${yearEnd}`); 26 | 27 | log.debug(`cache path is ${config.defaults.cachePath}`); 28 | 29 | gulp.task('bulletinModules', () => { 30 | const subtasks = iterateSems({ 31 | from: yearStart, 32 | to: yearEnd, 33 | semesters: [0, 1, 2, 3, 4], 34 | config: config.bulletinModules, 35 | }); 36 | 37 | const bulletinModules = R.map(tasks.bulletinModules, subtasks); 38 | return Promise.all(bulletinModules); 39 | }); 40 | 41 | gulp.task('cors', () => { 42 | const subtasks = iterateSems({ 43 | from: yearStart, 44 | to: yearEnd, 45 | semesters: [REGULAR_SEMESTER, SPECIAL_SEMESTER], 46 | config: config.cors, 47 | }); 48 | const cors = R.map(tasks.cors, subtasks); 49 | return Promise.all(cors); 50 | }); 51 | 52 | gulp.task('corsBiddingStats', () => { 53 | const subtasks = iterateSems({ 54 | from: yearStart, 55 | to: yearEnd, 56 | semesters: [1, 2], 57 | config: config.corsBiddingStats, 58 | }); 59 | const corsBiddingStats = R.map(tasks.corsBiddingStats, subtasks); 60 | return Promise.all(corsBiddingStats); 61 | }); 62 | 63 | gulp.task('examTimetable', () => { 64 | const subtasks = iterateSems({ 65 | from: yearStart, 66 | to: yearEnd, 67 | semesters: [1, 2, 3, 4], 68 | config: config.examTimetable, 69 | }); 70 | 71 | const examTimetables = R.map(tasks.examTimetable, subtasks); 72 | return Promise.all(examTimetables); 73 | }); 74 | 75 | gulp.task('ivle', () => { 76 | const subtasks = iterateSems({ 77 | from: yearStart, 78 | to: yearEnd, 79 | semesters: [1, 2, 3, 4], 80 | config, 81 | }); 82 | const ivle = R.map(tasks.ivle, subtasks); 83 | return Promise.all(ivle); 84 | }); 85 | 86 | gulp.task('moduleTimetableDelta', () => tasks.moduleTimetableDelta(config.moduleTimetableDelta)); 87 | 88 | gulp.task('venues', () => tasks.venues(config.venues)); 89 | 90 | gulp.task('remote', gulp.parallel( 91 | 'bulletinModules', 92 | 'cors', 93 | 'corsBiddingStats', 94 | 'examTimetable', 95 | 'venues', 96 | 'moduleTimetableDelta', 97 | )); 98 | 99 | gulp.task('mergeCorsBiddingStats', () => { 100 | const toMerge = iterateSems({ 101 | from: yearStart - 7, // merge at most 7 years of bidding stats 102 | to: yearEnd, 103 | semesters: [1, 2], 104 | config: config.corsBiddingStats, 105 | }); 106 | return tasks.mergeCorsBiddingStats(toMerge); 107 | }); 108 | 109 | gulp.task('consolidateForSem', () => { 110 | const subtasks = iterateSems({ 111 | from: yearStart, 112 | to: yearEnd, 113 | semesters: [1, 2, 3, 4], 114 | config, 115 | }); 116 | return Promise.each(subtasks, tasks.consolidateForSem); 117 | }); 118 | 119 | gulp.task('splitForSem', () => { 120 | const subtasks = iterateSems({ 121 | from: yearStart, 122 | to: yearEnd, 123 | semesters: [1, 2, 3, 4], 124 | config, 125 | }); 126 | return Promise.each(subtasks, tasks.splitForSem); 127 | }); 128 | 129 | gulp.task('consolidateForYear', () => { 130 | const subtasks = iterateSems({ 131 | from: yearStart, 132 | to: yearEnd, 133 | semesters: [schoolSem], 134 | config: config.consolidate, 135 | }); 136 | return Promise.each(subtasks, tasks.consolidateForYear); 137 | }); 138 | 139 | gulp.task('splitForYear', () => { 140 | const subtasks = iterateSems({ 141 | from: yearStart, 142 | to: yearEnd, 143 | semesters: [schoolSem], 144 | config, 145 | }); 146 | return Promise.each(subtasks, tasks.splitForYear); 147 | }); 148 | 149 | gulp.task('local', gulp.series( 150 | 'mergeCorsBiddingStats', 151 | 'consolidateForSem', 152 | 'splitForSem', 153 | 'consolidateForYear', 154 | 'splitForYear', 155 | )); 156 | 
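// Individual scrapers can be run on their own (for example `gulp venues` or
// `gulp examTimetable`, with gulp-cli installed), while the `default` task below
// chains the full `remote` then `local` pipeline; package.json's `scrape` script
// runs that default task against the compiled gulpfile.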
157 | gulp.task('default', gulp.series('remote', 'local')); 158 | 159 | -------------------------------------------------------------------------------- /jest.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | roots: [ 3 | '/__mocks__', 4 | '/gulp-tasks', 5 | '/src', 6 | ], 7 | testPathIgnorePatterns: [ 8 | '.eslintrc.js', 9 | ], 10 | // Node environment 11 | testEnvironment: 'node', 12 | collectCoverageFrom: ['**/*.{js,jsx}', '!**/node_modules/**', '!**/vendor/**'], 13 | // Only write lcov files in CIs 14 | coverageReporters: ['text'].concat(process.env.CI ? 'lcov' : []), 15 | }; 16 | -------------------------------------------------------------------------------- /jsonp.php: -------------------------------------------------------------------------------- 1 | { 2 | const schoolsTable = knex.schema.createTable('schools', (table) => { 3 | table.increments('id').notNullable().primary(); 4 | table.string('name').notNullable().unique(); 5 | table.string('abbreviation', 32); 6 | }); 7 | 8 | const departmentsTable = knex.schema.createTable('departments', (table) => { 9 | table.increments('id').notNullable().primary(); 10 | table 11 | .integer('school_id') 12 | .notNullable() 13 | .references('id') 14 | .inTable('schools') 15 | .onDelete('CASCADE') 16 | .onUpdate('CASCADE'); 17 | table.string('name').notNullable(); 18 | table.unique(['school_id', 'name']); 19 | }); 20 | 21 | const venuesTable = knex.schema.createTable('venues', (table) => { 22 | table.increments('id').notNullable().primary(); 23 | table 24 | .integer('school_id') 25 | .notNullable() 26 | .references('id') 27 | .inTable('schools') 28 | .onDelete('CASCADE') 29 | .onUpdate('CASCADE'); 30 | table.string('name').notNullable(); 31 | table.string('type'); 32 | table.string('owned_by'); 33 | table.unique(['school_id', 'name']); 34 | }); 35 | 36 | return Promise.all([schoolsTable, departmentsTable, venuesTable]); 37 | }; 38 | 39 | exports.down = (knex, Promise) => { 40 | const tables = ['schools', 'departments', 'venues']; 41 | return Promise.all( 42 | tables.map(table => 43 | knex.schema.dropTableIfExists(table).then(() => table), 44 | ), 45 | ).then((tbls) => { 46 | if (process.env.NODE_ENV !== 'test') { 47 | console.log(`tables ${tbls.join(', ')} was dropped`); 48 | } 49 | }); 50 | }; 51 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "nusmods-api", 3 | "description": "NUSMods API", 4 | "version": "0.0.1", 5 | "homepage": "http://api.nusmods.com", 6 | "author": { 7 | "name": "NUSModifications", 8 | "email": "mods@nusmods.com" 9 | }, 10 | "scripts": { 11 | "dev": "cross-env NODE_ENV=development nodemon server --exec babel-node --watch server | bunyan -o short --color", 12 | "scrape": "cross-env NODE_ENV=production gulp --gulpfile gulpfile.compiled.js | bunyan -o short --color", 13 | "scrape:dev": "cross-env BABEL_ENV=development gulp local | bunyan -l debug -o short --color", 14 | "build:scraper": "cross-env BABEL_ENV=development webpack --config webpack.config.babel.js", 15 | "test": "jest --config jest.config.js --coverage", 16 | "test:watch": "jest --config jest.config.js --watch", 17 | "lint": "eslint . --ignore-path .gitignore", 18 | "lint:fix": "eslint . 
--ignore-path .gitignore --fix" 19 | }, 20 | "repository": { 21 | "type": "git", 22 | "url": "git://github.com/nusmodifications/nusmods-api.git" 23 | }, 24 | "licenses": [ 25 | { 26 | "type": "MIT", 27 | "url": "https://github.com/nusmodifications/nusmods-api/blob/master/LICENSE-MIT" 28 | } 29 | ], 30 | "private": true, 31 | "dependencies": { 32 | "axios": "^0.16.2", 33 | "babel-polyfill": "^6.16.0", 34 | "bluebird": "^3.4.7", 35 | "boom": "^5.1.0", 36 | "bunyan": "^1.8.5", 37 | "cheerio": "^0.22.0", 38 | "chevrotain": "^0.32.1", 39 | "cross-env": "^5.0.1", 40 | "diff": "^3.3.0", 41 | "dotenv": "^4.0.0", 42 | "fs-extra": "^4.0.0", 43 | "got": "^6.6.3", 44 | "graphql": "^0.9.4", 45 | "graphql-server-koa": "^0.7.2", 46 | "graphql-tools": "^0.11.0", 47 | "gulp": "gulpjs/gulp.git#4.0", 48 | "gulp-cli": "^1.2.2", 49 | "is-binary-path": "^2.0.0", 50 | "knex": "^0.13.0", 51 | "koa": "^2.2.0", 52 | "koa-bodyparser": "^4.2.0", 53 | "koa-bunyan-logger": "^2.0.0", 54 | "koa-compose": "^4.0.0", 55 | "koa-router": "^7.1.1", 56 | "lodash": "^4.17.4", 57 | "moment": "^2.17.1", 58 | "nusmoderator": "^2.0.0", 59 | "parse5": "^3.0.1", 60 | "pdfjs-dist": "^1.8.552", 61 | "ramda": "^0.24.1", 62 | "romanify": "^1.0.0", 63 | "sanitize-filename": "^1.6.1", 64 | "sqlite3": "^3.1.9", 65 | "underscore.string": "^3.3.4", 66 | "uuid": "^3.1.0" 67 | }, 68 | "devDependencies": { 69 | "babel-cli": "^6.16.0", 70 | "babel-core": "^6.25.0", 71 | "babel-eslint": "^7.1.1", 72 | "babel-jest": "^20.0.3", 73 | "babel-loader": "^7.1.1", 74 | "babel-plugin-transform-object-rest-spread": "^6.19.0", 75 | "babel-preset-bluebird": "^1.0.1", 76 | "babel-preset-env": "^1.6.0", 77 | "babel-preset-flow": "^6.23.0", 78 | "babili-webpack-plugin": "^0.1.2", 79 | "eslint": "^4.3.0", 80 | "eslint-config-airbnb-base": "^11.1.3", 81 | "eslint-import-resolver-node": "^0.3.0", 82 | "eslint-plugin-import": "^2.7.0", 83 | "jest": "^20.0.4", 84 | "mock-fs": "^4.2.0", 85 | "nock": "^9.0.14", 86 | "nodemon": "^1.11.0", 87 | "webpack": "^3.3.0", 88 | "webpack-node-externals": "^1.6.0" 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /seeds/schools.js: -------------------------------------------------------------------------------- 1 | exports.seed = knex => 2 | // Deletes ALL existing entries 3 | knex('schools').truncate().then(() => 4 | // Inserts seed entries 5 | knex('schools').insert([ 6 | { name: 'National Univerity of Singapore', abbreviation: 'NUS' }, 7 | { name: 'National Technological Univesity', abbreviation: 'NTU' }, 8 | { name: 'Singapore Management University', abbreviation: 'SMU' }, 9 | ]), 10 | ) 11 | ; 12 | -------------------------------------------------------------------------------- /src/db.js: -------------------------------------------------------------------------------- 1 | // @flow 2 | import 'dotenv/config'; 3 | import Knex from 'knex'; 4 | 5 | import knexConfig from '../knexfile'; 6 | 7 | // Create an appropriate knex instance 8 | const knex = Knex(knexConfig[process.env.NODE_ENV]); 9 | 10 | export default knex; 11 | -------------------------------------------------------------------------------- /src/graphql/__snapshots__/index.test.js.snap: -------------------------------------------------------------------------------- 1 | // Jest Snapshot v1, https://goo.gl/fbAQLP 2 | 3 | exports[`graphql should be not be null when modules are found 1`] = ` 4 | Object { 5 | "modules": Array [ 6 | Object { 7 | "code": "testCode", 8 | }, 9 | Object { 10 | "code": "anotherTestCode", 11 | }, 
12 | ], 13 | } 14 | `; 15 | 16 | exports[`graphql should not be null when module is valid 1`] = ` 17 | Object { 18 | "module": Object { 19 | "code": "anotherTestCode", 20 | "credit": 4, 21 | "department": null, 22 | "description": null, 23 | "title": "anotherTestTitle", 24 | }, 25 | } 26 | `; 27 | -------------------------------------------------------------------------------- /src/graphql/index.js: -------------------------------------------------------------------------------- 1 | import { makeExecutableSchema } from 'graphql-tools'; 2 | import bunyan from 'bunyan'; 3 | import R from 'ramda'; 4 | 5 | import jsonData from './jsonData'; 6 | 7 | const log = bunyan.createLogger({ name: 'graphql' }); 8 | 9 | const Schema = ` 10 | # Describes a module for, may span different semesters 11 | type Module { 12 | code: String! 13 | title: String! 14 | department: String 15 | description: String 16 | credit: Float 17 | workload: String 18 | prerequisite: String 19 | corequisite: String 20 | corsBiddingStats: [CorsBiddingStats] 21 | # Refers to the history of the module throughout semesters 22 | history: [ModuleInfo]! 23 | } 24 | 25 | # Describes a particular module for a semester 26 | type ModuleInfo { 27 | semester: Int 28 | examDate: String 29 | examOpenBook: Boolean 30 | examDuration: String 31 | examVenue: String 32 | timetable: [Lesson] 33 | } 34 | 35 | # Bidding stats for Cors 36 | type CorsBiddingStats { 37 | quota: Int 38 | bidders: Int 39 | lowestBid: Int 40 | lowestSuccessfulBid: Int 41 | highestBid: Int 42 | faculty: String 43 | studentAcctType: String 44 | acadYear: String 45 | semester: Int 46 | round: String 47 | group: String 48 | } 49 | 50 | # A lesson conducted, may it be a lecture, laboratory or lecture 51 | type Lesson { 52 | classNo: String! 53 | lessonType: String! 54 | weekText: String! 55 | dayText: String! 56 | startTime: String! 57 | endTime: String! 58 | venue: String! 59 | } 60 | 61 | # the schema allows the following query: 62 | type Query { 63 | modules(acadYear: String!, first: Int, offset: Int): [Module]! 64 | module(acadYear: String!, code: String!): Module! 65 | } 66 | 67 | schema { 68 | query: Query 69 | } 70 | `; 71 | 72 | const Resolvers = { 73 | Query: { 74 | modules(root, { acadYear, first, offset }) { 75 | const yearData = jsonData[acadYear]; 76 | if (yearData == null) { 77 | return null; 78 | } 79 | const modules = Object.values(yearData); 80 | return modules.slice(offset, offset ? 
(offset + first) : first); 81 | }, 82 | module(root, { acadYear, code }) { 83 | return R.path([acadYear, code], jsonData); 84 | }, 85 | }, 86 | }; 87 | 88 | const subLog = log.child({ path: 'graphql' }); 89 | const logger = { 90 | log: e => subLog.error(e), 91 | }; 92 | 93 | const schema = makeExecutableSchema({ 94 | typeDefs: Schema, 95 | resolvers: Resolvers, 96 | logger, 97 | }); 98 | 99 | export default schema; 100 | -------------------------------------------------------------------------------- /src/graphql/index.test.js: -------------------------------------------------------------------------------- 1 | import { graphql } from 'graphql'; 2 | import schema from './index'; 3 | 4 | const gql = x => x.raw[0]; // identify function for template literals 5 | 6 | jest.mock('./jsonData', () => ({ 7 | '2016-2017': { 8 | CS1000: { 9 | code: 'testCode', 10 | title: 'testTitle', 11 | credit: 4.0, 12 | history: [], 13 | }, 14 | CS2100: { 15 | code: 'anotherTestCode', 16 | title: 'anotherTestTitle', 17 | credit: 4.0, 18 | history: [], 19 | }, 20 | }, 21 | })); 22 | 23 | describe('graphql', () => { 24 | it('should be null when modules are not found', async () => { 25 | const query = gql` 26 | query { 27 | modules(acadYear: "2017-2018") { 28 | code 29 | } 30 | } 31 | `; 32 | const { data } = await graphql(schema, query); 33 | 34 | expect(data).toBeNull(); 35 | }); 36 | 37 | it('should be not be null when modules are found', async () => { 38 | const query = gql` 39 | query { 40 | modules(acadYear: "2016-2017") { 41 | code 42 | } 43 | } 44 | `; 45 | const { data } = await graphql(schema, query); 46 | 47 | expect(data).not.toBeNull(); 48 | expect(data).toMatchSnapshot(); 49 | }); 50 | 51 | it('should return everything when first and offset are not specified', async () => { 52 | const query = gql` 53 | query { 54 | modules(acadYear: "2016-2017") { 55 | code 56 | } 57 | } 58 | `; 59 | const { data: { modules } } = await graphql(schema, query); 60 | 61 | expect(modules).toHaveLength(2); 62 | }); 63 | 64 | it('should return first n elements when specified', async () => { 65 | const query = gql` 66 | query { 67 | modules(acadYear: "2016-2017", first: 1) { 68 | code 69 | } 70 | } 71 | `; 72 | const { data: { modules } } = await graphql(schema, query); 73 | 74 | expect(modules).toHaveLength(1); 75 | }); 76 | 77 | it('should return offset n elements when specified', async () => { 78 | const query = gql` 79 | query { 80 | modules(acadYear: "2016-2017", offset: 2) { 81 | code 82 | } 83 | } 84 | `; 85 | const { data: { modules } } = await graphql(schema, query); 86 | 87 | expect(modules).toHaveLength(0); 88 | }); 89 | 90 | it('should return first n and offset n elements when specified', async () => { 91 | const query = gql` 92 | query { 93 | modules(acadYear: "2016-2017", first: 1, offset: 1) { 94 | code 95 | } 96 | } 97 | `; 98 | const { data: { modules } } = await graphql(schema, query); 99 | 100 | expect(modules).toHaveLength(1); 101 | expect(modules[0].code).toBe('anotherTestCode'); 102 | }); 103 | 104 | it('should be null when module is not found', async () => { 105 | const query = gql` 106 | query { 107 | module(acadYear: "2017-2018", code: "CS2100") { 108 | code 109 | } 110 | } 111 | `; 112 | const { data } = await graphql(schema, query); 113 | 114 | expect(data).toBeNull(); 115 | }); 116 | 117 | it('should not be null when module is valid', async () => { 118 | const query = gql` 119 | query { 120 | module(acadYear: "2016-2017", code: "CS2100") { 121 | code 122 | title 123 | credit 124 | department 125 | 
description 126 | } 127 | } 128 | `; 129 | const { data } = await graphql(schema, query); 130 | 131 | expect(data).not.toBeNull(); 132 | expect(data).toMatchSnapshot(); 133 | }); 134 | }); 135 | -------------------------------------------------------------------------------- /src/graphql/jsonData.js: -------------------------------------------------------------------------------- 1 | import R from 'ramda'; 2 | 3 | import { walkJsonDirSync } from '../util/walkDir'; 4 | import mapKeysDeep from '../util/mapKeysDeep'; 5 | import config from '../../config'; 6 | 7 | /** 8 | * Fetches data from the api folder, and exports it for consumption. 9 | */ 10 | const apiFolder = config.defaults.destFolder; 11 | const modulesFile = config.consolidate.destFileName; 12 | 13 | const removeModuleKeys = mapKeysDeep(key => key.replace('Module', '')); 14 | const camelizeAllKeys = mapKeysDeep(key => key.replace(/[A-Z]/, R.toLower)); 15 | const indexByModuleCode = R.map(R.indexBy(R.prop('code'))); 16 | const processData = R.pipe(removeModuleKeys, camelizeAllKeys, indexByModuleCode); 17 | 18 | const data = processData(walkJsonDirSync(apiFolder, modulesFile)); 19 | 20 | export default data; 21 | -------------------------------------------------------------------------------- /src/index.js: -------------------------------------------------------------------------------- 1 | import Koa from 'koa'; 2 | 3 | import Router from 'koa-router'; 4 | import BodyParser from 'koa-bodyparser'; 5 | import { graphqlKoa, graphiqlKoa } from 'graphql-server-koa'; 6 | 7 | import Boom from 'boom'; 8 | import loggerMiddleware from 'koa-bunyan-logger'; 9 | import errorMiddleware from './middleware/error'; 10 | 11 | import log from './util/log'; 12 | import schema from './graphql'; 13 | 14 | const app = new Koa(); 15 | const router = new Router(); 16 | const bodyparser = new BodyParser(); 17 | 18 | // Register middleware 19 | app.use(bodyparser); 20 | app.use(loggerMiddleware(log)); 21 | app.use(loggerMiddleware.requestIdContext()); 22 | app.use(loggerMiddleware.requestLogger()); 23 | app.use(errorMiddleware()); 24 | 25 | // Registers routes 26 | router.post('/graphql', graphqlKoa({ schema })); 27 | router.get('/graphiql', graphiqlKoa({ endpointURL: '/graphql' })); 28 | 29 | app.use(router.routes()); 30 | app.use(router.allowedMethods({ 31 | throw: true, 32 | notImplemented: () => new Boom.notImplemented(), // eslint-disable-line new-cap 33 | methodNotAllowed: () => new Boom.methodNotAllowed(), // eslint-disable-line new-cap 34 | })); 35 | 36 | log.info('current environment: %s', process.env.NODE_ENV); 37 | log.info('server started at port: %d', process.env.PORT || 3000); 38 | app.listen(process.env.PORT || 3000); 39 | -------------------------------------------------------------------------------- /src/middleware/error.js: -------------------------------------------------------------------------------- 1 | import compose from 'koa-compose'; 2 | 3 | const handler = async (ctx, next) => { 4 | try { 5 | await next(); 6 | } catch (error) { 7 | ctx.log.error(error); 8 | 9 | if (error.isBoom) { 10 | ctx.body = error.output.payload; 11 | ctx.status = error.output.statusCode; 12 | 13 | return; 14 | } 15 | // TODO: Handle error that are not instance of `boom` 16 | 17 | throw error; 18 | } 19 | }; 20 | 21 | export default () => compose([ 22 | handler, 23 | ]); 24 | -------------------------------------------------------------------------------- /src/middleware/request.js: 
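/*
 * (A note on src/graphql/jsonData.js above.) The key mapping turns the scraped field
 * names into the camel-cased names the GraphQL schema exposes, and the result is
 * indexed by academic year and module code. Roughly, and assuming walkJsonDirSync
 * returns one array of modules per academic-year folder:
 *
 *   { ModuleCode: 'CS1010', ModuleTitle: 'Programming Methodology', ExamDate: '...' }
 *
 * ends up reachable as
 *
 *   data['2016-2017']['CS1010']
 *     // { code: 'CS1010', title: 'Programming Methodology', examDate: '...' }
 */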
-------------------------------------------------------------------------------- 1 | import compose from 'koa-compose'; 2 | 3 | const logger = async (ctx, next) => { 4 | ctx.log.info(`request from ${ctx.request.ip} to ${ctx.path}`); 5 | await next(); 6 | }; 7 | 8 | export default () => compose([ 9 | logger, 10 | ]); 11 | -------------------------------------------------------------------------------- /src/scrapers/BaseTask.js: -------------------------------------------------------------------------------- 1 | // @flow 2 | import bunyan from 'bunyan'; 3 | import bluebird from 'bluebird'; 4 | import fs from 'fs-extra'; 5 | 6 | import db from '../db'; 7 | import http from './HttpService'; 8 | import config from '../../config'; 9 | 10 | /** 11 | * Base class for all scraping tasks, contains useful utilities 12 | * such as logging and writing of files. 13 | * 14 | * @class BaseTask 15 | */ 16 | export default class BaseTask { 17 | constructor() { 18 | this.log = bunyan.createLogger({ 19 | name: this.constructor.name, 20 | level: process.env.NODE_ENV === 'production' ? bunyan.INFO : bunyan.DEBUG, 21 | }); 22 | this.http = http; 23 | this.db = db; 24 | } 25 | 26 | getTransaction() { 27 | return bluebird.promisify(this.db.transaction); 28 | } 29 | 30 | /** 31 | * Simple write function to the disk. 32 | * 33 | * @param {string} pathToWrite absolute path to write to 34 | * @param {Object} data json object to write 35 | * @param {bunyan} [log=this.log] logger, defaults to this.log 36 | * @memberof BaseTask 37 | */ 38 | writeJson(pathToWrite: string, data: Object, log = this.log) { 39 | log.info(`saving to ${pathToWrite}`); 40 | fs.outputJson(pathToWrite, data, { spaces: config.jsonSpace }); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/scrapers/BaseTask.test.js: -------------------------------------------------------------------------------- 1 | import bunyan from 'bunyan'; 2 | import fs from 'fs-extra'; 3 | 4 | import BaseTask from './BaseTask'; 5 | 6 | jest.mock('fs-extra'); 7 | jest.unmock('bunyan'); 8 | 9 | describe('BaseTask', () => { 10 | let base; 11 | beforeAll(() => { 12 | base = new BaseTask(); 13 | }); 14 | 15 | describe('constructor', () => { 16 | let env; // restore env 17 | beforeEach(() => { 18 | env = process.env.NODE_ENV; 19 | }); 20 | 21 | afterEach(() => { 22 | process.env.NODE_ENV = env; 23 | }); 24 | 25 | it('should generate a log object', () => { 26 | expect(base.log.fields.name).toBe('BaseTask'); 27 | }); 28 | 29 | it('should generate log at debug level when not production', () => { 30 | expect(base.log.level()).toBe(bunyan.DEBUG); 31 | }); 32 | 33 | it('should generate log at info level when production', () => { 34 | process.env.NODE_ENV = 'production'; 35 | base = new BaseTask(); 36 | expect(base.log.level()).toBe(bunyan.INFO); 37 | }); 38 | }); 39 | 40 | describe('writeJson', () => { 41 | it('should output with log', () => { 42 | base.log = { 43 | info: jest.fn(), 44 | }; 45 | 46 | base.writeJson('x'); 47 | 48 | expect(base.log.info).toHaveBeenCalled(); 49 | }); 50 | 51 | it('should output json to file', () => { 52 | base.writeJson('x.json', {}); 53 | expect(fs.outputJson).toHaveBeenCalled(); 54 | }); 55 | }); 56 | }); 57 | -------------------------------------------------------------------------------- /src/scrapers/HttpService.js: -------------------------------------------------------------------------------- 1 | import path from 'path'; 2 | import fs from 'fs-extra'; 3 | import { URL, URLSearchParams } from 
'url'; 4 | import sanitizeFilename from 'sanitize-filename'; 5 | import axios from 'axios'; 6 | import bunyan from 'bunyan'; 7 | import { isString } from 'lodash'; 8 | 9 | import config from '../../config'; 10 | 11 | const log = bunyan.createLogger({ 12 | name: 'HttpService', 13 | level: process.env.NODE_ENV === 'production' ? bunyan.INFO : bunyan.DEBUG, 14 | }); 15 | 16 | /** 17 | * Converts axios request configuration to equivalent valid filename. 18 | */ 19 | function getCacheFilePath(requestConfig) { 20 | const { baseURL, url, params } = requestConfig; 21 | // https://nodejs.org/docs/latest/api/url.html#url_url_strings_and_url_objects 22 | const { hostname, pathname, searchParams, href } = new URL(url, baseURL); 23 | 24 | // Merge params from config and search params from the URL 25 | const keyValuePairs = new Set(); 26 | const addKeyValuePair = (value, key) => { 27 | keyValuePairs.add(`${key}=${value}`); 28 | }; 29 | // URLSearchParams are iterables, not arrays (so no map, filter, etc) 30 | searchParams.forEach(addKeyValuePair); 31 | new URLSearchParams(params).forEach(addKeyValuePair); 32 | 33 | let filename = ''; 34 | if (keyValuePairs.size) { 35 | filename = sanitizeFilename(Array.from(keyValuePairs).sort().join('&')); 36 | if (filename === '') { 37 | throw new Error(`Invalid filename for url ${href}`); 38 | } 39 | } else { 40 | filename = 'index.html'; 41 | } 42 | 43 | return path.join( 44 | config.defaults.cachePath, 45 | hostname.replace(/^www\./, ''), 46 | pathname.replace(/\/index\.[a-z]+$/, ''), 47 | filename, 48 | ); 49 | } 50 | 51 | /** 52 | * Gets the time the file was last modified if it exists, null otherwise. 53 | */ 54 | async function getFileModifiedTime(cachedPath, urlStr) { 55 | try { 56 | const stats = await fs.stat(cachedPath); 57 | if (stats.isFile()) { 58 | return stats.mtime; 59 | } 60 | log.error(`${cachedPath} is not a file`); 61 | } catch (err) { 62 | log.debug(`no cached file for ${urlStr}`); 63 | } 64 | return null; 65 | } 66 | 67 | const HttpService = axios.create({ 68 | validateStatus: status => (status >= 200 && status < 300) || status === 304, 69 | }); 70 | 71 | /** 72 | * Intercepts and returns cached response when 73 | * 1) Cache file exists 74 | * 2) Cache file is within set cache limit 75 | */ 76 | HttpService.interceptors.request.use(async (request) => { 77 | // Only cache GET requests 78 | if (request.method === 'get') { 79 | const { maxCacheAge = config.defaults.maxCacheAge } = request; 80 | 81 | const cachedFilePath = getCacheFilePath(request); 82 | const modifiedTime = await getFileModifiedTime(cachedFilePath, request.url); 83 | 84 | request.isCached = Date.now() - modifiedTime < maxCacheAge; 85 | if (request.isCached) { 86 | request.data = await fs.readFile(cachedFilePath, 'utf8'); 87 | // Set the request adapter to send the cached response and 88 | // prevent the request from actually running 89 | request.adapter = () => 90 | Promise.resolve({ 91 | data: request.data, 92 | status: request.status, 93 | statusText: request.statusText, 94 | headers: request.headers, 95 | config: request, 96 | }); 97 | } else if (modifiedTime instanceof Date) { 98 | request.headers['if-modified-since'] = modifiedTime.toUTCString(); 99 | } 100 | } 101 | return request; 102 | }); 103 | 104 | /** 105 | * Cache response when it is not already cached. 106 | * Also handles 304 Not Modified scenarios. 
107 | */ 108 | HttpService.interceptors.response.use(async (response) => { 109 | const cachedFilePath = getCacheFilePath(response.config); 110 | if (response.status === 304) { 111 | response.data = await fs.readFile(cachedFilePath, 'utf8'); 112 | } else if (response.config.method === 'get' && !response.config.isCached) { 113 | const outputFunc = isString(response.data) ? fs.outputFile : fs.outputJson; 114 | outputFunc(cachedFilePath, response.data); 115 | } 116 | return response; 117 | }); 118 | 119 | export default HttpService; 120 | export { getCacheFilePath, getFileModifiedTime }; 121 | -------------------------------------------------------------------------------- /src/scrapers/HttpService.test.js: -------------------------------------------------------------------------------- 1 | import nock from 'nock'; 2 | import fs from 'fs-extra'; 3 | import HttpService, { getCacheFilePath, getFileModifiedTime } from './HttpService'; 4 | 5 | jest.mock('../../config.js', () => ({ 6 | defaults: { 7 | cachePath: 'testBase', 8 | maxCacheAge: 0, 9 | }, 10 | })); 11 | 12 | describe('getCacheFilePath', () => { 13 | const getFilePath = (url, params) => getCacheFilePath({ url, params }); 14 | 15 | it('should output root to domain with index.html', () => { 16 | expect(getFilePath('https://www.example.com')).toBe('testBase/example.com/index.html'); 17 | expect(getFilePath('https://www.example.com/test')).toBe('testBase/example.com/test/index.html'); 18 | }); 19 | 20 | it('should normalize url', () => { 21 | const expectedOutput = 'testBase/example.com/index.html'; 22 | expect(getFilePath('https://www.example.com')).toBe(expectedOutput); 23 | expect(getFilePath('https://www.example.com:80')).toBe(expectedOutput); 24 | expect(getFilePath('http://www.example.com')).toBe(expectedOutput); 25 | expect(getFilePath('http://example.com/')).toBe(expectedOutput); 26 | expect(getFilePath('http://example.com/index.html')).toBe(expectedOutput); 27 | expect(getFilePath('http://www.example.com/#context')).toBe(expectedOutput); 28 | 29 | expect(getFilePath('http://example.com/', 'a=1&b=2')) 30 | .toBe(getFilePath('http://example.com/', 'b=2&a=1')); 31 | expect(getFilePath('http://example.com/?a=1', 'b=2')) 32 | .toBe(getFilePath('http://example.com/?b=2', 'a=1')); 33 | }); 34 | 35 | it('should output queries to file savable format', () => { 36 | expect(getFilePath('https://www.example.com/', 'query="test"&x="test"')) 37 | .toBe('testBase/example.com/query=test&x=test'); 38 | expect(getFilePath('https://www.example.com/', 'query=123')) 39 | .toBe('testBase/example.com/query=123'); 40 | expect(getFilePath('https://www.example.com/', '')) 41 | .toBe('testBase/example.com/te='); 42 | }); 43 | 44 | it('should split subpaths to many different subfolders', () => { 45 | expect(getFilePath('https://www.example.com/test/', 'query="test"')) 46 | .toBe('testBase/example.com/test/query=test'); 47 | expect(getFilePath('https://www.example.com/1/2/', 'query=123&x=5')) 48 | .toBe('testBase/example.com/1/2/query=123&x=5'); 49 | expect(getFilePath('https://www.example.com/1/2/hex')) 50 | .toBe('testBase/example.com/1/2/hex/index.html'); 51 | }); 52 | 53 | it('should throw when there is no valid filename', () => { 54 | expect(() => getFilePath('')).toThrow(); 55 | }); 56 | }); 57 | 58 | describe('getFileModifiedTime', () => { 59 | const mockFileSystemMeta = { 60 | testFile: { 61 | isFile: () => true, 62 | mtime: 0, 63 | }, 64 | testFolder: { 65 | isFile: () => false, 66 | mtime: 0, 67 | }, 68 | }; 69 | 70 | beforeAll(() => { 71 | 
fs.setMock({}, mockFileSystemMeta); 72 | }); 73 | 74 | it('should output the modified time of a file', async () => { 75 | expect(await getFileModifiedTime('testFile')).toBe(mockFileSystemMeta.testFile.mtime); 76 | }); 77 | 78 | it('should output null if it is not a file', async () => { 79 | expect(await getFileModifiedTime('testFolder')).toBe(null); 80 | }); 81 | 82 | it('should output null if no file', async () => { 83 | expect(await getFileModifiedTime('testNoFile')).toBe(null); 84 | }); 85 | }); 86 | 87 | describe('HttpService', () => { 88 | const HOST = 'http://example.com'; 89 | const EPOCH = new Date(0); 90 | const cachedData = 'cached test'; 91 | const freshData = 'no cached test'; 92 | const mockFileSystem = { 93 | 'testBase/example.com/index.html': cachedData, 94 | }; 95 | let mockFileSystemMeta; 96 | 97 | beforeEach(() => { 98 | mockFileSystemMeta = { 99 | 'testBase/example.com/index.html': { 100 | isFile: () => true, 101 | // Default should not be cached 102 | // since file is 40+ years old 103 | mtime: EPOCH, 104 | }, 105 | }; 106 | fs.setMock(mockFileSystem, mockFileSystemMeta); 107 | nock(HOST).get(/.*/).reply(200, freshData); 108 | }); 109 | 110 | afterAll(() => { 111 | nock.cleanAll(); 112 | }); 113 | 114 | describe('nock server', () => { 115 | it('should return mock reponse', async () => { 116 | const response = await HttpService.get(HOST); 117 | expect(response.data).toBe(freshData); 118 | }); 119 | }); 120 | 121 | describe('requestInterceptor', () => { 122 | it('should intercept and return cache file if it exists', async () => { 123 | mockFileSystemMeta['testBase/example.com/index.html'].mtime = Date.now() + 1000; 124 | fs.setMock(mockFileSystem, mockFileSystemMeta); 125 | const response = await HttpService.get(HOST); 126 | expect(response.data).toBe(cachedData); 127 | expect(response.config.isCached).toBeTruthy(); 128 | }); 129 | 130 | it('should not intercept if cache file has expired', async () => { 131 | const response = await HttpService.get(HOST); 132 | expect(response.data).toBe(freshData); 133 | expect(response.config.isCached).toBeFalsy(); 134 | }); 135 | 136 | it('should not intercept if cache file does not exist', async () => { 137 | const response = await HttpService.get(`${HOST}/noCached`); 138 | expect(response.data).toBe(freshData); 139 | expect(response.config.isCached).toBeFalsy(); 140 | }); 141 | 142 | it('should set if-modified-since if cache file has expired', async () => { 143 | const response = await HttpService.get(`${HOST}`); 144 | expect(response.config.headers['if-modified-since']).toBe(EPOCH.toUTCString()); 145 | }); 146 | }); 147 | 148 | describe('responseInterceptor', () => { 149 | beforeEach(() => { 150 | fs.outputFile = jest.fn(); 151 | }); 152 | 153 | it('should cache file if it is not already cached', async () => { 154 | await HttpService.get(HOST); 155 | expect(fs.outputFile).toBeCalled(); 156 | }); 157 | 158 | it('should not cache file if it is already cached', async () => { 159 | mockFileSystemMeta['testBase/example.com/index.html'].mtime = Date.now() + 1000; 160 | fs.setMock(mockFileSystem, mockFileSystemMeta); 161 | await HttpService.get(HOST); 162 | expect(fs.outputFile).not.toBeCalled(); 163 | }); 164 | 165 | it('should send cached file if server returns 304', async () => { 166 | nock.cleanAll(); 167 | nock(HOST).get(/.*/).reply(304); 168 | const response = await HttpService.get(HOST); 169 | expect(fs.outputFile).not.toBeCalled(); 170 | expect(response.data).toBe(cachedData); 171 | }); 172 | }); 173 | }); 174 | 
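The request and response interceptors above give HttpService a transparent file cache for GET requests: a cached copy younger than maxCacheAge is served from disk without touching the network, an expired copy is revalidated with an if-modified-since header, and a 304 reply is answered from the cache file. The snippet below is a minimal usage sketch, not a file from this repository; the URL, the one-hour cache age and the fetchModuleList name are illustrative assumptions.

// Hypothetical caller of HttpService (illustration only, not part of the repository).
import HttpService from './HttpService';

async function fetchModuleList() {
  // `maxCacheAge` is read by the request interceptor and overrides
  // config.defaults.maxCacheAge (in milliseconds) for this request alone.
  const response = await HttpService.get('http://example.com/modules', {
    params: { output: 'json' }, // params are folded into the cache file name
    maxCacheAge: 60 * 60 * 1000, // assumed: anything under an hour old is fresh enough
  });
  return response.data; // either the live body or the cached file contents
}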
-------------------------------------------------------------------------------- /src/scrapers/VenuesScraper.js: -------------------------------------------------------------------------------- 1 | import R from 'ramda'; 2 | 3 | import BaseTask from './BaseTask'; 4 | 5 | const NUS_API_URL = 'http://nuslivinglab.nus.edu.sg/api_dev/api/Dept'; 6 | const FIELDS = ['name', 'type', 'owned_by']; 7 | const SCHOOL_ID = { school_id: 1 }; 8 | // Prevent "too many SQL variables" errors thrown by SQLite. 9 | // SQLite's default limit is 999, and we bind 4 variables per row. 10 | const MAX_INSERT_SIZE = Math.floor(999 / 4); 11 | 12 | /** 13 | * Scrapes and saves venue data for the school. 14 | */ 15 | export default class VenuesScraper extends BaseTask { 16 | async save(existingRows, currentRows) { 17 | if (!currentRows.length) { 18 | throw new Error('No data found'); 19 | } 20 | const transaction = await this.getTransaction(); 21 | 22 | const map = R.indexBy(R.prop('name'), currentRows); 23 | 24 | const transactions = []; 25 | const transact = ({ name }) => this.db.table('venues').transacting(transaction).where({ name }); 26 | existingRows.forEach((row) => { 27 | const currentRow = map[row.name]; 28 | if (!currentRow) { 29 | // Content is no longer present 30 | transactions.push(transact(row).delete()); 31 | } else if (!R.equals(currentRow, row)) { 32 | // Content is different 33 | transactions.push(transact(row).update(currentRow)); 34 | } 35 | // Content is exactly the same, do nothing 36 | delete map[row.name]; 37 | }); 38 | 39 | return Promise.all(transactions) 40 | // Whatever remains must be new data 41 | .then(() => this.db.batchInsert('venues', Object.values(map), MAX_INSERT_SIZE)) 42 | .then(transaction.commit) 43 | .catch(transaction.rollback); 44 | } 45 | 46 | async scrape() { 47 | const response = await this.http.get(NUS_API_URL, { 48 | params: { 49 | name: '', 50 | output: 'json', 51 | }, 52 | }); 53 | const currentVenues = response.data.map(datum => this.convertToRow(datum)); 54 | const existingVenues = await this.db.table('venues').where(SCHOOL_ID).select(FIELDS); 55 | 56 | return this.save(existingVenues, currentVenues); 57 | } 58 | 59 | convertToRow(venue) { 60 | // The API is poorly named: roomname is not unique, 61 | // roomcode is arguably more suitable as the name, 62 | // and dept entries are not always departments, since venues 63 | // can be owned by clubs and external vendors. 64 | const { roomcode: name, roomname: type, dept: owned_by, ...extraProps } = venue; 65 | 66 | if (!R.isEmpty(extraProps)) { 67 | this.log.warn('Found extra properties', extraProps); 68 | } 69 | 70 | return { 71 | ...SCHOOL_ID, 72 | name, 73 | type, 74 | owned_by, 75 | }; 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /src/scrapers/VenuesScraper.test.js: -------------------------------------------------------------------------------- 1 | import VenuesScraper from './VenuesScraper'; 2 | 3 | describe('VenuesScraper', () => { 4 | const scraper = new VenuesScraper(); 5 | const FIELDS = ['school_id', 'name', 'type', 'owned_by']; 6 | 7 | describe('save', () => { 8 | beforeEach(async () => { 9 | await scraper.db.migrate.rollback(); 10 | await scraper.db.migrate.latest(); 11 | await scraper.db.seed.run(); 12 | }); 13 | 14 | it('should start out with empty db', async () => { 15 | expect(await scraper.db.table('venues').select(FIELDS)).toEqual([]); 16 | }); 17 | 18 | it('should insert all venues if db is empty', async () => { 19 | const testData = [ 20 | { school_id: 1, name:
'test', type: 'room', owned_by: 'me' }, 21 | { school_id: 1, name: 'test1', type: 'room', owned_by: 'me' }, 22 | ]; 23 | await scraper.save([], testData); 24 | expect(await scraper.db.table('venues').select(FIELDS)).toEqual(testData); 25 | }); 26 | 27 | it('should merge venues if db exists', async () => { 28 | const existingData = [ 29 | { school_id: 1, name: 'test', type: 'room', owned_by: 'me' }, 30 | { school_id: 1, name: 'test1', type: 'room', owned_by: 'me' }, 31 | ]; 32 | await scraper.db.table('venues').insert(existingData); 33 | const testData = [{ school_id: 1, name: 'test', type: 'room', owned_by: 'me' }]; 34 | await scraper.save(existingData, testData); 35 | expect(await scraper.db.table('venues').select(FIELDS)).toEqual([existingData[0]]); 36 | }); 37 | }); 38 | 39 | describe('convertToRow', () => { 40 | it('should convert object to sql row equivalent', async () => { 41 | const row = scraper.convertToRow({ 42 | roomcode: 'test', 43 | roomname: 'some room', 44 | dept: 'subway', 45 | }); 46 | expect(() => scraper.db.table('venues').insert(row)).not.toThrow(); 47 | }); 48 | 49 | it('should warn if there is extra props', async () => { 50 | scraper.log.warn = jest.fn(); 51 | scraper.convertToRow({ 52 | roomcode: 'test', 53 | roomname: 'some room', 54 | dept: 'subway', 55 | surprise: '!', 56 | }); 57 | expect(scraper.log.warn).toBeCalled(); 58 | }); 59 | }); 60 | }); 61 | -------------------------------------------------------------------------------- /src/util/log.js: -------------------------------------------------------------------------------- 1 | import bunyan from 'bunyan'; 2 | 3 | /** 4 | * Provides the default logger for the server, 5 | * processes and serializes http calls 6 | */ 7 | export default bunyan.createLogger({ 8 | name: 'nusmods-api', 9 | level: process.env.LOG_LEVEL || 'info', 10 | stream: process.stdout, 11 | serializers: bunyan.stdSerializers, 12 | }); 13 | -------------------------------------------------------------------------------- /src/util/mapKeysDeep.js: -------------------------------------------------------------------------------- 1 | import _ from 'lodash'; 2 | 3 | /** 4 | * Recursively traverses the object and applies a function to all the keys. 5 | * Does not handle cyclic references. 6 | * This function is curried. 
7 | * @param {* Function} func 8 | * @param {* Object} value 9 | */ 10 | function mapKeysDeep(func, value) { 11 | if (Array.isArray(value)) { 12 | return value.map(innerContent => mapKeysDeep(func, innerContent)); 13 | } 14 | if (_.isObjectLike(value)) { 15 | const obj = {}; 16 | Object.entries(value).forEach(([key, objValue]) => { 17 | obj[func(key)] = mapKeysDeep(func, objValue); 18 | }); 19 | return obj; 20 | } 21 | return value; // all other cases 22 | } 23 | 24 | export default _.curry(mapKeysDeep); 25 | -------------------------------------------------------------------------------- /src/util/mapKeysDeep.test.js: -------------------------------------------------------------------------------- 1 | import mapKeysDeep from './mapKeysDeep'; 2 | 3 | describe('mapKeysDeep', () => { 4 | const appendXToKeys = mapKeysDeep(key => `${key}X`); 5 | 6 | const testString = 'testString'; 7 | 8 | it('should return function without modification', () => { 9 | const func = () => {}; 10 | expect(appendXToKeys(func)).toBe(func); 11 | }); 12 | 13 | it('should return null without modification', () => { 14 | expect(appendXToKeys(null)).toBeNull(); 15 | }); 16 | 17 | it('should return strings without modification', () => { 18 | expect(appendXToKeys(testString)).toBe(testString); 19 | }); 20 | 21 | it('should return numbers without modification', () => { 22 | expect(appendXToKeys(1)).toBe(1); 23 | }); 24 | 25 | it('should map over array of objects', () => { 26 | const testObject = { 27 | testString, 28 | }; 29 | const expectedObject = { 30 | testStringX: testString, 31 | }; 32 | expect(appendXToKeys([testObject, testObject])).toEqual([expectedObject, expectedObject]); 33 | }); 34 | 35 | it('should change all keys in simple object', () => { 36 | const anotherString = 'anotherString'; 37 | const inputObject = { 38 | testString, 39 | anotherString, 40 | }; 41 | expect(appendXToKeys(inputObject)).toEqual({ 42 | testStringX: testString, 43 | anotherStringX: anotherString, 44 | }); 45 | }); 46 | 47 | it('should change all keys in nested object', () => { 48 | const inputObject = { 49 | testString, 50 | anotherString: { 51 | yetAnotherString: 'yetAnotherString', 52 | array: [{ 53 | testString, 54 | }], 55 | }, 56 | }; 57 | expect(appendXToKeys(inputObject)).toEqual({ 58 | testStringX: testString, 59 | anotherStringX: { 60 | yetAnotherStringX: 'yetAnotherString', 61 | arrayX: [{ 62 | testStringX: testString, 63 | }], 64 | }, 65 | }); 66 | }); 67 | }); 68 | -------------------------------------------------------------------------------- /src/util/walkDir.js: -------------------------------------------------------------------------------- 1 | import path from 'path'; 2 | import fs from 'fs-extra'; 3 | 4 | /** 5 | * Walks the directory, and returns all nested files of designated file name. 6 | * Only traverses one layer. 
7 | * @example walkJsonDir('test', 'test.json') will find 'test/1/test.json' 8 | * @param {string} folderPath folder path to traverse 9 | * @param {string} destFileName file name to match 10 | */ 11 | async function walkJsonDir(folderPath, destFileName) { 12 | const folders = await fs.readdir(folderPath); 13 | const folderToJsonMap = {}; 14 | await Promise.all(folders.map(async (folder) => { 15 | const filePath = path.join(folderPath, folder, destFileName); 16 | const fileContent = await fs.readJson(filePath).catch(() => null); 17 | if (fileContent) { 18 | folderToJsonMap[folder] = fileContent; 19 | } 20 | })); 21 | return folderToJsonMap; 22 | } 23 | 24 | /** 25 | * Sync version of walkJsonDir 26 | * @example walkJsonDir('test', 'test.json') will find 'test/1/test.json' 27 | * @param {string} folderPath folder path to traverse 28 | * @param {string} destFileName file name to match 29 | */ 30 | function walkJsonDirSync(folderPath, destFileName) { 31 | const folders = fs.readdirSync(folderPath); 32 | const folderToJsonMap = {}; 33 | folders.forEach((folder) => { 34 | const filePath = path.join(folderPath, folder, destFileName); 35 | try { 36 | folderToJsonMap[folder] = fs.readJsonSync(filePath); 37 | } catch (error) { 38 | // ignore errors 39 | } 40 | }); 41 | return folderToJsonMap; 42 | } 43 | 44 | export { 45 | walkJsonDir, 46 | walkJsonDirSync, 47 | }; 48 | -------------------------------------------------------------------------------- /src/util/walkDir.test.js: -------------------------------------------------------------------------------- 1 | import path from 'path'; 2 | import fs from 'fs-extra'; 3 | import { walkJsonDir, walkJsonDirSync } from './walkDir'; 4 | 5 | jest.mock('fs-extra'); 6 | 7 | describe('walkJsonDir', () => { 8 | it('walks the api directory for relevant files', async () => { 9 | const mockFileSystem = { 10 | app: { 11 | api: { 12 | '2016-2017': { 13 | 'modules.json': '["test"]', 14 | }, 15 | '2017-2018': { 16 | 'modules.json': '["test1"]', 17 | }, 18 | }, 19 | }, 20 | }; 21 | fs.setMock(mockFileSystem); 22 | const expected = { 23 | '2016-2017': ['test'], 24 | '2017-2018': ['test1'], 25 | }; 26 | const apiPath = path.join('app', 'api'); 27 | expect(await walkJsonDir(apiPath, 'modules.json')).toEqual(expected); 28 | }); 29 | }); 30 | 31 | describe('walkJsonDirSync', () => { 32 | it('walks the api directory for relevant files', async () => { 33 | const mockFileSystem = { 34 | app: { 35 | api: { 36 | '2016-2017': { 37 | 'modules.json': '["test"]', 38 | }, 39 | '2017-2018': { 40 | 'modules.json': '["test1"]', 41 | }, 42 | }, 43 | }, 44 | }; 45 | fs.setMock(mockFileSystem); 46 | const expected = { 47 | '2016-2017': ['test'], 48 | '2017-2018': ['test1'], 49 | }; 50 | const apiPath = path.join('app', 'api'); 51 | expect(walkJsonDirSync(apiPath, 'modules.json')).toEqual(expected); 52 | }); 53 | }); 54 | -------------------------------------------------------------------------------- /webpack.config.babel.js: -------------------------------------------------------------------------------- 1 | // @flow 2 | import 'dotenv/config'; 3 | import * as webpack from 'webpack'; 4 | import nodeExternals from 'webpack-node-externals'; 5 | import BabiliPlugin from 'babili-webpack-plugin'; 6 | import path from 'path'; 7 | import assert from 'assert'; 8 | 9 | // Undefine compilation environment 10 | delete process.env.BABEL_ENV; 11 | 12 | assert.equal(process.env.NODE_ENV, 'production', 'Not compiling in production environment'); 13 | 14 | const config: webpack.Configuration = { 15 | 
entry: './gulpfile.babel.js', 16 | output: { 17 | filename: 'gulpfile.compiled.js', 18 | path: path.resolve(__dirname), 19 | }, 20 | module: { 21 | rules: [ 22 | { 23 | test: /\.js$/, 24 | exclude: /node_modules/, 25 | use: { 26 | loader: 'babel-loader', 27 | options: { 28 | cacheDirectory: true, 29 | }, 30 | }, 31 | }, 32 | ], 33 | }, 34 | plugins: [ 35 | new webpack.EnvironmentPlugin(['NODE_ENV']), 36 | new webpack.optimize.ModuleConcatenationPlugin(), 37 | new BabiliPlugin(), 38 | ], 39 | target: 'node', 40 | externals: [nodeExternals()], 41 | }; 42 | 43 | export default config; 44 | --------------------------------------------------------------------------------
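Taken together, BaseTask supplies logging, the shared knex instance and JSON output, HttpService supplies cached HTTP requests, and each scraper such as VenuesScraper implements scrape(). Below is a minimal sketch of a runner for one task; only VenuesScraper, its scrape() method, the bunyan logger on this.log and the knex instance on this.db come from the files above, while the runner script itself and its error handling are assumptions.

// Hypothetical runner (illustration only, not part of the repository).
import VenuesScraper from './src/scrapers/VenuesScraper';

async function run() {
  const scraper = new VenuesScraper();
  try {
    // scrape() fetches the venue list through the caching HttpService,
    // converts each record with convertToRow(), and hands the result to
    // save(), which deletes removed rows, updates changed rows and
    // batch-inserts whatever is new.
    await scraper.scrape();
  } catch (error) {
    scraper.log.error(error);
    process.exitCode = 1;
  } finally {
    // this.db is a knex instance; destroying the pool lets the process exit.
    await scraper.db.destroy();
  }
}

run();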