├── test └── tests.js ├── .gitignore ├── readme.md ├── mysql_config.example.js ├── package.json ├── LICENSE.md └── main.js /test/tests.js: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Dependencies 2 | node_modules/ 3 | 4 | #DB config 5 | mysql_config.js 6 | schema.sql -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | ## Installation 2 | #### Install Dependencies 3 | - ```npm install``` 4 | - Rename and configure mysql_config.example.js to mysql_config.js 5 | 6 | #### Run Tests 7 | - ```./node_modules/mocha/bin/mocha``` 8 | 9 | #### Scrape and load DB 10 | - ```node main.js``` 11 | - Help: ``` node main.js --help``` 12 | 13 | ## Related Apps 14 | - https://github.com/kyleladd/MSCschedulizer_scraper 15 | - https://github.com/kyleladd/MSCschedulizer_api_swagger 16 | - https://github.com/kyleladd/MSCschedulizer_FrontEnd 17 | -------------------------------------------------------------------------------- /mysql_config.example.js: -------------------------------------------------------------------------------- 1 | var genericfunctions = require('node_generic_functions'); 2 | module.exports = { 3 | mySQLConfiguration: function (isTest) { 4 | // Multiple Statements is for resetting the database from schema sql file 5 | if(genericfunctions.toBoolean(isTest)===true){ 6 | return { 7 | host:"", 8 | database:"", 9 | user: "", 10 | password: "", 11 | multipleStatements: true 12 | }; 13 | } 14 | return { 15 | host:"", 16 | database:"", 17 | user: "", 18 | password: "", 19 | multipleStatements: true 20 | }; 21 | } 22 | }; -------------------------------------------------------------------------------- /package.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "name" : "msc-db-scraper-loader", 3 | "version" : "0.0.1", 4 | "description" : "Scrape MSC course listings.", 5 | "author" : "Kyle Ladd", 6 | "engines": { 7 | "node": "4.2.4" 8 | }, 9 | "dependencies" : { 10 | "request" : "latest", 11 | "scraperjs" : "latest", 12 | "mysql":"latest", 13 | "async" : "latest", 14 | "fs":"latest", 15 | "node_generic_functions":"https://github.com/kyleladd/node_generic_functions.git" 16 | }, 17 | "devDependencies": { 18 | "mocha" : "latest", 19 | "expect.js":"latest", 20 | "chai":"latest", 21 | "chai-spies":"latest", 22 | "sinon":"latest", 23 | "sinon-chai":"latest", 24 | "stdio":"latest" 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Kyle Ladd 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
var request = require('request');
var async = require('async');
var sjs = require('scraperjs');
var mysql = require('mysql');
var genericfunctions = require('node_generic_functions');
var mysql_config = require('./mysql_config.js');
var fs = require('fs');
var exec = require('child_process').exec;
var stdio = require('stdio');
var options = stdio.getopt({
    'input': {description: 'Location of the file', default: 'sched_data.txt'},
    'test': {description: 'Use the test database connection?', default: 'false'}
});
console.dir(options);
var script_start_time = new Date().getTime();
var connection = mysql.createConnection(mysql_config.mySQLConfiguration(options.test));

// Snapshots of each table, refreshed by loadDatabase() right after the
// corresponding insert phase finishes.
var dbDepartments = [];
var dbCourseSections = [];
var dbCourses = [];
var dbMeetings = [];
var dbRequiredIdentifiers = [];
var dbTerms = [];

var departments = [];
var results = [];

// Pipeline: parse the input file -> reset the schema -> load every table.
// Each step reports failure through its callback so the final handler always
// runs and the connection is always released.
async.series([
    function(next){
        // readFileSync (and a malformed row) throws synchronously; convert
        // that into a normal async error instead of an uncaught exception.
        try {
            results = parseScheduleFile(options.input);
        } catch (parseErr) {
            return next(parseErr);
        }
        next();
    },
    function(next){
        clearDatabase(next);
    },
    function(next){
        console.log("DB Cleared/Reset");
        loadDatabase(results, next);
    }
],
function(err){
    if (err){
        console.error("Load failed: " + (err.stack || err));
        // Signal failure to the shell without cutting off pending output.
        process.exitCode = 1;
        // The connection may be in an unusable state; drop it immediately.
        connection.destroy();
    }
    else{
        console.log("Everything is all loaded");
        // end connection
        connection.end();
    }
    var script_elapsed_time = (new Date().getTime() - script_start_time)/1000;
    console.log("Time to complete: " + script_elapsed_time + " seconds");
});

/**
 * Reads the tab-separated schedule file and turns each data line into a
 * section record.
 *
 * Lines that have no second column (for example blank lines) are skipped.
 * Columns are addressed by position; a line that has a second column but is
 * missing a later required column makes this function throw.
 *
 * @param {string} path - Location of the schedule file.
 * @returns {Array<Object>} One record per usable line, in file order.
 * @throws {Error} When the file cannot be read or a row is missing columns.
 */
function parseScheduleFile(path){
    var parsed = [];
    var lines = fs.readFileSync(path).toString().split("\n");
    lines.forEach(function(line){
        var row = line.split("\t");
        if (typeof row[1] === 'undefined'){
            return;
        }
        // Non-numeric credit values are stored as 0.
        var credits = parseInt(row[9], 10);
        if (isNaN(credits)){
            credits = 0;
        }
        parsed.push({
            Department: row[1].trim(),
            CourseNumber: row[2].trim(),
            CourseTitle: row[3].trim(),
            Weeks: row[4].trim(),
            CourseCRN: row[5].trim(),
            Section: row[6].trim(),
            Credits: credits,
            CurrentEnrollment: row[10].trim(),
            MaxEnrollment: row[11].trim(),
            //Convert days to bits
            Monday: genericfunctions.convertToBit(row[12]),
            Tuesday: genericfunctions.convertToBit(row[13]),
            Wednesday: genericfunctions.convertToBit(row[14]),
            Thursday: genericfunctions.convertToBit(row[15]),
            Friday: genericfunctions.convertToBit(row[16]),
            StartTime: row[18].trim(),
            EndTime: row[19].trim(),
            // The input carries no building/room columns; both are left blank.
            Building: "",
            Room: "",
            Instructor: row[20].trim(),
            Identifier: row[7].trim(),
            RequiredIdentifiers: row[8].trim()
        });
    });
    return parsed;
}

/**
 * Resets the database by running the contents of schema.sql as one query
 * (the connection is configured with multipleStatements for this purpose).
 *
 * @param {Function} callback - Node-style callback; receives the read/query
 *   error, or (null, 'reset db') on success.
 */
function clearDatabase(callback){
    var schema;
    try {
        schema = fs.readFileSync('schema.sql').toString();
    } catch (readErr) {
        return callback(readErr);
    }
    connection.query(schema, function(err){
        if (err){
            return callback(err);
        }
        callback(null,'reset db');
    });
}

/**
 * Wraps an async.series step so that `message` is logged just before the
 * step starts.
 */
function loggedStep(message, step){
    return function(next){
        console.log(message);
        step(next);
    };
}

/**
 * Builds an async.series step that runs `sql`, passes the resulting rows to
 * `store`, and completes with `doneMessage`. Query errors go to the callback.
 */
function selectIntoStep(sql, store, doneMessage){
    return function(next){
        connection.query(sql, function(err, rows){
            if (err){
                return next(err);
            }
            store(rows);
            next(null, doneMessage);
        });
    };
}

/**
 * Loads every table in dependency order. After each insert phase the table
 * is read back into the matching module-level db* variable before the next
 * insert phase starts.
 *
 * @param {Array<Object>} results - Parsed section records to insert.
 * @param {Function} callback - Node-style callback; receives the first error
 *   raised by any step, or (null, "loaded DB") when all steps succeed.
 */
function loadDatabase(results,callback){
    async.series([
        loggedStep("loading departments", function(next){
            insertDepartments(results, next);
        }),
        loggedStep("departments loaded", selectIntoStep(
            'SELECT id,abbreviation FROM departments',
            function(rows){ dbDepartments = rows; },
            "departments selected"
        )),
        loggedStep("departments selected", function(next){
            insertCourses(results, next);
        }),
        loggedStep("courses loaded", selectIntoStep(
            'SELECT id,name,department_id,courseNumber FROM courses',
            function(rows){ dbCourses = rows; },
            "courses selected"
        )),
        loggedStep("courses selected", selectIntoStep(
            'SELECT id,abbreviation,name FROM course_terms',
            function(rows){ dbTerms = rows; },
            "courses terms selected"
        )),
        loggedStep("course terms selected", function(next){
            insertSections(results, next);
        }),
        loggedStep("sections inserted", selectIntoStep(
            'SELECT id,name,course_id,courseCRN,instructor,currentEnrollment,maxEnrollment,credits,identifier FROM course_sections',
            function(rows){ dbCourseSections = rows; },
            "courses sections selected"
        )),
        loggedStep("course sections selected", function(next){
            insertMeetings(results, next);
        }),
        loggedStep("meetings loaded", selectIntoStep(
            'SELECT id,monday,tuesday,wednesday,thursday,friday,startTime,endTime,coursesection_id,building,room FROM meetings',
            function(rows){ dbMeetings = rows; },
            "meetings selected"
        )),
        loggedStep("meetings selected", function(next){
            insertRequiredIdentifiers(results, next);
        }),
        loggedStep("Identifiers loaded", selectIntoStep(
            'SELECT id,identifier,section_id FROM required_identifiers',
            function(rows){ dbRequiredIdentifiers = rows; },
            "Required Identifiers selected"
        )),
        loggedStep("required identifiers selected", function(next){
            next(null,"just console logging");
        })
    ],
    function(err){
        // Forward a failure from any step instead of always reporting success.
        callback(err,"loaded DB");
    });
}