├── requirements.txt ├── .gitignore ├── README.md ├── get_exams.py └── LICENSE.md /requirements.txt: -------------------------------------------------------------------------------- 1 | beautifulsoup4 2 | requests 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Folders containing exams. 2 | CS * 3 | 4 | # Log file. 5 | error.log 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | CS Exams Downloader 2 | =================== 3 | 4 | The CS Exams Downloader can be used to download past exams for UC Berkeley's 5 | CS courses. It makes use of HKN and TBP's databases. The program downloads by 6 | default only those exams that also have their solutions available on those 7 | databases, though this may be adjusted with flags 8 | (use ``python3 get_exams.py --help`` to learn more)! 9 | 10 | ## Setup 11 | 12 | Simply clone this repo to get started. Note that the application makes use of 13 | the following extra modules: 14 | - [Beautiful Soup 4](http://www.crummy.com/software/BeautifulSoup/) 15 | - [Requests](http://docs.python-requests.org/en/latest/) 16 | 17 | Users of [pip](https://pip.pypa.io/en/stable/) can install these modules using 18 | ``pip install -r requirements.txt``. 19 | 20 | ## Usage 21 | 22 | Run the code with a valid class to download exam-solution pairs! 23 | 24 | ```python 25 | python3 get_exams.py 61A 26 | ``` 27 | 28 | You can also get files for multiple classes in one run. 
29 | 30 | ```python 31 | python3 get_exams.py 162 186 188 32 | ``` 33 | 34 | ## License 35 | 36 | This code is released under the [Apache License, Version 2.0.](LICENSE.md) 37 | 38 | > Copyright 2016 Shafqat Dulal 39 | > 40 | > Licensed under the Apache License, Version 2.0 (the "License"); 41 | > you may not use this file except in compliance with the License. 42 | > You may obtain a copy of the License at 43 | > 44 | > http://www.apache.org/licenses/LICENSE-2.0 45 | > 46 | > Unless required by applicable law or agreed to in writing, software 47 | > distributed under the License is distributed on an "AS IS" BASIS, 48 | > WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 49 | > See the License for the specific language governing permissions and 50 | > limitations under the License. 51 | -------------------------------------------------------------------------------- /get_exams.py: -------------------------------------------------------------------------------- 1 | """ 2 | This program can be used to get posted exams for UC Berkeley's CS courses. 3 | Given one or more courses, this downloader will search HKN and TBP's databases 4 | and pull out available exam-solution files. See --help to view some options. 5 | 6 | This program requires Python 3, as well as a few additional modules that are 7 | listed in the requirements file. 8 | """ 9 | 10 | from bs4 import BeautifulSoup 11 | from collections import defaultdict 12 | from threading import Thread 13 | import argparse 14 | import os 15 | import requests 16 | import sys 17 | import urllib.request 18 | 19 | """Setup for parsing arguments.""" 20 | parser = argparse.ArgumentParser( 21 | description="Download past computer science exams \ 22 | that have corresponding solutions available online.") 23 | parser.add_argument('classes', metavar='class', type=str, nargs="+", 24 | help="class to get exam-solution pairs \ 25 | for (ex. 
170, 162)") 26 | parser.add_argument('-v', '--verbose', action='store_true', default=False, 27 | help="provide more detail on download progress") 28 | parser.add_argument('-u', '--unpaired', action='store_true', default=False, 29 | help="consider unpaired exam/solution files") 30 | g_one = parser.add_mutually_exclusive_group() 31 | g_one.add_argument('-e', '--exams', action='store_const', default='', 32 | const='Exam', help="download only exam files") 33 | g_one.add_argument('-s', '--solutions', action='store_const', default='', 34 | const='Solution', help="download only solution files") 35 | g_two = parser.add_mutually_exclusive_group() 36 | g_two.add_argument('-k', '--hkn', action='store_true', default=False, 37 | help="search only on HKN's database") 38 | g_two.add_argument('-t', '--tbp', action='store_true', default=False, 39 | help="search only on TBP's database") 40 | 41 | __author__ = "Shafqat Dulal" 42 | 43 | """Sources from which to find previous exams.""" 44 | source_sites = {"HKN": "https://hkn.eecs.berkeley.edu", 45 | "TBP": "https://tbp.berkeley.edu"} 46 | 47 | """An array used to deal with the row-data format of HKN's exam database.""" 48 | index_to_exam_type = [-1, -1, "Midterm 1", "Midterm 2", "Midterm 3", "Final"] 49 | 50 | """Special constants.""" 51 | _EXAM = "Exam" 52 | _SOLUTION = "Solution" 53 | 54 | 55 | def download(source, class_number, semester, 56 | exam_type, content_type, exam_link): 57 | """ 58 | Downloads the exam/solution file specified by the exam_link. Note that 59 | the naming of the file is determined by most of the function's arguments. 60 | 61 | Without any flags or options used for the program, this function will 62 | consider any download that has an existing exam_link. The extension is pdf unless source is HKN, in which case the last three characters of exam_link are used. NOTE(review): the output file is opened before the request is sent, so a failed request presumably leaves an empty file that later runs skip because it already exists -- confirm. 
63 | """ 64 | if is_valid_download(content_type, exam_link): 65 | extension = "pdf" 66 | if source == "HKN": 67 | extension = exam_link[-3:] 68 | file_name = "CS {0}/{1} {2} {3}.{4}".format( 69 | class_number, exam_type, semester, content_type, extension) 70 | file_link = "{0}{1}".format(source_sites[source], exam_link) 71 | if not os.path.exists(file_name): 72 | with open(file_name, 'wb') as exam: 73 | a = requests.get(file_link, stream=True) 74 | for block in a.iter_content(512): 75 | if not block: 76 | break 77 | exam.write(block) 78 | verbose_print("{0} ({1}) for {2} is complete!".format(exam_type, 79 | content_type, 80 | semester)) 81 | 82 | 83 | def download_files(source, class_number, dict_links): 84 | """ 85 | Scans the semester-exam-link mappings found by searching a source and 86 | attempts to download the files corresponding to the links found. 87 | 88 | Without any flags or options used for the program, this function will 89 | consider downloads only when both an exam and its solution are available. 90 | """ 91 | for semester in dict_links: 92 | for exam_type in dict_links[semester]: 93 | exam, solution = dict_links[semester][exam_type] 94 | if should_try_download(exam, solution): 95 | Thread(target=download, 96 | args=(source, class_number, semester, 97 | exam_type, _EXAM, exam)).start() 98 | Thread(target=download, 99 | args=(source, class_number, semester, 100 | exam_type, _SOLUTION, solution)).start() 101 | 102 | 103 | def pull_from_TBP(class_number, super_dict): 104 | """ 105 | Searches TBP's database of previous exams for the course corresponding to 106 | the class_number. Returns immediately unless args.tbp is set. Otherwise stores each row's exam link at index 0 and any other download link at index 1 of super_dict[semester][exam_type], then hands super_dict to download_files. 
107 | """ 108 | if not args.tbp: 109 | return 110 | verbose_print("Pulling from TBP.") 111 | site = "https://tbp.berkeley.edu/courses/cs/{0}".format(class_number) 112 | resp = urllib.request.urlopen(site) 113 | soup = BeautifulSoup(resp, "html.parser", 114 | from_encoding=resp.info().get_param('charset')) 115 | 116 | for row in soup.find_all("tr"): 117 | if row.find("th"): 118 | continue 119 | 120 | data = row.find_all("td") 121 | exam_type = data[1].text 122 | semester = data[2].text 123 | for anchor in row.find_all("a", class_="exam-download-link"): 124 | index = 0 if anchor.text.strip() == _EXAM else 1 125 | super_dict[semester][exam_type][index] = anchor.get("href") 126 | 127 | download_files("TBP", class_number, super_dict) 128 | 129 | 130 | def pull_from_HKN(class_number, super_dict): 131 | """ 132 | Searches HKN's database of previous exams for the course corresponding to 133 | the class_number. Returns immediately unless args.hkn is set. Only table cells at index 2 and above are scanned for links; each '[pdf]' link is stored at index 0 and any other link at index 1 of super_dict[semester][exam_type], then super_dict is handed to download_files. 134 | """ 135 | if not args.hkn: 136 | return 137 | verbose_print("Pulling from HKN.") 138 | site = "https://hkn.eecs.berkeley.edu/exams/course/cs/{0}".format( 139 | class_number) 140 | resp = urllib.request.urlopen(site) 141 | soup = BeautifulSoup(resp, "html.parser", 142 | from_encoding=resp.info().get_param('charset')) 143 | 144 | for row in soup.find_all("tr"): 145 | if row.find("th"): 146 | continue 147 | 148 | data = row.find_all("td") 149 | semester = data[0].text.strip() 150 | for index, datum in enumerate(data): 151 | if index >= 2: 152 | exam_type = index_to_exam_type[index] 153 | for link in datum.find_all("a"): 154 | index = 0 if link.text == '[pdf]' else 1 155 | super_dict[semester][exam_type][index] = link.get('href') 156 | 157 | download_files("HKN", class_number, super_dict) 158 | 159 | 160 | def main(class_numbers): 161 | """ 162 | Runs the main download operation. Looks through HKN's and TBP's websites 163 | for exams for each class specified and downloads the exam/solution files. 164 | The -k/-t flags may limit searches to one of the two sites. NOTE(review): the download threads started via download_files are never joined, so the empty-folder cleanup at the end of each iteration may run before any file has been written -- confirm. 
165 | """ 166 | for class_number in class_numbers: 167 | folder = "CS {0}".format(class_number) 168 | verbose_print("Starting searches for {0}.".format(folder)) 169 | try: 170 | os.makedirs(folder, exist_ok=True) 171 | pull_from_TBP( 172 | class_number, 173 | defaultdict(lambda: defaultdict(lambda: [None, None]))) 174 | pull_from_HKN( 175 | class_number, 176 | defaultdict(lambda: defaultdict(lambda: [None, None]))) 177 | except Exception as e: 178 | print("An exception has occurred.") 179 | with open('error.log', 'a') as error_log: 180 | error_log.write(str(e)) 181 | error_log.write('\n') 182 | if not os.listdir(folder): 183 | os.rmdir(folder) 184 | 185 | """ 186 | Parses arguments, then calls the main function 187 | to start the program. Exits upon completion. 188 | """ 189 | if __name__ == "__main__": 190 | args = parser.parse_args() 191 | 192 | """A print function that is active only when the -v flag is used.""" 193 | verbose_print = print if args.verbose else lambda *a, **k: None 194 | 195 | def should_try_download(exam, solution): 196 | """ 197 | Considers whether or not a particular exam-solution pair warrants a 198 | download attempt. Usually, the exam-solution pair must be complete 199 | (can be overridden with the -u flag). 200 | """ 201 | return ((exam and solution) or args.unpaired) 202 | 203 | def is_valid_download(content_type, exam_link): 204 | """ 205 | Checks that it is valid to download a certain exam. Non-existent exam 206 | links will always fail. The -e/-s flags may limit downloads to only 207 | exams or only solutions. 
208 | """ 209 | if not exam_link: 210 | return False 211 | if args.exams: 212 | return content_type == args.exams 213 | if args.solutions: 214 | return content_type == args.solutions 215 | return True 216 | 217 | if (not args.hkn) and (not args.tbp): 218 | args.hkn = args.tbp = True 219 | 220 | main(args.classes) 221 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Apache License 2 | ============== 3 | 4 | _Version 2.0, January 2004_ 5 | _<>_ 6 | 7 | ### Terms and Conditions for use, reproduction, and distribution 8 | 9 | #### 1. Definitions 10 | 11 | “License” shall mean the terms and conditions for use, reproduction, and 12 | distribution as defined by Sections 1 through 9 of this document. 13 | 14 | “Licensor” shall mean the copyright owner or entity authorized by the copyright 15 | owner that is granting the License. 16 | 17 | “Legal Entity” shall mean the union of the acting entity and all other entities 18 | that control, are controlled by, or are under common control with that entity. 19 | For the purposes of this definition, “control” means **(i)** the power, direct or 20 | indirect, to cause the direction or management of such entity, whether by 21 | contract or otherwise, or **(ii)** ownership of fifty percent (50%) or more of the 22 | outstanding shares, or **(iii)** beneficial ownership of such entity. 23 | 24 | “You” (or “Your”) shall mean an individual or Legal Entity exercising 25 | permissions granted by this License. 26 | 27 | “Source” form shall mean the preferred form for making modifications, including 28 | but not limited to software source code, documentation source, and configuration 29 | files. 
30 | 31 | “Object” form shall mean any form resulting from mechanical transformation or 32 | translation of a Source form, including but not limited to compiled object code, 33 | generated documentation, and conversions to other media types. 34 | 35 | “Work” shall mean the work of authorship, whether in Source or Object form, made 36 | available under the License, as indicated by a copyright notice that is included 37 | in or attached to the work (an example is provided in the Appendix below). 38 | 39 | “Derivative Works” shall mean any work, whether in Source or Object form, that 40 | is based on (or derived from) the Work and for which the editorial revisions, 41 | annotations, elaborations, or other modifications represent, as a whole, an 42 | original work of authorship. For the purposes of this License, Derivative Works 43 | shall not include works that remain separable from, or merely link (or bind by 44 | name) to the interfaces of, the Work and Derivative Works thereof. 45 | 46 | “Contribution” shall mean any work of authorship, including the original version 47 | of the Work and any modifications or additions to that Work or Derivative Works 48 | thereof, that is intentionally submitted to Licensor for inclusion in the Work 49 | by the copyright owner or by an individual or Legal Entity authorized to submit 50 | on behalf of the copyright owner. 
For the purposes of this definition, 51 | “submitted” means any form of electronic, verbal, or written communication sent 52 | to the Licensor or its representatives, including but not limited to 53 | communication on electronic mailing lists, source code control systems, and 54 | issue tracking systems that are managed by, or on behalf of, the Licensor for 55 | the purpose of discussing and improving the Work, but excluding communication 56 | that is conspicuously marked or otherwise designated in writing by the copyright 57 | owner as “Not a Contribution.” 58 | 59 | “Contributor” shall mean Licensor and any individual or Legal Entity on behalf 60 | of whom a Contribution has been received by Licensor and subsequently 61 | incorporated within the Work. 62 | 63 | #### 2. Grant of Copyright License 64 | 65 | Subject to the terms and conditions of this License, each Contributor hereby 66 | grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, 67 | irrevocable copyright license to reproduce, prepare Derivative Works of, 68 | publicly display, publicly perform, sublicense, and distribute the Work and such 69 | Derivative Works in Source or Object form. 70 | 71 | #### 3. Grant of Patent License 72 | 73 | Subject to the terms and conditions of this License, each Contributor hereby 74 | grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, 75 | irrevocable (except as stated in this section) patent license to make, have 76 | made, use, offer to sell, sell, import, and otherwise transfer the Work, where 77 | such license applies only to those patent claims licensable by such Contributor 78 | that are necessarily infringed by their Contribution(s) alone or by combination 79 | of their Contribution(s) with the Work to which such Contribution(s) was 80 | submitted. 
If You institute patent litigation against any entity (including a 81 | cross-claim or counterclaim in a lawsuit) alleging that the Work or a 82 | Contribution incorporated within the Work constitutes direct or contributory 83 | patent infringement, then any patent licenses granted to You under this License 84 | for that Work shall terminate as of the date such litigation is filed. 85 | 86 | #### 4. Redistribution 87 | 88 | You may reproduce and distribute copies of the Work or Derivative Works thereof 89 | in any medium, with or without modifications, and in Source or Object form, 90 | provided that You meet the following conditions: 91 | 92 | * **(a)** You must give any other recipients of the Work or Derivative Works a copy of 93 | this License; and 94 | * **(b)** You must cause any modified files to carry prominent notices stating that You 95 | changed the files; and 96 | * **(c)** You must retain, in the Source form of any Derivative Works that You distribute, 97 | all copyright, patent, trademark, and attribution notices from the Source form 98 | of the Work, excluding those notices that do not pertain to any part of the 99 | Derivative Works; and 100 | * **(d)** If the Work includes a “NOTICE” text file as part of its distribution, then any 101 | Derivative Works that You distribute must include a readable copy of the 102 | attribution notices contained within such NOTICE file, excluding those notices 103 | that do not pertain to any part of the Derivative Works, in at least one of the 104 | following places: within a NOTICE text file distributed as part of the 105 | Derivative Works; within the Source form or documentation, if provided along 106 | with the Derivative Works; or, within a display generated by the Derivative 107 | Works, if and wherever such third-party notices normally appear. The contents of 108 | the NOTICE file are for informational purposes only and do not modify the 109 | License. 
You may add Your own attribution notices within Derivative Works that 110 | You distribute, alongside or as an addendum to the NOTICE text from the Work, 111 | provided that such additional attribution notices cannot be construed as 112 | modifying the License. 113 | 114 | You may add Your own copyright statement to Your modifications and may provide 115 | additional or different license terms and conditions for use, reproduction, or 116 | distribution of Your modifications, or for any such Derivative Works as a whole, 117 | provided Your use, reproduction, and distribution of the Work otherwise complies 118 | with the conditions stated in this License. 119 | 120 | #### 5. Submission of Contributions 121 | 122 | Unless You explicitly state otherwise, any Contribution intentionally submitted 123 | for inclusion in the Work by You to the Licensor shall be under the terms and 124 | conditions of this License, without any additional terms or conditions. 125 | Notwithstanding the above, nothing herein shall supersede or modify the terms of 126 | any separate license agreement you may have executed with Licensor regarding 127 | such Contributions. 128 | 129 | #### 6. Trademarks 130 | 131 | This License does not grant permission to use the trade names, trademarks, 132 | service marks, or product names of the Licensor, except as required for 133 | reasonable and customary use in describing the origin of the Work and 134 | reproducing the content of the NOTICE file. 135 | 136 | #### 7. Disclaimer of Warranty 137 | 138 | Unless required by applicable law or agreed to in writing, Licensor provides the 139 | Work (and each Contributor provides its Contributions) on an “AS IS” BASIS, 140 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, 141 | including, without limitation, any warranties or conditions of TITLE, 142 | NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. 
You are 143 | solely responsible for determining the appropriateness of using or 144 | redistributing the Work and assume any risks associated with Your exercise of 145 | permissions under this License. 146 | 147 | #### 8. Limitation of Liability 148 | 149 | In no event and under no legal theory, whether in tort (including negligence), 150 | contract, or otherwise, unless required by applicable law (such as deliberate 151 | and grossly negligent acts) or agreed to in writing, shall any Contributor be 152 | liable to You for damages, including any direct, indirect, special, incidental, 153 | or consequential damages of any character arising as a result of this License or 154 | out of the use or inability to use the Work (including but not limited to 155 | damages for loss of goodwill, work stoppage, computer failure or malfunction, or 156 | any and all other commercial damages or losses), even if such Contributor has 157 | been advised of the possibility of such damages. 158 | 159 | #### 9. Accepting Warranty or Additional Liability 160 | 161 | While redistributing the Work or Derivative Works thereof, You may choose to 162 | offer, and charge a fee for, acceptance of support, warranty, indemnity, or 163 | other liability obligations and/or rights consistent with this License. However, 164 | in accepting such obligations, You may act only on Your own behalf and on Your 165 | sole responsibility, not on behalf of any other Contributor, and only if You 166 | agree to indemnify, defend, and hold each Contributor harmless for any liability 167 | incurred by, or claims asserted against, such Contributor by reason of your 168 | accepting any such warranty or additional liability. 
169 | 170 | _END OF TERMS AND CONDITIONS_ 171 | 172 | ### APPENDIX: How to apply the Apache License to your work 173 | 174 | To apply the Apache License to your work, attach the following boilerplate 175 | notice, with the fields enclosed by brackets `[]` replaced with your own 176 | identifying information. (Don't include the brackets!) The text should be 177 | enclosed in the appropriate comment syntax for the file format. We also 178 | recommend that a file or class name and description of purpose be included on 179 | the same “printed page” as the copyright notice for easier identification within 180 | third-party archives. 181 | 182 | Copyright [yyyy] [name of copyright owner] 183 | 184 | Licensed under the Apache License, Version 2.0 (the "License"); 185 | you may not use this file except in compliance with the License. 186 | You may obtain a copy of the License at 187 | 188 | http://www.apache.org/licenses/LICENSE-2.0 189 | 190 | Unless required by applicable law or agreed to in writing, software 191 | distributed under the License is distributed on an "AS IS" BASIS, 192 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 193 | See the License for the specific language governing permissions and 194 | limitations under the License. 195 | --------------------------------------------------------------------------------