├── sacsid_cookie.txt ├── LICENSE ├── .gitignore ├── README.txt ├── gcip.py ├── client.py └── gciq.py /sacsid_cookie.txt: -------------------------------------------------------------------------------- 1 | ~AJKiYcFQxUaNlNL_l-r7f4kcpVKodlGNVJeyRKzMZZ44NuWn3_ldd5DyKl1XE0DfyFISeolb_AbJ-B-JiWTvxKjIrVB1HE0CiQQRfgQSXdw5r9Im-DWydYjU1NT-Zfg3IwqghedUX_oC32rtHPbaSzMC-SfUkzsfxppDrwGjvFNaHkluCTuhuZL_CL3IQVP7GV4sg14jLG6vLhlEG_9jQcrViNRW6hoQj_zPfvF-gZ3HaDoOtx69zNrLvGjjj0EWeGFSlFiZ9UiEZsDNscodEGnSKnWjJ6H4lztKEz2UaiEJ8iQi-T7EccwcREzb8txbhfHjK0IfCpaNLihdGwJPP_lyECESgQdJxQ 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Free Public License 1.0.0 (0BSD) 2 | 3 | Permission to use, copy, modify, and/or distribute this software for 4 | any purpose with or without fee is hereby granted. 5 | 6 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL 7 | WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED 8 | WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE 9 | AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL 10 | DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR 11 | PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER 12 | TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 13 | PERFORMANCE OF THIS SOFTWARE. 14 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Created by https://www.gitignore.io/api/python 3 | # Edit at https://www.gitignore.io/?templates=python 4 | 5 | ### Python ### 6 | # Byte-compiled / optimized / DLL files 7 | __pycache__/ 8 | *.py[cod] 9 | *$py.class 10 | 11 | # C extensions 12 | *.so 13 | 14 | # Distribution / packaging 15 | .Python 16 | build/ 17 | develop-eggs/ 18 | dist/ 19 | downloads/ 20 | eggs/ 21 | .eggs/ 22 | lib/ 23 | lib64/ 24 | parts/ 25 | sdist/ 26 | var/ 27 | wheels/ 28 | pip-wheel-metadata/ 29 | share/python-wheels/ 30 | *.egg-info/ 31 | .installed.cfg 32 | *.egg 33 | MANIFEST 34 | 35 | # PyInstaller 36 | # Usually these files are written by a python script from a template 37 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 38 | *.manifest 39 | *.spec 40 | 41 | # Installer logs 42 | pip-log.txt 43 | pip-delete-this-directory.txt 44 | 45 | # Unit test / coverage reports 46 | htmlcov/ 47 | .tox/ 48 | .nox/ 49 | .coverage 50 | .coverage.* 51 | .cache 52 | nosetests.xml 53 | coverage.xml 54 | *.cover 55 | .hypothesis/ 56 | .pytest_cache/ 57 | 58 | # Translations 59 | *.mo 60 | *.pot 61 | 62 | # Django stuff: 63 | *.log 64 | local_settings.py 65 | db.sqlite3 66 | 67 | # Flask stuff: 68 | instance/ 69 | .webassets-cache 70 | 71 | # Scrapy stuff: 72 | .scrapy 73 | 74 | # Sphinx documentation 75 | docs/_build/ 76 | 77 | # PyBuilder 78 | target/ 79 | 80 | # Jupyter Notebook 81 | .ipynb_checkpoints 82 | 83 | # IPython 84 | profile_default/ 85 | ipython_config.py 86 | 87 | # pyenv 88 | .python-version 89 | 90 | # celery beat schedule file 91 | celerybeat-schedule 92 | 93 | # SageMath parsed files 94 | *.sage.py 95 | 96 | # Environments 97 | .env 98 | .venv 99 | env/ 100 | venv/ 101 | ENV/ 102 | env.bak/ 103 | venv.bak/ 104 | 105 | # Spyder project settings 106 | .spyderproject 107 | .spyproject 108 | 109 | # Rope project settings 110 | .ropeproject 111 | 112 | # mkdocs documentation 113 | /site 114 | 115 | # mypy 116 | .mypy_cache/ 117 | .dmypy.json 118 | dmypy.json 119 | 120 | # Pyre type checker 121 | .pyre/ 122 | 123 | ### Python Patch ### 124 | .venv/ 125 | 126 | # End of https://www.gitignore.io/api/python 127 | -------------------------------------------------------------------------------- /README.txt: -------------------------------------------------------------------------------- 1 | GCI Quotient 2 | ============ 3 | 4 | This script downloads Google Code-In (GCI) data. This includes task 5 | descriptions and claimed task instance data, including uploaded files. 6 | 7 | Everything is stashed by default into a 'gci_data' subdirectory. 8 | Tasks are stored in gci_data/tasks while instance data is saved into 9 | per-instance subdirectories named according to timestamp and title. 10 | 11 | gci_data/ 12 | tasks/ - contains all tasks in json format 13 | instances/ - contains all task instances 14 | DATE-TASK_NAME_-_ORG/ - contains a single task instance 15 | instance.txt - contains a text summary of the task 16 | instance.json - contains raw instance data 17 | activity.json - contains all comments and status changes 18 | 19 | 20 | HOW TO RUN 21 | ---------- 22 | 23 | 0. Install the necessary prerequisites: 24 | python 2.7+ 25 | pip install requests[security] 26 | 27 | 1. Enter the value of your 'SACSID' cookie into 'sacsid_cookie.txt'. 28 | 29 | (Log into the GCI website and examine your cookies using your 30 | browser's inspection or web development features.) 31 | Instructions for Chrome : 32 | 1. Log into the GCI Website 33 | 2. Chrome Controls (Top-right) --> Settings --> Advanced --> Content Settings --> Cookies --> See all cookies... 34 | 3. Search for 'codein.withgoogle.com' --> SACSID --> Copy Content (~AJKiYcG...) to sacsid_cookie.txt 35 | 36 | 2. $ python gciq.py --apikey 37 | 38 | (Log into the GCI website and find your API key under User Profile) 39 | 40 | 41 | FLAGS 42 | ----- 43 | --apikey is used to input your apikey 44 | --datadir is used to specify a different output directory 45 | 46 | 47 | RUNNING TIPS 48 | ------------ 49 | 50 | If instance downloading is interrupted, GCI Quotient will pick up 51 | where it left off the next time it's run. If you don't want that, 52 | delete your datadir or specify a new datadir. 53 | 54 | Empty activity.json files (i.e., containing just []) can happen when 55 | your session cookie has expired or was renewed. Check your browser to 56 | get the new value. 57 | 58 | 59 | TODO 60 | ---- 61 | * stash instances by status 62 | * tally instances apriori 63 | * stash raw html 64 | * skip client.GetTask during instances 65 | * close streams we're done with 66 | 67 | Majority development by: 68 | Christopher Sean Morrison 69 | Jeff Sieu (original author) 70 | Robert Spier (Google API) 71 | 72 | Additional contributions by:: 73 | Scott Sunarto, Vishal Gupta, Robby O'Connor, Frederick "Freso" Olesen 74 | -------------------------------------------------------------------------------- /gcip.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os 4 | import sys 5 | import argparse 6 | import json 7 | import time 8 | from fnmatch import fnmatch 9 | 10 | import requests 11 | 12 | import client as gciclient 13 | 14 | argparser = argparse.ArgumentParser(description='GCI Tasks') 15 | argparser.add_argument('--apikey', type=str, nargs='?', required=True, 16 | help='api key') 17 | argparser.add_argument('--url', type=str, nargs='?', 18 | default='https://codein.withgoogle.com', 19 | help='server url') 20 | argparser.add_argument('--datadir', type=str, nargs='?', 21 | default='gci_data', 22 | help='directory in which to store all downloaded data') 23 | FLAGS = argparser.parse_args() 24 | 25 | INSTANCE_THROTTLE = 1 26 | 27 | 28 | def read_task(task_dir, task_file): 29 | file_path = os.path.join(task_dir, task_file) 30 | # print("\t@ %s" % file_path) 31 | with open(file_path, 'r') as infile: 32 | return json.load(infile) 33 | 34 | 35 | def get_tasks(task_dir): 36 | all_tasks = [] 37 | print('...reading tasks...', end='') 38 | sys.stdout.flush() 39 | for file in os.listdir(task_dir): 40 | if fnmatch(file, '*.json'): 41 | all_tasks.extend([read_task(task_dir, file)]) 42 | print('done! (%lu tasks)' % len(all_tasks)) 43 | return all_tasks 44 | 45 | 46 | def make_tasks(datadir, client): 47 | taskdir = os.path.join(datadir, 'tasks') 48 | tasks = get_tasks(taskdir) 49 | 50 | print('...submitting tasks to GCI [%s]' % taskdir, end='') 51 | sys.stdout.flush() 52 | for t in tasks: 53 | submit_task(t, client) 54 | time.sleep(1) 55 | print('.', end='') 56 | sys.stdout.flush() 57 | print('done!') 58 | 59 | 60 | def submit_task(task, client): 61 | # GCI API will err out if "max_instances" is set to 0, even if it 62 | # is possible to set it to 0 in the interface. 63 | if not task.get('max_instances'): 64 | task['max_instances'] = 1 65 | try: 66 | return client.NewTask(task) 67 | except requests.exceptions.HTTPError as e: 68 | from pprint import pprint 69 | pprint(task) 70 | pprint(e.response.text) 71 | raise 72 | 73 | def main(): 74 | print("GCI Product: noun | gē-sē-ī prŏd-əkt") 75 | print(" \"the triviality of a specified characteristic or quality\"") 76 | 77 | client = gciclient.GCIAPIClient( 78 | auth_token=FLAGS.apikey, 79 | url_prefix=FLAGS.url) 80 | 81 | if not os.path.isdir(FLAGS.datadir): 82 | print('...data directory does not exist! [%s]' % FLAGS.datadir) 83 | exit(1) 84 | else: 85 | print('...reading GCI task data from [%s]' % FLAGS.datadir) 86 | 87 | make_tasks(FLAGS.datadir, client) 88 | 89 | if __name__ == '__main__': 90 | main() 91 | 92 | -------------------------------------------------------------------------------- /client.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Google Inc. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """The GCI API Client. A thin wrapper around the GCI API. 16 | 17 | Exmple usage: 18 | 19 | client = gciclient.GCIAPIClient( 20 | auth_token='xxxxxxxxxxxxxx', 21 | url_prefix='https://codein.withgoogle.com', 22 | debug=False) 23 | 24 | client.NewTask(myTaskDict) 25 | """ 26 | 27 | import json 28 | import logging 29 | try: 30 | import urlparse as up # Python 2.7 31 | except: 32 | import urllib.parse as up # Python 3 33 | import requests 34 | 35 | 36 | 37 | class GCIAPIClient(object): 38 | """GCIAPIClient provides a thin wrapper around the GCI Task API. 39 | 40 | A GCIAPIClient simplifies working with tasks by forming the HTTP requests on 41 | behalf of the caller. 42 | 43 | Attributes: 44 | url_prefix: A string prefix for the codin URL 45 | headers: A dictionary of HTTP headers 46 | """ 47 | 48 | def __init__(self, auth_token=None, 49 | url_prefix='https://codein.withgoogle.com/', 50 | debug=False): 51 | self.url_prefix = up.urljoin(url_prefix, 'api/program/current/') 52 | self.headers = { 53 | 'Authorization': 'Bearer %s' % auth_token, 54 | 'Content-Type': 'application/json', 55 | } 56 | 57 | if debug: 58 | logging.basicConfig() 59 | logging.getLogger().setLevel(logging.DEBUG) 60 | requests_log = logging.getLogger('requests.packages.urllib3') 61 | requests_log.setLevel(logging.DEBUG) 62 | requests_log.propagate = True 63 | 64 | def _Url(self, path): 65 | return up.urljoin(self.url_prefix, path) + '/' 66 | 67 | def ListTasks(self, page=1): 68 | """Fetches a list of tasks. 69 | 70 | Args: 71 | page: Which page of results to return. 72 | 73 | Returns: 74 | A JSON encoded list of tasks. 75 | 76 | Raises: 77 | HTTPError: a 4XX client error or 5XX server error response was returned. 78 | """ 79 | r = requests.get(self._Url('tasks'), headers=self.headers, 80 | params={'page': page}) 81 | r.raise_for_status() 82 | return r.json() 83 | 84 | def GetTask(self, task_id): 85 | """Fetches a single task. 86 | 87 | Args: 88 | task_id: An integer id for the task. 89 | 90 | Returns: 91 | A JSON encoded task. 92 | 93 | Raises: 94 | HTTPError: a 4XX client error or 5XX server error response was returned. 95 | """ 96 | r = requests.get(self._Url('tasks/%d' % task_id), headers=self.headers) 97 | r.raise_for_status() 98 | return r.json() 99 | 100 | def NewTask(self, task): 101 | """Creates a single new task. 102 | 103 | Args: 104 | task: A task object. 105 | 106 | Returns: 107 | A JSON encoded response. 108 | 109 | Raises: 110 | HTTPError: a 4XX client error or 5XX server error response was returned. 111 | """ 112 | r = requests.post(self._Url('tasks'), headers=self.headers, 113 | data=json.dumps(task)) 114 | r.raise_for_status() 115 | return r.json() 116 | 117 | def UpdateTask(self, task_id, task): 118 | """Modifies a single task. 119 | 120 | Args: 121 | task_id: An integer id for the task. 122 | task: A task object. 123 | 124 | Returns: 125 | A JSON encoded response. 126 | 127 | Raises: 128 | HTTPError: a 4XX client error or 5XX server error response was returned. 129 | """ 130 | r = ( 131 | requests.put( 132 | self._Url('tasks/%d' % task_id), data=json.dumps(task), 133 | headers=self.headers)) 134 | r.raise_for_status() 135 | return r.json() 136 | 137 | def DeleteTask(self, task_id): 138 | """Deletes a single task. 139 | 140 | Args: 141 | task_id: An integer id for the task. 142 | 143 | Returns: 144 | A JSON encoded response, if there is content in the response. 145 | Otherwise None. 146 | 147 | Raises: 148 | HTTPError: a 4XX client error or 5XX server error response was returned. 149 | """ 150 | r = ( 151 | requests.delete( 152 | self._Url('tasks/%d' % task_id), 153 | headers=self.headers)) 154 | r.raise_for_status() 155 | # DELETE returns nothing on success, don't try and parse it. 156 | if r.content: 157 | return r.json() 158 | return 159 | 160 | def ListTaskInstances(self, page=1): 161 | """Fetches a list of tasks. 162 | 163 | Args: 164 | page: Which page of results to return. 165 | 166 | Returns: 167 | A JSON encoded list of task instances. 168 | 169 | Raises: 170 | HTTPError: a 4XX client error or 5XX server error response was returned. 171 | """ 172 | r = requests.get(self._Url('instances'), headers=self.headers, 173 | params={'page': page}) 174 | r.raise_for_status() 175 | return r.json() 176 | 177 | def GetTaskInstance(self, task_instance_id): 178 | """Fetches a single task. 179 | 180 | Args: 181 | task_instance_id: An integer id for the task instance. 182 | 183 | Returns: 184 | A JSON encoded task instance. 185 | 186 | Raises: 187 | HTTPError: a 4XX client error or 5XX server error response was returned. 188 | """ 189 | r = requests.get(self._Url('instances/%d' % task_instance_id), 190 | headers=self.headers) 191 | r.raise_for_status() 192 | return r.json() 193 | -------------------------------------------------------------------------------- /gciq.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | import os 6 | import sys 7 | if sys.version_info[0] >= 3: 8 | unicode = str 9 | try: 10 | reload(sys) # Python 2.7 11 | sys.setdefaultencoding('utf8') 12 | except NameError: 13 | pass 14 | import errno 15 | import argparse 16 | import re 17 | import json 18 | import io 19 | import time 20 | try: 21 | import urlparse as up # Python 2.7 22 | except: 23 | import urllib.parse as up # Python 3 24 | import requests 25 | import client as gciclient 26 | 27 | argparser = argparse.ArgumentParser(description='GCI Task Instances') 28 | argparser.add_argument('--apikey', type=str, nargs='?', required=True, 29 | help='api key') 30 | argparser.add_argument('--instance', type=str, nargs='?', required=False, 31 | help='download a single task instance') 32 | argparser.add_argument('--url', type=str, nargs='?', 33 | default='https://codein.withgoogle.com', 34 | help='server url') 35 | argparser.add_argument('--datadir', type=str, nargs='?', 36 | default='gci_data', 37 | help='directory in which to store all downloaded data') 38 | FLAGS = argparser.parse_args() 39 | 40 | INSTANCE_HTML = 'index.html' 41 | INSTANCE_SUMMARY_FILENAME = 'instance.txt' 42 | INSTANCE_FILENAME = 'instance.json' 43 | INSTANCE_ACTIVITY_FILENAME = 'activity.json' 44 | INSTANCE_THROTTLE = 1 45 | 46 | 47 | def sterilize(directory_str): 48 | # scrub chars that are a PITA in directory names 49 | forbidden_chars = '/\\*?<>:|' 50 | for char in forbidden_chars: 51 | directory_str = directory_str.replace(char, '_') 52 | # collapse sequences to a single scrub 53 | directory_str = re.sub(r'_+', '_', directory_str) 54 | # substitute non-printable chars and non-ascii 55 | directory_str = re.sub(r'[^ -~]+', 'X', directory_str) 56 | # some NAS-encrypted filesystems limit filenames to 144 chars, so stop short 57 | return (directory_str[:140] + '...') if len(directory_str) > 140 else directory_str 58 | 59 | 60 | def convert_to_utf8(input): 61 | if isinstance(input, dict): 62 | return {convert_to_utf8(key): convert_to_utf8(value) for key, value in input.items()} 63 | elif isinstance(input, list): 64 | return [convert_to_utf8(element) for element in input] 65 | elif isinstance(input, unicode): 66 | return input 67 | else: 68 | return str(input) 69 | 70 | 71 | def get_task_file_name(task): 72 | return sterilize(convert_to_utf8(task['id']) + '-' + convert_to_utf8(task['name']).replace('"', '') + '.json') 73 | 74 | 75 | def write_task(taskdir, task): 76 | file_name = get_task_file_name(task) 77 | file_path = os.path.join(taskdir, file_name) 78 | # print("\t@ %s" % file_path) 79 | with open(file_path, 'w') as outfile: 80 | outfile.write(json.dumps(task, indent=4)) 81 | outfile.close() 82 | 83 | 84 | def get_instance_folder_name(instance): 85 | if instance['modified'] == 'None': 86 | instance['modified'] = '0000-00-00 00_00_00' 87 | task_name = convert_to_utf8(instance['task_definition_name']).replace('"', '') 88 | return sterilize(convert_to_utf8(instance['modified']) + '-' + convert_to_utf8(instance['id']) + '-' + task_name + "_-_" + convert_to_utf8(instance['organization_name'])) 89 | 90 | 91 | def get_prettified_info(instance): 92 | task_id = str(instance['id']) 93 | task_def_id = str(instance['task_definition_id']) 94 | task_name = instance['task_definition_name'] 95 | task_desc = instance['description'] 96 | task_status = instance['status'] 97 | tags = instance['tags'] 98 | max_instances = instance['max_instances'] 99 | 100 | org_name = instance['organization_name'] 101 | org_id = str(instance['organization_id']) 102 | 103 | student_id = str(instance['student_id']) 104 | student_name = instance['student_display_name'] 105 | 106 | mentors = instance['mentors'] 107 | is_beginner = instance['is_beginner'] 108 | categories = instance['categories'] 109 | time_given = instance['time_to_complete_in_days'] 110 | deadline = instance['deadline'] 111 | completion_date = instance['completion_date'] 112 | modified = instance['modified'] 113 | category_names = ['Coding', 'User Interface', 'Documentation & Training', 'Quality Assurance', 'Outreach & Research'] 114 | 115 | output = org_name + ' (Org ID ' + org_id + ')' + '\n' 116 | output += 'Task ID ' + task_id + ' | Task Definition ID ' + task_def_id + '\n' 117 | output += 'Max instances: ' + max_instances + '\n' 118 | output += '\n' 119 | output += 'Title: ' + task_name + '\n' 120 | output += 'Description: ' + task_desc + '\n' 121 | output += 'Tags: ' + (', '.join(list(tags))) + '\n' 122 | output += '\n' 123 | output += 'Categories: ' + (', '.join(category_names[int(c)-1] for c in categories)) + '\n' 124 | output += 'Is Beginner: ' + ('Yes' if is_beginner == 'True' else 'No') + '\n' 125 | output += 'Time given to complete: ' + time_given + ' days' + '\n' 126 | output += '\n' 127 | output += 'Mentors: ' + (', '.join(list(mentors))) + '\n' 128 | output += 'Student: ' + student_name + ' (ID ' + student_id + ')' + '\n' 129 | output += '\n' 130 | output += 'Status: ' + task_status + (' (' + deadline + ')' if task_status == 'COMPLETED' else '') + '\n' 131 | output += 'Last modified: ' + modified + '\n' 132 | return convert_to_utf8(output) 133 | 134 | 135 | def get_instance_activity(instance, cookies): 136 | page = requests.get('https://codein.withgoogle.com/api/program/current/taskupdate/?task_instance=' + str(instance['id']), cookies=cookies) 137 | info = json.loads(page.text.encode('utf-8')) 138 | if 'results' in info: 139 | return info['results'] 140 | print('...WARNING: unknown instance activity result, see ' + INSTANCE_ACTIVITY_FILENAME) 141 | return info 142 | 143 | 144 | def get_instance_html(instance, cookies): 145 | page = requests.get('https://codein.withgoogle.com/dashboard/task-instances/' + str(instance['id']) + '/', cookies=cookies) 146 | return page.text 147 | 148 | 149 | def list_instance_attachments(activity): 150 | attachments = [] 151 | for result in activity: 152 | for attachment in result['attachments']: 153 | url = attachment['url'] 154 | name = attachment['filename'] 155 | attachments += [{'url': ('https://codein.withgoogle.com' + url).encode('utf-8'), 'filename': name}] 156 | return attachments 157 | 158 | 159 | def write_instance(datadir, instance, cookies): 160 | folder_name = get_instance_folder_name(instance) 161 | folder_path = os.path.join(datadir, folder_name) 162 | try: 163 | os.mkdir(folder_path) 164 | except OSError as e: 165 | if e.errno != errno.EEXIST: 166 | raise 167 | 168 | print("\t@ %s" % folder_path) 169 | 170 | # write a summary text file 171 | summary_file = os.path.join(folder_path, INSTANCE_SUMMARY_FILENAME) 172 | info = get_prettified_info(instance) 173 | with open(summary_file, 'w') as outfile: 174 | outfile.write(info) 175 | outfile.close() 176 | 177 | # get the discussion and state changes 178 | activity = get_instance_activity(instance, cookies) 179 | activity_file = os.path.join(folder_path, INSTANCE_ACTIVITY_FILENAME) 180 | with open(activity_file, 'w') as outfile: 181 | outfile.write(json.dumps(activity, indent=4)) 182 | outfile.close() 183 | 184 | # download attachments 185 | attachments = list_instance_attachments(activity) 186 | if attachments: 187 | for attachment in attachments: 188 | url = attachment['url'].decode('utf-8') 189 | urlpath = up.urlparse(url).path 190 | base = os.path.basename(os.path.dirname(urlpath)) 191 | filename = (base + '_' + attachment['filename']).encode('utf-8') 192 | filename = filename.decode('utf-8') 193 | print('\tgetting ' + filename) 194 | attachment_path = os.path.join(folder_path, filename) 195 | file_contents = requests.get(url, cookies=cookies, stream=True) 196 | # Throw an error for bad status codes 197 | # file_contents.raise_for_status() 198 | if file_contents.status_code != 200: 199 | print('\tWARNING: %s failed' % url) 200 | 201 | with io.open(attachment_path, 'wb') as outfile: 202 | for block in file_contents.iter_content(1024): 203 | outfile.write(block) 204 | outfile.close() 205 | 206 | # # mark this instance done 207 | # html_file = os.path.join(folder_path, INSTANCE_HTML) 208 | # html = get_instance_html(instance, cookies) 209 | # with open(html_file, 'w') as outfile: 210 | # outfile.write(html) 211 | # outfile.close() 212 | 213 | # write the raw instance json 214 | instance_file = os.path.join(folder_path, INSTANCE_FILENAME) 215 | with open(instance_file, 'w') as outfile: 216 | outfile.write(json.dumps(instance, indent=4)) 217 | outfile.close() 218 | 219 | 220 | def get_tasks(datadir, client, cookies): 221 | all_tasks = [] 222 | next_page = 1 223 | print('...downloading tasks...', end='') 224 | sys.stdout.flush() 225 | while next_page > 0: 226 | print('.', end='') 227 | sys.stdout.flush() 228 | tasks = client.ListTasks(page=next_page) 229 | time.sleep(INSTANCE_THROTTLE) 230 | for t in tasks['results']: 231 | all_tasks.append(t) 232 | 233 | next_page = 0 234 | if tasks['next']: 235 | result = re.search(r'page=(\d+)', tasks['next']) 236 | if result: 237 | next_page = int(result.group(1)) 238 | print('done! (%lu tasks)' % len(all_tasks)) 239 | return all_tasks 240 | 241 | 242 | def save_tasks(datadir, client, cookies): 243 | tasks = get_tasks(datadir, client, cookies) 244 | taskdir = os.path.join(datadir, 'tasks') 245 | try: 246 | os.mkdir(taskdir) 247 | except OSError as e: 248 | if e.errno != errno.EEXIST: 249 | raise 250 | 251 | print('...saving GCI tasks to [%s]' % taskdir, end='') 252 | sys.stdout.flush() 253 | for t in tasks: 254 | write_task(taskdir, t) 255 | print('.', end='') 256 | sys.stdout.flush() 257 | print('done!') 258 | 259 | 260 | def save_instances(datadir, client, cookies): 261 | instdir = os.path.join(datadir, 'instances') 262 | try: 263 | os.mkdir(instdir) 264 | except OSError as e: 265 | if e.errno != errno.EEXIST: 266 | raise 267 | 268 | next_page = 1 269 | count = 0 270 | print('...saving GCI instances to [%s]' % instdir) 271 | while next_page > 0: 272 | instances = client.ListTaskInstances(page=next_page) 273 | time.sleep(INSTANCE_THROTTLE) 274 | for ti in instances['results']: 275 | print('#%05u: %s' % (count, convert_to_utf8(ti['task_definition_name']))) 276 | 277 | # skip instances we already downloaded 278 | last_file = get_instance_folder_name(ti) 279 | last_file = os.path.join(instdir, last_file) 280 | last_file = os.path.join(last_file, INSTANCE_FILENAME) 281 | if os.path.isfile(last_file) and os.path.getsize(last_file) > 0: 282 | print('...skipped, already done') 283 | count += 1 284 | continue 285 | 286 | task_id = ti['task_definition_id'] 287 | task_definition = convert_to_utf8(client.GetTask(task_id)) 288 | # print(task_definition) 289 | time.sleep(INSTANCE_THROTTLE) 290 | useful_info = [ 291 | 'description', 292 | 'max_instances', 293 | 'tags', 294 | 'mentors', 295 | 'is_beginner', 296 | 'categories', 297 | 'time_to_complete_in_days' 298 | ] 299 | for key in useful_info: 300 | ti[key] = task_definition[key] 301 | 302 | write_instance(instdir, ti, cookies) 303 | count += 1 304 | next_page = 0 305 | if instances['next']: 306 | result = re.search(r'page=(\d+)', instances['next']) 307 | if result: 308 | next_page = int(result.group(1)) 309 | 310 | 311 | def main(): 312 | print("GCI Quotient: noun | gē-sē-ī kwō-shənt") 313 | print(" \"the magnitude of a specified characteristic or quality\"") 314 | 315 | client = gciclient.GCIAPIClient( 316 | auth_token=FLAGS.apikey, 317 | url_prefix=FLAGS.url) 318 | value = '' 319 | with open('sacsid_cookie.txt', 'r') as cookie_file: 320 | for line in cookie_file: 321 | value = line 322 | break 323 | cookie_file.close() 324 | 325 | value = value.replace('\n', '') 326 | cookies = { 327 | 'SACSID': value 328 | } 329 | 330 | try: 331 | os.mkdir(FLAGS.datadir) 332 | except OSError as e: 333 | if e.errno != errno.EEXIST: 334 | raise 335 | 336 | if os.path.isdir(FLAGS.datadir): 337 | print('...saving GCI data to [%s]' % FLAGS.datadir) 338 | 339 | save_tasks(FLAGS.datadir, client, cookies) 340 | save_instances(FLAGS.datadir, client, cookies) 341 | 342 | if __name__ == '__main__': 343 | main() 344 | 345 | --------------------------------------------------------------------------------