├── tests
    ├── __init__.py
    ├── utils
    │   ├── __init__.py
    │   └── utils_tests.py
    └── models
    │   ├── __init__.py
    │   ├── jobs_tests.py
    │   └── export_request_tests.py
├── courseraresearchexports
    ├── __init__.py
    ├── db
    │   ├── __init__.py
    │   └── db.py
    ├── exports
    │   ├── __init__.py
    │   ├── api.py
    │   └── utils.py
    ├── containers
    │   ├── __init__.py
    │   ├── utils.py
    │   └── client.py
    ├── constants
    │   ├── __init__.py
    │   ├── container_constants.py
    │   ├── db_constants.py
    │   └── api_constants.py
    ├── models
    │   ├── __init__.py
    │   ├── ContainerInfo.py
    │   ├── ClickstreamDownloadLinksRequest.py
    │   ├── utils.py
    │   ├── ExportDb.py
    │   ├── ExportRequestWithMetadata.py
    │   └── ExportRequest.py
    ├── commands
    │   ├── __init__.py
    │   ├── version.py
    │   ├── utils.py
    │   ├── db.py
    │   ├── containers.py
    │   └── jobs.py
    ├── sql
    │   ├── demographic_survey.sql
    │   └── enrollments.sql
    └── main.py
├── MANIFEST.in
├── test_requirements.txt
├── tox.ini
├── .travis.yml
├── .gitignore
├── setup.py
├── LICENSE
└── README.rst


/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/utils/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/models/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/courseraresearchexports/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | recursive-include courseraresearchexports/sql *
2 | 


--------------------------------------------------------------------------------
/test_requirements.txt:
--------------------------------------------------------------------------------
1 | mock==1.0.1
2 | nose==1.3.7
3 | pep8==1.6.2
4 | testfixtures==4.1.2
5 | 


--------------------------------------------------------------------------------
/courseraresearchexports/db/__init__.py:
--------------------------------------------------------------------------------
1 | __all__ = [
2 |     "db"
3 | ]
4 | 
5 | from . import *  # noqa
6 | 


--------------------------------------------------------------------------------
/courseraresearchexports/exports/__init__.py:
--------------------------------------------------------------------------------
1 | __all__ = [
2 |     "api",
3 |     "utils"
4 | ]
5 | 
6 | from . import *  # noqa
7 | 


--------------------------------------------------------------------------------
/courseraresearchexports/containers/__init__.py:
--------------------------------------------------------------------------------
1 | __all__ = [
2 |     "client",
3 |     "utils"
4 | ]
5 | 
6 | from . import *  # noqa
7 | 


--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
 1 | [tox]
 2 | envlist = py27
 3 | [testenv]
 4 | deps = 
 5 |     nose
 6 |     mock
 7 |     testfixtures
 8 | commands =
 9 |     nosetests
10 | 


--------------------------------------------------------------------------------
/courseraresearchexports/constants/__init__.py:
--------------------------------------------------------------------------------
1 | __all__ = [
2 |     "api_constants",
3 |     "db_constants",
4 |     "container_constants"
5 | ]
6 | 
7 | from . import *  # noqa
8 | 


--------------------------------------------------------------------------------
/courseraresearchexports/models/__init__.py:
--------------------------------------------------------------------------------
 1 | __all__ = [
 2 |     "ExportRequestWithMetadata",
 3 |     "ExportRequest",
 4 |     "ClickstreamDownloadLinksRequest",
 5 |     "ContainerInfo",
 6 |     "ExportDb",
 7 |     "utils"
 8 | ]
 9 | 
10 | from . import *  # noqa
11 | 


--------------------------------------------------------------------------------
/courseraresearchexports/commands/__init__.py:
--------------------------------------------------------------------------------
 1 | "Commands and their implementations for Coursera's research export tools."
 2 | 
 3 | __all__ = [
 4 |     "version",
 5 |     "jobs",
 6 |     "containers",
 7 |     "db",
 8 |     "utils"
 9 | ]
10 | 
11 | from . import *  # noqa
12 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: python
 2 | python:
 3 |   - "2.7"
 4 | 
 5 | # command to install dependencies
 6 | install:
 7 |   - "pip install ."
 8 |   - "pip install -r test_requirements.txt"
 9 | 
10 | # command to run tests & check style
11 | script:
12 |  - nosetests
13 |  - pep8 courseraresearchexports tests
14 | 


--------------------------------------------------------------------------------
/courseraresearchexports/constants/container_constants.py:
--------------------------------------------------------------------------------
1 | import os
2 | 
3 | COURSERA_DOCKER_LABEL = 'courseraResearchExport'
4 | COURSERA_LOCAL_FOLDER = os.path.expanduser('~/.coursera/exports/')
5 | POSTGRES_DOCKER_IMAGE = 'postgres:9.5'
6 | POSTGRES_INIT_MSG = 'PostgreSQL init process complete; ready for start up.'
7 | POSTGRES_READY_MSG = 'database system is ready to accept connections'
8 | 


--------------------------------------------------------------------------------
/courseraresearchexports/constants/db_constants.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2016 Coursera
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | HASHED_USER_ID_COLUMN_TO_SOURCE_TABLE = {
16 |     '[partner_user_id]': 'users',
17 |     '[demographics_user_id]': 'demographics_answers',
18 |     '[feedback_user_id]': 'feedback_course_ratings',
19 |     '[assessments_user_id]': 'assessment_actions',
20 |     '[peer_assignments_user_id]': 'peer_submissions',
21 |     '[discussions_user_id]': 'discussion_answers',
22 |     '[programming_assignments_user_id]': 'programming_submissions',
23 | }
24 | 


--------------------------------------------------------------------------------
/tests/utils/utils_tests.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | # Copyright 2016 Coursera
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | from courseraresearchexports.models import utils
18 | from mock import Mock
19 | from mock import patch
20 | import requests
21 | 
22 | fake_partner_short_name = 'fake_partner_short_name'
23 | fake_partner_id = 1
24 | fake_partner_response = {'elements': [{"id": str(fake_partner_id)}]}
25 | 
26 | 
27 | @patch.object(requests, 'get')
28 | def test_partner_id_lookup(mockget):
29 |     mock_partners_get_response = Mock()
30 |     mock_partners_get_response.json.return_value = fake_partner_response
31 |     mockget.return_value = mock_partners_get_response
32 |     inferred_partner_id = utils.lookup_partner_id_by_short_name(
33 |         fake_partner_short_name)
34 | 
35 |     assert inferred_partner_id == fake_partner_id
36 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Byte-compiled / optimized / DLL files
 2 | __pycache__/
 3 | *.py[cod]
 4 | *$py.class
 5 | 
 6 | # C extensions
 7 | *.so
 8 | 
 9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | 
27 | # PyInstaller
28 | #  Usually these files are written by a python script from a template
29 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
30 | *.manifest
31 | *.spec
32 | 
33 | # Installer logs
34 | pip-log.txt
35 | pip-delete-this-directory.txt
36 | 
37 | # Unit test / coverage reports
38 | htmlcov/
39 | .tox/
40 | .coverage
41 | .coverage.*
42 | .cache
43 | nosetests.xml
44 | coverage.xml
45 | *,cover
46 | .hypothesis/
47 | 
48 | # Translations
49 | *.mo
50 | *.pot
51 | 
52 | # Django stuff:
53 | *.log
54 | local_settings.py
55 | 
56 | # Flask stuff:
57 | instance/
58 | .webassets-cache
59 | 
60 | # Scrapy stuff:
61 | .scrapy
62 | 
63 | # Sphinx documentation
64 | docs/_build/
65 | 
66 | # PyBuilder
67 | target/
68 | 
69 | # IPython Notebook
70 | .ipynb_checkpoints
71 | 
72 | # pyenv
73 | .python-version
74 | 
75 | # celery beat schedule file
76 | celerybeat-schedule
77 | 
78 | # dotenv
79 | .env
80 | 
81 | # virtualenv
82 | venv/
83 | ENV/
84 | 
85 | # Spyder project settings
86 | .spyderproject
87 | 
88 | # Rope project settings
89 | .ropeproject
90 | 
91 | # IDEA project settings
92 | .idea
93 | 
94 | # Ignore DS_STORE
95 | .DS_Store
96 | 


--------------------------------------------------------------------------------
/courseraresearchexports/constants/api_constants.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2016 Coursera
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
16 | RESEARCH_EXPORTS_APP = 'manage_research_exports'
17 | RESEARCH_EXPORTS_API = 'https://www.coursera.org/api/onDemandExports.v2/'
18 | COURSE_API = 'https://www.coursera.org/api/onDemandCourses.v1/'
19 | PARTNER_API = 'https://www.coursera.org/api/partners.v1/'
20 | CLICKSTREAM_API = 'https://www.coursera.org/api/clickstreamExportsDownload.v1/'
21 | ANONYMITY_LEVEL_COORDINATOR = 'HASHED_IDS_NO_PII'
22 | ANONYMITY_LEVEL_ISOLATED = 'HASHED_IDS_WITH_ISOLATED_UGC_NO_PII'
23 | ANONYMITY_LEVELS = [ANONYMITY_LEVEL_COORDINATOR, ANONYMITY_LEVEL_ISOLATED]
24 | EXPORT_TYPE_TABLES = 'RESEARCH_WITH_SCHEMAS'
25 | EXPORT_TYPE_CLICKSTREAM = 'RESEARCH_EVENTING'
26 | EXPORT_TYPE_GRADEBOOK = 'GRADEBOOK'
27 | EXPORT_TYPES = [EXPORT_TYPE_TABLES, EXPORT_TYPE_CLICKSTREAM,
28 |                 EXPORT_TYPE_GRADEBOOK]
29 | SCHEMA_NAMES = [
30 |     'demographics',
31 |     'users',
32 |     'course_membership',
33 |     'course_progress',
34 |     'feedback',
35 |     'assessments',
36 |     'course_grades',
37 |     'peer_assignments',
38 |     'staff_graded_assignments',
39 |     'discussions',
40 |     'programming_assignments',
41 |     'course_content',
42 |     'ecb',
43 |     'notebooks',
44 |     'transactions']
45 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup
 2 | 
 3 | 
 4 | def readme():
 5 |     with open('README.rst') as f:
 6 |         return f.read()
 7 | 
 8 | setup(
 9 |     name='courseraresearchexports',
10 |     version='0.0.29',
11 |     description='Command line tool for convenient access to '
12 |     'Coursera Research Data Exports.',
13 |     long_description=readme(),
14 |     long_description_content_type='text/markdown',
15 |     classifiers=[
16 |         'Development Status :: 5 - Production/Stable',
17 |         'License :: OSI Approved :: Apache Software License',
18 |         'Programming Language :: Python :: 2.7',
19 |     ],
20 |     keywords='coursera',
21 |     url='https://github.com/coursera/courseraresearchexports',
22 |     author='Chris Liu',
23 |     author_email='cliu@coursera.org',
24 |     license='Apache',
25 |     entry_points={
26 |         'console_scripts': [
27 |             'courseraresearchexports = courseraresearchexports.main:main',
28 |         ],
29 |     },
30 |     packages=['courseraresearchexports',
31 |               'courseraresearchexports.commands',
32 |               'courseraresearchexports.constants',
33 |               'courseraresearchexports.exports',
34 |               'courseraresearchexports.containers',
35 |               'courseraresearchexports.models',
36 |               'courseraresearchexports.db'],
37 |     install_requires=[
38 |         'argcomplete>=1.4.1',
39 |         'courseraoauth2client>=0.0.1',
40 |         'requests>=2.7.0,<2.11',
41 |         'docker-py>=1.2.3',
42 |         'tqdm>=4.8.4',
43 |         'tabulate>=0.7.5',
44 |         'python-dateutil>=2.5.3',
45 |         'SQLAlchemy>=1.0.15',
46 |         'psycopg2>=2.6.2'
47 |     ],
48 |     test_suite='nose.collector',
49 |     tests_require=['nose', 'nose-cover3'],
50 |     # IMPORTANT: This makes MANIFEST.in work. DO NOT USE `package_data`, as
51 |     # it does not work with sdist correctly.
52 |     # See http://flask.pocoo.org/docs/0.11/patterns/distribute/ for details
53 |     include_package_data=True,
54 |     zip_safe=False
55 | )
56 | 


--------------------------------------------------------------------------------
/courseraresearchexports/commands/version.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2016 Coursera
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | """
16 | Coursera's command line SDK for interacting with research data exports.
17 | 
18 | You may install it from source, or via pip.
19 | """
20 | 
21 | import sys
22 | import logging
23 | 
24 | 
25 | def command_version(args):
26 |     """Implements the version subcommand"""
27 | 
28 |     # See http://stackoverflow.com/questions/17583443
29 |     from pkg_resources import get_distribution, DistributionNotFound
30 |     import os.path
31 | 
32 |     try:
33 |         _dist = get_distribution('courseraresearchexports')
34 |         # Normalize case for Windows systems
35 |         dist_loc = os.path.normcase(_dist.location)
36 |         here = os.path.normcase(__file__)
37 |         if not here.startswith(
38 |             os.path.join(
39 |                 dist_loc,
40 |                 'courseraresearchexports')):
41 |             # not installed, but there is another version that *is*
42 |             raise DistributionNotFound
43 |     except DistributionNotFound:
44 |         __version__ = 'Please install this project with setup.py'
45 |     else:
46 |         __version__ = _dist.version
47 | 
48 |     if args.quiet and args.quiet > 0:
49 |         logging.info(__version__)
50 |     else:
51 |         logging.info("Your {prog}'s version is:\n\t{version}"
52 |                      .format(prog=sys.argv[0], version=__version__))
53 | 
54 | 
55 | def parser(subparsers):
56 |     """Build an argparse argument parser to parse the command line."""
57 | 
58 |     # create the parser for the version subcommand.
59 |     parser_version = subparsers.add_parser(
60 |         'version',
61 |         help="Output the version of %(prog)s to the console.")
62 |     parser_version.set_defaults(func=command_version)
63 | 
64 |     return parser_version
65 | 


--------------------------------------------------------------------------------
/courseraresearchexports/models/ContainerInfo.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2016 Coursera
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import dateutil.parser
16 | 
17 | 
18 | class ContainerInfo:
19 |     """
20 |     Represents the relevant information about a docker container used to store
21 |     a database of Coursera Export data.
22 |     """
23 | 
24 |     def __init__(self, name=None, id=None, host_port=None, host_ip=None,
25 |                  creation_time=None, database_name=None, status=None):
26 |         self.name = name
27 |         self.id = id
28 |         self.short_id = id[:12] if id else None
29 |         self.host_port = host_port
30 |         self.host_ip = host_ip
31 |         self.creation_time = creation_time
32 |         self.status = status
33 |         self.database_name = database_name
34 | 
35 |     @classmethod
36 |     def from_container(cls, container_name, docker_client):
37 |         """
38 |         Create ContainerInfo using the response from docker-py Client's
39 |         `inspect-container` method.
40 |         :param container_dict:
41 |         :return container_info: ContainerInfo
42 |         """
43 |         container_dict = docker_client.inspect_container(container_name)
44 |         host_config = container_dict['HostConfig']['PortBindings']
45 |         network_settings = container_dict['NetworkSettings']['Ports']
46 | 
47 |         assigned_port = int(host_config['5432/tcp'][0]['HostPort'])
48 |         ip_if_running = network_settings and network_settings[
49 |             '5432/tcp'][0]['HostIp']
50 | 
51 |         return cls(
52 |             name=container_dict['Name'][1:],  # remove prepended '\'
53 |             id=container_dict['Id'],
54 |             creation_time=dateutil.parser.parse(container_dict['Created']),
55 |             database_name=container_dict['Config']['Labels']['database_name'],
56 |             status=container_dict['State']['Status'],
57 |             host_port=assigned_port,
58 |             host_ip=ip_if_running)
59 | 


--------------------------------------------------------------------------------
/tests/models/jobs_tests.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | # Copyright 2016 Coursera
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | from courseraresearchexports.commands import jobs
18 | from courseraresearchexports.models.ExportRequest import ExportRequest
19 | from courseraresearchexports.models.ExportRequestWithMetadata import \
20 |     ExportRequestWithMetadata
21 | from mock import MagicMock
22 | from mock import patch
23 | import argparse
24 | 
25 | 
26 | fake_course_id = 'fake_course_id'
27 | fake_course_slug = 'fake_course_slug'
28 | 
29 | 
30 | @patch('courseraresearchexports.commands.jobs.api.get_all')
31 | def test_get_all(api_get_all):
32 |     api_get_all.return_value = []
33 | 
34 |     jobs.get_all(argparse.Namespace())
35 | 
36 |     api_get_all.assert_any_call()
37 | 
38 | 
39 | @patch('courseraresearchexports.models.utils.lookup_course_slug_by_id')
40 | @patch('courseraresearchexports.commands.jobs.api.get')
41 | def test_get(api_get, lookup_course_slug_by_id):
42 |     lookup_course_slug_by_id.return_value = fake_course_slug
43 |     api_get.return_value = [
44 |         ExportRequestWithMetadata(course_id=fake_course_id)
45 |     ]
46 |     args = argparse.Namespace()
47 |     args.id = fake_course_id
48 | 
49 |     jobs.get(args)
50 | 
51 |     api_get.assert_called_with(fake_course_id)
52 | 
53 | 
54 | @patch('courseraresearchexports.commands.jobs.api.post')
55 | def test_request(api_post):
56 |     api_post.return_value = [
57 |         ExportRequestWithMetadata(course_id=fake_course_id)
58 |     ]
59 |     args = argparse.Namespace()
60 |     args.course_id = fake_course_id
61 |     args.course_slug = None
62 |     args.partner_id = None
63 |     args.partner_short_name = None
64 |     args.group_id = None
65 |     args.export_type = None
66 |     args.user_id_hashing = None
67 |     args.purpose = None
68 |     args.schemas = None
69 | 
70 |     jobs.request_tables(args)
71 | 
72 |     export_request, = api_post.call_args[0]
73 |     assert export_request.course_id == fake_course_id
74 | 


--------------------------------------------------------------------------------
/courseraresearchexports/models/ClickstreamDownloadLinksRequest.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2016 Coursera
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import logging
16 | 
17 | from courseraresearchexports.models import utils
18 | 
19 | 
20 | class ClickstreamDownloadLinksRequest:
21 |     """
22 |     Represents a request for clickstream download links.
23 |     """
24 | 
25 |     def __init__(self, course_id=None, partner_id=None, interval=None,
26 |                  **kwargs):
27 |         self.course_id = course_id
28 |         self.partner_id = partner_id
29 |         self.interval = interval
30 | 
31 |     @staticmethod
32 |     def from_args(**kwargs):
33 |         """
34 |         Create a ClickstreamDownloadLinkRequest from arguments. Performs
35 |         course_id/partner_id inference.
36 |         :param kwargs:
37 |         :return eventing_links_request: ClickstreamDownloadLinksRequest
38 |         """
39 |         if kwargs.get('course_slug') and not kwargs.get('course_id'):
40 |             kwargs['course_id'] = utils.lookup_course_id_by_slug(
41 |                 kwargs['course_slug'])
42 |         elif kwargs.get('partner_short_name') and not kwargs.get('partner_id'):
43 |             kwargs['partner_id'] = \
44 |                 utils.lookup_partner_id_by_short_name(
45 |                     kwargs['partner_short_name'])
46 |         elif kwargs.get('group_id'):
47 |             logging.error(
48 |                 'Eventing exports by group is not currently supported. '
49 |                 'Please see: '
50 |                 'https://partner.coursera.help/hc/articles/360021121132'
51 |                 )
52 |             raise ValueError('Eventing exports by group is not supported.')
53 | 
54 |         return ClickstreamDownloadLinksRequest(**kwargs)
55 | 
56 |     @property
57 |     def scope(self):
58 |         """
59 |         API specific format for request scope context.
60 |         :return scope:
61 |         """
62 |         if self.course_id:
63 |             return 'courseContext~{}'.format(self.course_id)
64 |         elif self.partner_id:
65 |             return 'partnerContext~{}'.format(self.partner_id)
66 | 
67 |     def to_url_params(self):
68 |         """
69 |         API specific parameters for POST request.
70 |         :return:
71 |         """
72 |         url_params = {'action': 'generateLinks', 'scope': self.scope}
73 |         if self.interval:
74 |             url_params['startDate'] = self.interval[0]
75 |             url_params['endDate'] = self.interval[1]
76 | 
77 |         return url_params
78 | 


--------------------------------------------------------------------------------
/courseraresearchexports/sql/demographic_survey.sql:
--------------------------------------------------------------------------------
 1 | /*
 2 | demographic_survey
 3 | This query partially denormalizes the demographics tables to create aggregate
 4 | information about the users in the present data export.
 5 | 
 6 | Columns
 7 | coursera_user_id
 8 | demographic_survey_submission_dt
 9 | demographic_survey_gender
10 | demographic_survey_age
11 | demographic_survey_country_cd_of_birth
12 | demographic_survey_us_postal_code
13 | demographic_survey_spanish_hispanic_or_latino_descent
14 | demographic_survey_race
15 | demographic_survey_highest_level_of_schooling
16 | demographic_survey_currently_enrolled_in_an_educational_program
17 | demographic_survey_level_of_current_educational_program
18 | demographic_survey_subject_area_of_degree
19 | demographic_survey_current_employment_status
20 | demographic_survey_area_of_industry_currently_employed_in
21 | demographic_survey_english_proficiency
22 | demographic_survey_other_languages_spoken
23 | */
24 | 
25 | SELECT
26 |     a.[demographics_user_id]
27 |     ,MAX(a.submission_ts::DATE) AS demographic_survey_submission_dt
28 |     ,MAX(CASE WHEN a.question_id = 11
29 |         THEN c.choice_desc END) AS demographic_survey_gender
30 |     ,MAX(CASE WHEN a.question_id = 12
31 |         THEN DATE_PART('y', CURRENT_DATE) - a.answer_int END) AS demographic_survey_age
32 |     ,UPPER(LEFT(MAX(CASE WHEN a.question_id = 13
33 |         THEN c.choice_desc END), 2)) AS demographic_survey_country_cd_of_birth
34 |     ,MAX(CASE WHEN a.question_id = 15
35 |         THEN a.answer_int END) AS demographic_survey_us_postal_code
36 |     ,MAX(CASE WHEN a.question_id = 16
37 |         THEN c.choice_desc END) AS demographic_survey_spanish_hispanic_or_latino_descent
38 |     ,RTRIM(STRING_AGG(CASE WHEN a.question_id = 17 THEN c.choice_desc END, ';')) AS demographic_survey_race
39 |     ,MAX(CASE WHEN a.question_id = 18
40 |         THEN c.choice_desc END) AS demographic_survey_highest_level_of_schooling
41 |     ,MAX(CASE WHEN a.question_id = 19
42 |         THEN c.choice_desc END) AS demographic_survey_currently_enrolled_in_an_educational_program
43 |     ,MAX(CASE WHEN a.question_id = 20
44 |         THEN c.choice_desc END) AS demographic_survey_level_of_current_educational_program
45 |     ,RTRIM(STRING_AGG(CASE WHEN a.question_id = 21
46 |         THEN c.choice_desc END, ';')) AS demographic_survey_subject_area_of_degree
47 |     ,MAX(CASE WHEN a.question_id = 22
48 |         THEN c.choice_desc END) AS demographic_survey_current_employment_status
49 |     ,MAX(CASE WHEN a.question_id = 23
50 |         THEN c.choice_desc END) AS demographic_survey_area_of_industry_currently_employed_in
51 |     ,MAX(CASE WHEN a.question_id = 24
52 |         THEN c.choice_desc END) AS demographic_survey_english_proficiency
53 |     ,RTRIM(STRING_AGG(CASE WHEN a.question_id = 25
54 |         THEN c.choice_desc END, ';')) AS demographic_survey_other_languages_spoken
55 | FROM demographics_answers a
56 | JOIN demographics_choices c USING (question_id, choice_id)
57 | WHERE a.question_id BETWEEN 11 AND 25
58 |     /* question_id and choice_id are already matched by the USING clause of
59 |        the join above, so no extra equality predicates are needed here */
60 | GROUP BY 1
61 | 


--------------------------------------------------------------------------------
/courseraresearchexports/commands/utils.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2016 Coursera
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import logging
16 | import sys
17 | 
18 | import requests
19 | 
20 | 
21 | def add_logging_parser(main_parser):
22 |     """Build an argparse argument parser to parse the command line."""
23 | 
24 |     main_parser.set_defaults(setup_logging=set_logging_level)
25 | 
26 |     verbosity_group = main_parser.add_mutually_exclusive_group(required=False)
27 |     verbosity_group.add_argument(
28 |         '--verbose',
29 |         '-v',
30 |         action='count',
31 |         help='Output more verbose logging. Can be specified multiple times.')
32 |     verbosity_group.add_argument(
33 |         '--quiet',
34 |         '-q',
35 |         action='count',
36 |         help='Output less information to the console during operation. Can be '
37 |         'specified multiple times.')
38 | 
39 |     main_parser.add_argument(
40 |         '--silence-urllib3',
41 |         action='store_true',
42 |         help='Silence urllib3 warnings. See '
43 |         'https://urllib3.readthedocs.org/en/latest/security.html for details.')
44 | 
45 |     return verbosity_group
46 | 
47 | 
def set_logging_level(args):
    """
    Computes and sets the logging levels from the parsed arguments.

    The root logger defaults to INFO while the `sqlalchemy.engine` and
    `requests.packages.urllib3` loggers default to WARNING. Each `--verbose`
    lowers the root (and sqlalchemy) level to DEBUG and then to 5 ("trace"),
    each `--quiet` raises the root level to WARNING and then ERROR.

    :param args: parsed argparse namespace; `verbose`, `quiet` and
        `silence_urllib3` are all optional
    :raises SystemExit: with code 2 when `verbose` or `quiet` is set to a
        value that is not a positive count
    """
    logging.basicConfig()
    root_logger = logging.getLogger()
    sqlalchemy_logger = logging.getLogger('sqlalchemy.engine')
    urllib3_logger = logging.getLogger('requests.packages.urllib3')

    level = logging.INFO
    sqlalchemy_logger.setLevel(logging.WARNING)
    urllib3_logger.setLevel(logging.WARNING)

    if "verbose" in args and args.verbose is not None:
        urllib3_logger.setLevel(logging.NOTSET)  # Unset
        if args.verbose > 1:
            level = 5  # "Trace" level
        elif args.verbose > 0:
            level = logging.DEBUG
        else:
            logging.critical("verbose is an unexpected value. {} exiting."
                             .format(args.verbose))
            sys.exit(2)
        sqlalchemy_logger.setLevel(level)
    elif "quiet" in args and args.quiet is not None:
        if args.quiet > 1:
            level = logging.ERROR
        elif args.quiet > 0:
            level = logging.WARNING
        else:
            logging.critical("quiet is an unexpected value. {} exiting."
                             .format(args.quiet))
            # The message announces an exit; mirror the verbose branch
            # instead of silently carrying on with the default level.
            sys.exit(2)

    root_logger.setLevel(level)

    # Tolerate namespaces built without the --silence-urllib3 flag.
    if getattr(args, 'silence_urllib3', False):
        # See: https://urllib3.readthedocs.org/en/latest/security.html
        requests.packages.urllib3.disable_warnings()
80 | 


--------------------------------------------------------------------------------
/courseraresearchexports/main.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | # PYTHON_ARGCOMPLETE_OK
 4 | 
 5 | # Copyright 2016 Coursera
 6 | #
 7 | # Licensed under the Apache License, Version 2.0 (the "License");
 8 | # you may not use this file except in compliance with the License.
 9 | # You may obtain a copy of the License at
10 | #
11 | #     http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing, software
14 | # distributed under the License is distributed on an "AS IS" BASIS,
15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | # See the License for the specific language governing permissions and
17 | # limitations under the License.
18 | 
19 | """
20 | Coursera's tools for interacting with research data exports.
21 | 
22 | You may install it from source, or via pip.
23 | """
24 | 
25 | import argcomplete
26 | import argparse
27 | import logging
28 | import sys
29 | 
30 | from courseraresearchexports import commands
31 | from courseraresearchexports.commands import utils
32 | 
33 | 
def build_parser():
    """
    Build an argparse argument parser to parse the command line.

    The parser carries the shared logging flags and one subparser for each
    of the `version`, `jobs`, `containers` and `db` subcommands.

    :return parser: the configured argparse.ArgumentParser
    """

    # The contribution link previously pointed at a misspelled repository
    # name ("courseraresarchexports"); it now matches the issues link.
    parser = argparse.ArgumentParser(
        description="""Coursera tools for interacting with research exports.
        There are a number of subcommands, each with their own help
        documentation. Feel free to view them by executing `%(prog)s
        SUB_COMMAND -h`. For example: `%(prog)s jobs -h`.""",
        epilog="""Please file bugs on github at:
        https://github.com/coursera/courseraresearchexports/issues. If you
        would like to contribute to this tool's development, check us out at:
        https://github.com/coursera/courseraresearchexports""")

    utils.add_logging_parser(parser)

    # We have a number of subcommands. These subcommands have their own
    # subparsers. Each subcommand should set a default value for the 'func'
    # option. We then call the parsed 'func' function, and execution carries on
    # from there.
    subparsers = parser.add_subparsers()

    # create the parser for the version subcommand.
    commands.version.parser(subparsers)

    # create the parser for the jobs subcommand.
    commands.jobs.parser(subparsers)

    # create the parser for the containers subcommand.
    commands.containers.parser(subparsers)

    # create the parser for the db subcommand.
    commands.db.parser(subparsers)

    return parser
70 | 
71 | 
def main():
    """
    Boots up the command line tool.

    Parses the command line, configures logging through the parser's
    `setup_logging` default and dispatches to the `func` chosen by the
    selected subcommand.

    :return: whatever the subcommand function returns
    :raises SystemExit: with code 2 when no subcommand was given, with code 1
        when the subcommand raised an exception
    """
    logging.captureWarnings(True)
    parser = build_parser()

    argcomplete.autocomplete(parser)

    args = parser.parse_args()
    # Configure logging
    args.setup_logging(args)

    # On Python 3 argparse treats subcommands as optional, so `func` may be
    # missing. Report a usage error instead of logging an AttributeError.
    if not hasattr(args, 'func'):
        parser.error('too few arguments')

    # Dispatch into the appropriate subcommand function.
    try:
        return args.func(args)
    except Exception:
        # SystemExit and KeyboardInterrupt are not Exception subclasses and
        # propagate untouched.
        logging.exception('Problem when running command. Sorry!')
        sys.exit(1)
92 | 
93 | 
# Run the command line tool when this module is executed as a script.
if __name__ == "__main__":
    main()
96 | 


--------------------------------------------------------------------------------
/courseraresearchexports/sql/enrollments.sql:
--------------------------------------------------------------------------------
  1 | /*
  2 | enrollments
  3 | An enrollment is a unique learner-course pair. Many tables log a learner's
  4 | interactions in a course, and this view will aggregate key metrics for simple
  5 | reporting purposes.
  6 | 
  7 | Columns
  8 | coursera_user_id
  9 | course_id
 10 | commenced_dt
 11 | is_enrollment_active
 12 | activity_first_dt
 13 | activity_last_dt    
 14 | num_days_active
 15 | is_enrollment_completed
 16 | completion_dt
 17 | was_paid_or_finaid
 18 | */
 19 | 
/*
Any user that reaches the LEARNER membership role in a course is considered a
commenced enrollment. This excludes users who pre-enroll in the course and
then unenroll before the course starts.

NOTE(review): [partner_user_id] is presumably a placeholder that is replaced
with the actual user id column before the query runs - confirm with caller.
*/
WITH enrollment_commenced AS (
    SELECT
        cm.[partner_user_id]
        ,course_id
        ,MIN(course_membership_ts)::DATE AS commenced_dt
    FROM course_memberships AS cm
    WHERE
        course_membership_role = 'LEARNER'
    GROUP BY 1,2
)

/*
Learners' progress on course items (e.g. lectures, quizzes, etc.) is
summarized in the course_progress table. Generate their "activity" metrics
(first day, last day and number of distinct days) with aggregate functions.
*/
,enrollment_progress AS (
    SELECT
        cp.[partner_user_id]
        ,course_id
        ,MIN(course_progress_ts)::DATE AS activity_first_dt
        ,MAX(course_progress_ts)::DATE AS activity_last_dt
        ,COUNT(DISTINCT course_progress_ts::DATE) AS num_days_active
    FROM course_progress AS cp -- contains 'started' or 'completed' progress
    GROUP BY 1,2
)

/*
Learners who complete the course are logged by reaching one of two passing
states in the course_grades table. Generate the date they first passed.
*/
,enrollment_completed AS (
    SELECT
        cg.[partner_user_id]
        ,course_id
        ,MIN(course_grade_ts)::DATE AS completion_dt
    FROM course_grades AS cg -- contains when the learner reached the highest grade
    WHERE
        course_passing_state_id IN (1,2) -- 'passed' or 'verified passed' states
    GROUP BY 1,2
)

/*
Learners can own the course, either by payment or receiving financial aid.
*/
,enrollment_ownership AS (
    SELECT
        uccp.[partner_user_id]
        ,course_id
        ,was_payment OR was_finaid_grant AS was_paid_or_finaid
    FROM users_courses__certificate_payments AS uccp
)

/*
Combine all learner-course stats into one final table. Every commenced
enrollment is kept; the LEFT JOINs leave the progress, completion and
ownership columns NULL when there is no matching row, which is what the
IS NOT NULL flags and the COALESCE below rely on.
*/
SELECT
    ec.[partner_user_id]
    ,course_id
    ,commenced_dt
    ,activity_first_dt IS NOT NULL AS is_enrollment_active
    ,activity_first_dt
    ,activity_last_dt
    ,num_days_active
    ,completion_dt IS NOT NULL AS is_enrollment_completed
    ,completion_dt
    ,COALESCE(was_paid_or_finaid, FALSE) AS was_paid_or_finaid
FROM enrollment_commenced AS ec
LEFT JOIN enrollment_progress
    USING ([partner_user_id], course_id)
LEFT JOIN enrollment_completed
    USING ([partner_user_id], course_id)
LEFT JOIN enrollment_ownership
    USING ([partner_user_id], course_id)
 99 | 
100 | 


--------------------------------------------------------------------------------
/courseraresearchexports/exports/api.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2016 Coursera
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | """
16 | Coursera's wrapper for data exports API.
17 | """
18 | 
19 | import requests
20 | from courseraoauth2client import oauth2
21 | from courseraresearchexports.models.utils import requests_response_to_model
22 | from courseraresearchexports.constants.api_constants import \
23 |     RESEARCH_EXPORTS_APP, RESEARCH_EXPORTS_API, CLICKSTREAM_API
24 | from courseraresearchexports.models.ExportRequestWithMetadata import \
25 |     ExportRequestWithMetadata
26 | 
27 | 
@requests_response_to_model(ExportRequestWithMetadata.from_response)
def get(export_job_id):
    """
    Fetch a single data export job from Coursera's Research Export Resource.
    The raw response is converted by the decorator using
    ExportRequestWithMetadata.from_response.
    :param export_job_id: id of the export job, joined onto the API url
    :return export_request_with_metadata: [ExportRequestWithMetaData]
    """
    authorizer = oauth2.build_oauth2(
        app=RESEARCH_EXPORTS_APP).build_authorizer()
    job_url = requests.compat.urljoin(RESEARCH_EXPORTS_API, export_job_id)

    return requests.get(url=job_url, auth=authorizer)
42 | 
43 | 
@requests_response_to_model(ExportRequestWithMetadata.from_response)
def get_all():
    """
    Query Coursera's Research Exports Resource with `q=my` to list the data
    export job requests created by the authorized user.
    NOTE(review): earlier documentation states the result is limited to the
    100 most recent requests; that limit is not visible in this function.
    :return export_requests: [ExportRequestWithMetaData]
    """
    authorizer = oauth2.build_oauth2(
        app=RESEARCH_EXPORTS_APP).build_authorizer()
    query = {'q': 'my'}

    return requests.get(
        url=RESEARCH_EXPORTS_API, auth=authorizer, params=query)
58 | 
59 | 
@requests_response_to_model(ExportRequestWithMetadata.from_response)
def post(export_request):
    """
    Create a data export job by posting the request's `to_json()` payload to
    the Research Exports API.
    :param export_request: object exposing `to_json()`
    :return export_request_with_metadata: [ExportRequestWithMetadata]
    """
    authorizer = oauth2.build_oauth2(
        app=RESEARCH_EXPORTS_APP).build_authorizer()
    payload = export_request.to_json()

    return requests.post(
        url=RESEARCH_EXPORTS_API, json=payload, auth=authorizer)
74 | 
75 | 
@requests_response_to_model(lambda response: response.json())
def get_clickstream_download_links(clickstream_download_links_request):
    """
    Request the download links for clickstream exports in a given scope.
    The decoded JSON body of the response is returned to the caller.
    :param clickstream_download_links_request: ClickstreamDownloadLinksRequest
    """
    authorizer = oauth2.build_oauth2(
        app=RESEARCH_EXPORTS_APP).build_authorizer()
    # The scope is sent as url parameters of the POST, not as a body.
    url_params = clickstream_download_links_request.to_url_params()

    return requests.post(
        url=CLICKSTREAM_API, params=url_params, auth=authorizer)
89 | 


--------------------------------------------------------------------------------
/courseraresearchexports/containers/utils.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2016 Coursera
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | import argparse
 16 | from io import BytesIO
 17 | import logging
 18 | import os
 19 | import tarfile
 20 | import time
 21 | import zipfile
 22 | 
 23 | from docker import Client
 24 | 
 25 | 
 26 | def extract_zip_archive(archive, dest, delete_archive=True):
 27 |     """
 28 |     Extracts a zip archive to `dest`
 29 |     :param export_archive:
 30 |     :param dest:
 31 |     :param delete_archive: delete the archive after extracting
 32 |     :return dest:
 33 |     """
 34 |     try:
 35 |         logging.debug('Extracting archive to {}'.format(dest))
 36 |         with zipfile.ZipFile(archive, 'r') as z:
 37 |             z.extractall(dest)
 38 |         if delete_archive:
 39 |             os.remove(archive)
 40 |     except:
 41 |         logging.error('Error in extracting zip archive {} to {}'.format(
 42 |             archive, dest))
 43 |         raise
 44 | 
 45 | 
 46 | def create_tar_archive(str, name='init-user-db.sh'):
 47 |     """
 48 |     Creates tar archive to load single file as suggested by
 49 |     https://gist.github.com/zbyte64/6800eae10ce082bb78f0b7a2cca5cbc2
 50 |     """
 51 |     archive_tarstream = BytesIO()
 52 |     archive_file = tarfile.TarFile(fileobj=archive_tarstream, mode='w')
 53 | 
 54 |     file_data = str.encode('utf8')
 55 |     file_info = tarfile.TarInfo(name)
 56 |     file_info.size = len(file_data)
 57 |     file_info.mtime = time.time()
 58 | 
 59 |     archive_file.addfile(file_info, BytesIO(file_data))
 60 |     archive_file.close()
 61 |     archive_tarstream.seek(0)
 62 | 
 63 |     return archive_tarstream
 64 | 
 65 | 
 66 | def get_next_available_port(containers_info):
 67 |     """
 68 |     Find next available port to map postgres port to host.
 69 |     :param containers_info:
 70 |     :return port:
 71 |     """
 72 |     ports = [container_info.host_port for container_info in containers_info]
 73 | 
 74 |     return (max(ports) + 1) if ports else 5433
 75 | 
 76 | 
 77 | def is_container_running(container_name, docker_client):
 78 |     """
 79 |     Check whether container is still running.
 80 |     :param container_name:
 81 |     :param docker_client:
 82 |     :return isRunning: Boolean
 83 |     """
 84 |     container_details = docker_client.inspect_container(container_name)
 85 | 
 86 |     return container_details['State']['Running']
 87 | 
 88 | 
 89 | def docker_client_arg_parser():
 90 |     """Builds an argparse parser for docker client connection flags."""
 91 |     # The following subcommands operate on a single containers. We centralize
 92 |     # all these options here.
 93 |     docker_parser = argparse.ArgumentParser(add_help=False)
 94 |     docker_parser.add_argument(
 95 |         '--docker-url',
 96 |         help='The url of the docker demon.')
 97 |     docker_parser.add_argument(
 98 |         '--timeout',
 99 |         type=int,
100 |         default=60,
101 |         help='Set the default timeout when interacting with the docker demon')
102 |     return docker_parser
103 | 
104 | 
def docker_client(docker_url=None, timeout=60):
    """
    Attempts to create a docker client.

     - docker_url: base url for docker; left to the client's default when
       empty
     - timeout: timeout for docker client
     - returns: a docker-py client
    """
    client_options = {'timeout': timeout, 'version': 'auto'}
    if docker_url:
        client_options['base_url'] = docker_url

    return Client(**client_options)
122 | 


--------------------------------------------------------------------------------
/courseraresearchexports/models/utils.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2016 Coursera
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | import logging
 16 | 
 17 | import requests
 18 | 
 19 | from courseraresearchexports.constants.api_constants import \
 20 |     COURSE_API, PARTNER_API
 21 | 
 22 | 
 23 | def requests_response_to_model(response_transformer):
 24 |     """
 25 |     Creates decorator to handles errors in response from API call and
 26 |     transforms response with response_handler_func
 27 |     :param response_transformer: function(response) -> Any
 28 |     :return:
 29 |     """
 30 |     def response_transform_decorator(original_func):
 31 |         """
 32 |         Creates wrapper around a function that returns response
 33 |         """
 34 |         def response_transformer_wrapper(*args, **kwargs):
 35 |             """
 36 |             Log errors and apply transformation in response_handler_func
 37 |             """
 38 |             try:
 39 |                 response = original_func(*args, **kwargs)
 40 |                 response.raise_for_status()
 41 | 
 42 |             except requests.exceptions.HTTPError:
 43 |                 help_string = ('Please consult the Coursera Data '
 44 |                                'Exports Guide for further assistance: '
 45 |                                'https://partner.coursera.help/hc/en-us/articles/360021121132.')  # noqa
 46 | 
 47 |                 if (response.status_code == 403):
 48 |                     help_string = ('Please authorize this application '
 49 |                                    'by running:\n'
 50 |                                    '\t$ courseraoauth2client config authorize --app manage_research_exports\n'  # noqa
 51 |                                    'See https://github.com/coursera/courseraoauth2client '  # noqa
 52 |                                    'for more information on authorization.\n'
 53 |                                    'For further assistance, consult the '
 54 |                                    'Coursera Data Exports Guide '
 55 |                                    'https://partner.coursera.help/hc/en-us/articles/360021121132.')  # noqa
 56 | 
 57 |                 logging.error(
 58 |                     'Request to {url} with body:\n\t{body}\nreceived response'
 59 |                     ':\n\t{text}\n'
 60 |                     '{help_string}\n'
 61 |                     .format(url=response.url,
 62 |                             text=response.text,
 63 |                             body=(response.request and response.request.body),
 64 |                             help_string=help_string))
 65 |                 raise
 66 | 
 67 |             return response_transformer(response)
 68 |         return response_transformer_wrapper
 69 |     return response_transform_decorator
 70 | 
 71 | 
 72 | @requests_response_to_model(
 73 |     lambda response: response.json()['elements'][0]['slug'])
 74 | def lookup_course_slug_by_id(course_id):
 75 |     """
 76 |     Find the course slug given an course_id
 77 |     """
 78 |     return requests.get(requests.compat.urljoin(COURSE_API, course_id))
 79 | 
 80 | 
 81 | @requests_response_to_model(
 82 |     lambda response: response.json()['elements'][0]['id'])
 83 | def lookup_course_id_by_slug(course_slug):
 84 |     """
 85 |     Find the course_id given a course_slug
 86 |     """
 87 |     payload = {'q': 'slug', 'slug': course_slug}
 88 |     return requests.get(COURSE_API, params=payload)
 89 | 
 90 | 
 91 | @requests_response_to_model(
 92 |     lambda response: int(response.json()['elements'][0]['id']))
 93 | def lookup_partner_id_by_short_name(partner_short_name):
 94 |     """
 95 |     Find the partner_id by short name
 96 |     """
 97 |     payload = {'q': 'shortName', 'shortName': partner_short_name}
 98 |     return requests.get(PARTNER_API, params=payload)
 99 | 
100 | 
@requests_response_to_model(
    lambda response: response.json()['elements'][0]['shortName'])
def lookup_partner_short_name_by_id(partner_id):
    """
    Look up a partner by id and yield the `shortName` of the first element
    in the API response.
    """
    partner_url = requests.compat.urljoin(PARTNER_API, str(partner_id))
    return requests.get(partner_url)
108 | 


--------------------------------------------------------------------------------
/courseraresearchexports/models/ExportDb.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2016 Coursera
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | import csv
 16 | 
 17 | from sqlalchemy import create_engine
 18 | from sqlalchemy.engine import reflection
 19 | 
 20 | from courseraresearchexports.models.ContainerInfo import ContainerInfo
 21 | 
 22 | 
 23 | class ExportDb:
 24 |     """
 25 |     Interface for accessing a database containing research export data.
 26 |     """
 27 |     def __init__(self, host_ip=None, host_port=None, db=None, **kwargs):
 28 | 
 29 |         if not (host_ip and host_port and db):
 30 |             raise ValueError(
 31 |                 'Host IP, port and database name must be specified')
 32 | 
 33 |         self.host_ip = host_ip
 34 |         self.host_port = host_port
 35 |         self.db = db
 36 |         self.engine = create_engine(
 37 |             "postgresql://{user}@{host}:{port}/{db}"
 38 |             .format(user='postgres',
 39 |                     host=self.host_ip,
 40 |                     port=self.host_port,
 41 |                     db=self.db))
 42 | 
 43 |     @classmethod
 44 |     def from_container(cls, container_name, docker_client):
 45 |         """
 46 |         Create ExportDb object directly from container_name identifier.
 47 |         :param container_name:
 48 |         :param docker_client:
 49 |         :return:
 50 |         """
 51 |         container_info = ContainerInfo.from_container(container_name,
 52 |                                                       docker_client)
 53 |         return cls(host_ip=container_info.host_ip,
 54 |                    host_port=container_info.host_port,
 55 |                    db=container_info.database_name)
 56 | 
 57 |     def create_view(self, name, sql_text):
 58 |         """
 59 |         Creates or overrides an existing view given a select statement.
 60 |         :param name:
 61 |         :param sql_text:
 62 |         :return:
 63 |         """
 64 |         view_statement = """
 65 |         DROP VIEW IF EXISTS {name};
 66 |         CREATE VIEW {name} AS {sql_text};
 67 |         """.format(name=name, sql_text=sql_text)
 68 | 
 69 |         self.engine.execute(view_statement)
 70 | 
 71 |     def unload(self, query, output_filename):
 72 |         """
 73 |         Unloads to a csv file given a query.
 74 |         :param query:
 75 |         :param output_filename:
 76 |         :return rowcount:
 77 |         """
 78 |         result = self.engine.execute(query)
 79 | 
 80 |         rowcount = result.rowcount
 81 | 
 82 |         with open(output_filename, 'wb') as csv_file:
 83 |             csv_obj = csv.writer(csv_file)
 84 |             csv_obj.writerow(result.keys())
 85 |             for row in result:
 86 |                 encoded_row = [col.encode('utf8')
 87 |                                if isinstance(col, unicode) else col
 88 |                                for col in row]
 89 |                 csv_obj.writerow(encoded_row)
 90 | 
 91 |         return rowcount
 92 | 
 93 |     def unload_relation(self, relation, output_filename):
 94 |         """
 95 |         Unload a table or view.
 96 |         :param relation:
 97 |         :param output_filename:
 98 |         :return rowcount:
 99 |         """
100 |         query = 'SELECT * FROM {relation};'.format(relation=relation)
101 |         rowcount = self.unload(query, output_filename)
102 |         return rowcount
103 | 
104 |     def get_columns(self, table):
105 |         """
106 |         Names of all the columns in a table.
107 |         :param table:
108 |         :return columns:
109 |         """
110 |         insp = reflection.Inspector.from_engine(self.engine)
111 |         return [column['name'] for column in insp.get_columns(table)]
112 | 
113 |     @property
114 |     def tables(self):
115 |         """
116 |         Names of all tables present on database.
117 |         """
118 |         insp = reflection.Inspector.from_engine(self.engine)
119 |         return insp.get_table_names()
120 | 
121 |     @property
122 |     def views(self):
123 |         """
124 |         Names of all views present on database.
125 |         """
126 |         insp = reflection.Inspector.from_engine(self.engine)
127 |         return insp.get_view_names()
128 | 


--------------------------------------------------------------------------------
/courseraresearchexports/exports/utils.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2016 Coursera
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | import logging
 16 | import os
 17 | from urlparse import urlparse
 18 | 
 19 | from tqdm import tqdm
 20 | import requests
 21 | 
 22 | from courseraresearchexports.constants.api_constants import \
 23 |     EXPORT_TYPE_CLICKSTREAM, EXPORT_TYPE_TABLES
 24 | 
 25 | from courseraresearchexports.exports import api
 26 | from courseraresearchexports.models.ClickstreamDownloadLinksRequest import \
 27 |     ClickstreamDownloadLinksRequest
 28 | 
 29 | 
 30 | def download(export_request, dest):
 31 |     """
 32 |     Download a data export job using a request id.
 33 |     """
 34 |     try:
 35 |         is_table_export = export_request.export_type == EXPORT_TYPE_TABLES
 36 |         is_clickstream_export = \
 37 |             export_request.export_type == EXPORT_TYPE_CLICKSTREAM
 38 | 
 39 |         _validate(export_request)
 40 | 
 41 |         if not os.path.exists(dest):
 42 |             logging.info('Creating destination folder: {}'.format(dest))
 43 |             os.makedirs(dest)
 44 | 
 45 |         if is_table_export:
 46 |             return [download_url(export_request.download_link, dest)]
 47 |         elif is_clickstream_export:
 48 |             links_request = ClickstreamDownloadLinksRequest.from_args(
 49 |                 course_id=export_request.course_id,
 50 |                 partner_id=export_request.partner_id,
 51 |                 interval=export_request.interval)
 52 |             download_links = api.get_clickstream_download_links(links_request)
 53 |             if len(download_links) == 0:
 54 |                 raise ValueError(
 55 |                     'Clickstream download links not found. This typically '
 56 |                     'means no data was available for the dates in '
 57 |                     'the specified interval: {interval}'
 58 |                     .format(interval=export_request.interval))
 59 |             return [download_url(link, dest) for link in download_links]
 60 |         else:
 61 |             raise ValueError('Require export_type is one of {} or {}'.format(
 62 |                 EXPORT_TYPE_TABLES,
 63 |                 EXPORT_TYPE_CLICKSTREAM))
 64 | 
 65 |     except Exception as err:
 66 |         logging.error('Download failed.\n{err}'.format(err=err))
 67 |         raise
 68 | 
 69 | 
 70 | def download_url(url, dest_folder):
 71 |     """
 72 |     Download url to dest_folder/FILENAME, where FILENAME is the last
 73 |     part of the url path.
 74 |     """
 75 |     filename = urlparse(url).path.split('/')[-1]
 76 |     full_filename = os.path.join(dest_folder, filename)
 77 |     response = requests.get(url, stream=True)
 78 |     chunk_size = 1024 * 1024
 79 |     logging.debug('Writing to file: {}'.format(full_filename))
 80 | 
 81 |     with open(full_filename, 'wb') as f:
 82 |         for data in tqdm(
 83 |                 iterable=response.iter_content(chunk_size),
 84 |                 total=int(response.headers['Content-length']) / chunk_size,
 85 |                 unit='MB',
 86 |                 desc=filename):
 87 |             f.write(data)
 88 |     return full_filename
 89 | 
 90 | 
 91 | def _validate(export_request):
 92 |     is_clickstream_export = \
 93 |         export_request.export_type == EXPORT_TYPE_CLICKSTREAM
 94 | 
 95 |     if not export_request.download_link:
 96 |         if export_request.status in ['PENDING', 'IN_PROGRESS']:
 97 |             logging.error(
 98 |                 'Export request {} is currently {} and is not ready for'
 99 |                 'download. Please wait until the request is completed.'
100 |                 .format(export_request.id, export_request.status))
101 |             raise ValueError(
102 |                 'Export request is not yet ready for download')
103 |         elif export_request.status == 'TERMINATED':
104 |             logging.error(
105 |                 'Export request has been TERMINATED. Please contact '
106 |                 'data-support@coursera.org if we have not resolved this '
107 |                 'within 24 hours.')
108 |             raise ValueError('Export request has been TERMINATED')
109 |         elif is_clickstream_export:
110 |             # We don't fill in download links for clickstream exports
111 |             pass
112 |         else:
113 |             logging.error('Download link was not found.')
114 |             raise ValueError('Download link was not found')
115 | 


--------------------------------------------------------------------------------
/courseraresearchexports/commands/db.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2016 Coursera
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | from __future__ import print_function
 16 | 
 17 | import logging
 18 | 
 19 | from tabulate import tabulate
 20 | 
 21 | import courseraresearchexports.db.db as db
 22 | from courseraresearchexports.containers import utils
 23 | 
 24 | 
 25 | def connect(args):
 26 |     """
 27 |     Connect postgres shell to dockerized database.
 28 |     """
 29 |     d = utils.docker_client(args.docker_url, args.timeout)
 30 |     db.connect(args.container_name, docker_client=d)
 31 | 
 32 | 
 33 | def list_tables(args):
 34 |     """
 35 |     List all of the tables present in a dockerized database.
 36 |     """
 37 |     d = utils.docker_client(args.docker_url, args.timeout)
 38 |     tables = db.get_table_names(args.container_name, docker_client=d)
 39 |     print(tabulate([[table] for table in tables]))
 40 | 
 41 | 
 42 | def list_views(args):
 43 |     """
 44 |     List all of the views present in a dockerized database.
 45 |     """
 46 |     d = utils.docker_client(args.docker_url, args.timeout)
 47 |     tables = db.get_view_names(args.container_name, docker_client=d)
 48 |     print(tabulate([[table] for table in tables]))
 49 | 
 50 | 
 51 | def create_view(args):
 52 |     """
 53 |     Create a view from a sql query.
 54 |     """
 55 |     d = utils.docker_client(args.docker_url, args.timeout)
 56 | 
 57 |     if args.view_name:
 58 |         created_view = db.create_registered_view(
 59 |             args.container_name, args.view_name, d)
 60 |     elif args.sql_file:
 61 |         created_view = db.create_view_from_file(
 62 |             args.container_name, args.sql_file, d)
 63 | 
 64 |     logging.info('Created view {}'.format(created_view))
 65 | 
 66 | 
 67 | def unload_relation(args):
 68 |     """
 69 |     Unload a table or view to a CSV file.
 70 |     """
 71 |     d = utils.docker_client(args.docker_url, args.timeout)
 72 |     rowcount = db.unload_relation(args.container_name, args.dest,
 73 |                                   args.relation, d)
 74 | 
 75 |     logging.info('Unloaded {} rows'.format(rowcount))
 76 | 
 77 | 
 78 | def parser(subparsers):
 79 |     """Build an argparse argument parser to parse the command line."""
 80 | 
 81 |     # create the parser for the version subcommand.
 82 |     parser_db = subparsers.add_parser(
 83 |         'db',
 84 |         help='Tools for interacting with dockerized database',
 85 |         parents=[utils.docker_client_arg_parser()])
 86 | 
 87 |     db_subparsers = parser_db.add_subparsers()
 88 | 
 89 |     parser_tables = db_subparsers.add_parser(
 90 |         'list_tables',
 91 |         help=list_tables.__doc__)
 92 |     parser_tables.set_defaults(func=list_tables)
 93 |     parser_tables.add_argument(
 94 |         'container_name',
 95 |         help='Name of the container database.')
 96 | 
 97 |     parser_views = db_subparsers.add_parser(
 98 |         'list_views',
 99 |         help=list_views.__doc__)
100 |     parser_views.set_defaults(func=list_views)
101 |     parser_views.add_argument(
102 |         'container_name',
103 |         help='Name of the container database.')
104 | 
105 |     parser_create_view = db_subparsers.add_parser(
106 |         'create_view',
107 |         help=create_view.__doc__)
108 |     parser_create_view.set_defaults(func=create_view)
109 |     parser_create_view.add_argument(
110 |         'container_name',
111 |         help='Name of the container database.')
112 |     create_source_subparser = parser_create_view.add_mutually_exclusive_group(
113 |         required=True)
114 |     create_source_subparser.add_argument(
115 |         '--view_name',
116 |         help='Name of view')
117 |     create_source_subparser.add_argument(
118 |         '--sql_file',
119 |         help='SQL file with query.')
120 | 
121 |     parser_unload = db_subparsers.add_parser(
122 |         'unload_to_csv',
123 |         help=unload_relation.__doc__)
124 |     parser_unload.set_defaults(func=unload_relation)
125 |     parser_unload.add_argument(
126 |         'container_name',
127 |         help='Name of the container database.')
128 |     parser_unload.add_argument(
129 |         '--dest',
130 |         help='Destination folder.')
131 |     parser_unload.add_argument(
132 |         '--relation',
133 |         help='Table or view to export.')
134 | 
135 |     parser_connect = db_subparsers.add_parser(
136 |         'connect',
137 |         help=connect.__doc__)
138 |     parser_connect.set_defaults(func=connect)
139 |     parser_connect.add_argument(
140 |         'container_name',
141 |         help='Name of the container database.')
142 | 
143 |     return parser_db
144 | 


--------------------------------------------------------------------------------
/tests/models/export_request_tests.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | # Copyright 2016 Coursera
  4 | #
  5 | # Licensed under the Apache License, Version 2.0 (the "License");
  6 | # you may not use this file except in compliance with the License.
  7 | # You may obtain a copy of the License at
  8 | #
  9 | #     http://www.apache.org/licenses/LICENSE-2.0
 10 | #
 11 | # Unless required by applicable law or agreed to in writing, software
 12 | # distributed under the License is distributed on an "AS IS" BASIS,
 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14 | # See the License for the specific language governing permissions and
 15 | # limitations under the License.
 16 | 
 17 | from courseraresearchexports.constants.api_constants import SCHEMA_NAMES, \
 18 |     EXPORT_TYPE_TABLES, EXPORT_TYPE_CLICKSTREAM, EXPORT_TYPE_GRADEBOOK
 19 | from courseraresearchexports.models.ExportRequest import ExportRequest
 20 | from courseraresearchexports.models.ExportRequestWithMetadata import \
 21 |     ExportRequestWithMetadata
 22 | from mock import patch
 23 | from nose.tools import raises
 24 | 
# Shared fixture values used by the tests below.
fake_course_id = 'fake_course_id'
fake_course_slug = 'fake_course'
fake_partner_id = 1
# Deliberately not an integer; used to check that from_args rejects it.
bad_partner_id = 'bad_partner_id'
fake_partner_short_name = 'fake_partner'
fake_export_id = '1'
 31 | 
 32 | 
 33 | def test_export_request_serialize_to_json():
 34 |     export_request = ExportRequest(course_id=fake_course_id)
 35 |     expected_result = {
 36 |         'scope': {
 37 |             'typeName': 'courseContext',
 38 |             'definition': {
 39 |                 'courseId': fake_course_id}}}
 40 | 
 41 |     assert export_request.to_json() == expected_result
 42 | 
 43 | 
 44 | def test_export_request_deserialize_from_json():
 45 |     export_request_json = {
 46 |         'scope': {
 47 |             'typeName': 'courseContext',
 48 |             'definition': {
 49 |                 'courseId': fake_course_id}}}
 50 |     export_request = ExportRequest.from_json(export_request_json)
 51 | 
 52 |     assert ExportRequest(course_id=fake_course_id) == export_request
 53 | 
 54 | 
 55 | def test_create_from_args():
 56 |     export_request = ExportRequest.from_args(course_id=fake_course_id)
 57 |     assert ExportRequest(course_id=fake_course_id) == export_request
 58 | 
 59 | 
 60 | @raises(ValueError)
 61 | def test_create_from_args_non_integer_partner_id():
 62 |     export_request = ExportRequest.from_args(partner_id=bad_partner_id)
 63 | 
 64 | 
 65 | @patch('courseraresearchexports.models.utils.lookup_course_id_by_slug')
 66 | def test_course_id_inference(lookup_course_id_by_slug):
 67 |     lookup_course_id_by_slug.return_value = fake_course_id
 68 |     export_request = ExportRequest.from_args(course_slug=fake_course_slug)
 69 | 
 70 |     assert ExportRequest(course_id=fake_course_id) == export_request
 71 | 
 72 | 
 73 | @patch('courseraresearchexports.models.utils.'
 74 |        'lookup_partner_id_by_short_name')
 75 | def test_partner_id_inference(lookup_partner_id_by_short_name):
 76 |     lookup_partner_id_by_short_name.return_value = fake_partner_id
 77 |     export_request = ExportRequest.from_args(
 78 |         partner_short_name=fake_partner_short_name)
 79 | 
 80 |     assert ExportRequest(partner_id=fake_partner_id) == export_request
 81 | 
 82 | 
 83 | def test_scope_id():
 84 |     export_request = ExportRequest(course_id=fake_course_id)
 85 | 
 86 |     assert export_request.scope_id == fake_course_id
 87 | 
 88 | 
 89 | def test_schemas():
 90 |     eventing_request = ExportRequest(
 91 |         course_id=fake_course_id, export_type=EXPORT_TYPE_CLICKSTREAM)
 92 |     gradebook_request = ExportRequest(
 93 |         course_id=fake_course_id, export_type=EXPORT_TYPE_GRADEBOOK)
 94 |     all_tables_request = ExportRequest(
 95 |         course_id=fake_course_id, export_type=EXPORT_TYPE_TABLES,
 96 |         schema_names=SCHEMA_NAMES)
 97 | 
 98 |     assert eventing_request.schema_names_display is None
 99 |     assert gradebook_request.schema_names_display is None
100 |     assert all_tables_request.schema_names_display == 'all'
101 | 
102 | 
def test_export_request_with_metadata_from_export_request():
    # Wrapping a plain request keeps its course id.
    plain = ExportRequest.from_args(course_id=fake_course_id)
    wrapped = ExportRequestWithMetadata.from_export_request(
        plain, id=fake_export_id)

    assert plain.course_id == wrapped.course_id
110 | 
111 | 
def test_export_request_with_metadata_serialize_to_json():
    # The serialized form carries the scope plus the request id.
    request = ExportRequestWithMetadata(
        course_id=fake_course_id, id=fake_export_id)
    course_scope = {
        'typeName': 'courseContext',
        'definition': {'courseId': fake_course_id}}

    assert request.to_json() == {'scope': course_scope, 'id': fake_export_id}
123 | 
124 | 
def test_export_request_with_metadata_deserialize_from_json():
    # Parsing scope + id yields the same object as the constructor.
    course_scope = {
        'typeName': 'courseContext',
        'definition': {'courseId': fake_course_id}}
    parsed = ExportRequestWithMetadata.from_json(
        {'scope': course_scope, 'id': fake_export_id})

    assert parsed == ExportRequestWithMetadata(
        course_id=fake_course_id, id=fake_export_id)
136 | 


--------------------------------------------------------------------------------
/courseraresearchexports/db/db.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2016 Coursera
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | import os
 16 | import logging
 17 | import pkg_resources
 18 | import subprocess
 19 | 
 20 | from courseraresearchexports.constants.container_constants import \
 21 |     POSTGRES_DOCKER_IMAGE
 22 | from courseraresearchexports.models.ContainerInfo import ContainerInfo
 23 | from courseraresearchexports.models.ExportDb import ExportDb
 24 | from courseraresearchexports.constants.db_constants import \
 25 |     HASHED_USER_ID_COLUMN_TO_SOURCE_TABLE
 26 | 
 27 | 
 28 | def replace_user_id_placeholders(export_db, sql_text):
 29 |     """
 30 |     Replace placeholders with actual user_id column names
 31 |     :param export_db:
 32 |     :param sql_text:
 33 |     :return sql_text_with_inferred_columns:
 34 |     """
 35 |     hashed_user_id_columns_dict = infer_hashed_user_id_columns(export_db)
 36 | 
 37 |     for placeholder, column_name in hashed_user_id_columns_dict.items():
 38 |         sql_text = sql_text.replace(placeholder, column_name)
 39 | 
 40 |     return sql_text
 41 | 
 42 | 
 43 | def infer_hashed_user_id_columns(export_db):
 44 |     """
 45 |     Infer hashed_user_id_columns from database using known placeholders
 46 |     :param export_db:
 47 |     :return:
 48 |     """
 49 |     hashed_user_id_columns_dict = {}
 50 | 
 51 |     for placeholder, table in HASHED_USER_ID_COLUMN_TO_SOURCE_TABLE.items():
 52 |         if table in export_db.tables:
 53 |             columns = export_db.get_columns(table)
 54 |             inferred_column = infer_user_id_column(columns)
 55 |             if inferred_column:
 56 |                 hashed_user_id_columns_dict[placeholder] = inferred_column
 57 | 
 58 |     return hashed_user_id_columns_dict
 59 | 
 60 | 
 61 | def infer_user_id_column(columns):
 62 |     """
 63 |     Infer partner_short_name
 64 |     :param columns:
 65 |     :return:
 66 |     """
 67 |     return next((column for column in columns
 68 |                  if column.endswith('user_id')), None)
 69 | 
 70 | 
 71 | def connect(container_name, docker_client):
 72 |     """
 73 |     Create psql shell to container databaise
 74 |     :param container_name:
 75 |     :param docker_client:
 76 |     """
 77 |     container_info = ContainerInfo.from_container(
 78 |         container_name, docker_client)
 79 | 
 80 |     subprocess.call([
 81 |         'docker', 'run', '-it', '--rm',
 82 |         '--link', container_info.name,
 83 |         POSTGRES_DOCKER_IMAGE, 'psql',
 84 |         '-h', container_info.name,
 85 |         '-d', container_info.database_name,
 86 |         '-U', 'postgres'
 87 |     ], shell=False)
 88 | 
 89 | 
 90 | def get_table_names(container_name, docker_client):
 91 |     """
 92 |     Returns table names present in containerized database.
 93 |     :param container_name:
 94 |     :param docker_client:
 95 |     :return table_names:
 96 |     """
 97 |     export_db = ExportDb.from_container(container_name, docker_client)
 98 | 
 99 |     return export_db.tables
100 | 
101 | 
def get_view_names(container_name, docker_client):
    """
    Return the `views` of the database held by a container.
    :param container_name:
    :param docker_client:
    :return view_names:
    """
    return ExportDb.from_container(container_name, docker_client).views
112 | 
113 | 
def unload_relation(container_name, dest, relation, docker_client):
    """
    Unload a table or view to `<dest>/<relation>.csv`.

    The destination folder is created when it does not exist yet.
    :param container_name:
    :param dest: destination folder
    :param relation: name of the table or view to unload
    :param docker_client:
    :return: value returned by `ExportDb.unload_relation` (callers log it
        as a row count)
    """
    if not os.path.exists(dest):
        logging.debug('Creating destination folder: {}'.format(dest))
        os.makedirs(dest)

    csv_path = os.path.join(dest, '{}.csv'.format(relation))
    export_db = ExportDb.from_container(container_name, docker_client)
    return export_db.unload_relation(relation, csv_path)
131 | 
132 | 
def create_registered_view(container_name, view_name, docker_client):
    """
    Create a prepackaged view.

    The query is read from the package resource `sql/<view_name>.sql`, its
    user id placeholders are replaced, and the view is created under
    `view_name`.
    :param container_name:
    :param view_name: name of the packaged SQL file (without extension) and
        of the created view
    :param docker_client:
    :return view_name:
    """
    export_db = ExportDb.from_container(container_name, docker_client)

    sql_text = pkg_resources.resource_string(
        __name__.split('.')[0], 'sql/{}.sql'.format(view_name))
    if not isinstance(sql_text, str):
        # resource_string returns bytes. On Python 3 that is not `str`, and
        # the placeholder replacement below (str arguments) would raise a
        # TypeError, so decode to text first. On Python 2 bytes *is* str and
        # the value is left untouched.
        sql_text = sql_text.decode('utf-8')

    sql_text_with_inferred_columns = replace_user_id_placeholders(
        export_db, sql_text)

    export_db.create_view(view_name, sql_text_with_inferred_columns)

    return view_name
152 | 
153 | 
def create_view_from_file(container_name, sql_file, docker_client):
    """
    Create a view from the query stored in a sql file.

    The view is named after the file: its base name without the extension.
    :param container_name:
    :param sql_file: path of the file holding the query
    :param docker_client:
    :return view_name:
    """
    export_db = ExportDb.from_container(container_name, docker_client)

    with open(sql_file, 'r') as query_file:
        raw_sql = query_file.read()

    file_name = os.path.basename(sql_file)
    view_name, _extension = os.path.splitext(file_name)

    export_db.create_view(
        view_name, replace_user_id_placeholders(export_db, raw_sql))

    return view_name
176 | 


--------------------------------------------------------------------------------
/courseraresearchexports/commands/containers.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2016 Coursera
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | from __future__ import print_function
 16 | 
 17 | import logging
 18 | 
 19 | from tabulate import tabulate
 20 | 
 21 | from courseraresearchexports.containers import client
 22 | from courseraresearchexports.containers import utils
 23 | 
 24 | 
 25 | def create_container(args):
 26 |     """
 27 |     Create a container containing a postgres database using an export job id.
 28 |     Export job will be downloaded and loaded into dockerized database.
 29 |     Automatically starts container.
 30 |     """
 31 |     d = utils.docker_client(args.docker_url, args.timeout)
 32 | 
 33 |     kwargs = {}
 34 |     if args.container_name:
 35 |         kwargs['container_name'] = args.container_name
 36 |     if args.database_name:
 37 |         kwargs['database_name'] = args.database_name
 38 | 
 39 |     if args.export_request_id:
 40 |         container_id = client.create_from_export_request_id(
 41 |             args.export_request_id, docker_client=d, **kwargs)
 42 |     elif args.export_data_folder:
 43 |         container_id = client.create_from_folder(
 44 |             args.export_data_folder, docker_client=d, **kwargs)
 45 | 
 46 |     logging.info('Container {:.12} ready.'.format(container_id))
 47 | 
 48 | 
 49 | def list_containers(args):
 50 |     """
 51 |     List docker containers created with Coursera data exports.
 52 |     """
 53 |     d = utils.docker_client(args.docker_url, args.timeout)
 54 |     containers_info = client.list_all(docker_client=d)
 55 | 
 56 |     if containers_info:
 57 |         containers_info_table = [['Name', 'Container Id', 'Database',
 58 |                                   'Created', 'Status', 'Host IP', 'Port']]
 59 | 
 60 |         for container_info in containers_info:
 61 |             containers_info_table.append([
 62 |                 container_info.name,
 63 |                 container_info.short_id,
 64 |                 container_info.database_name,
 65 |                 container_info.creation_time.strftime('%c'),
 66 |                 container_info.status,
 67 |                 container_info.host_ip,
 68 |                 container_info.host_port
 69 |             ])
 70 | 
 71 |         print(tabulate(containers_info_table, headers='firstrow'))
 72 | 
 73 | 
 74 | def start_container(args):
 75 |     """
 76 |     Start a docker container.
 77 |     """
 78 |     d = utils.docker_client(args.docker_url, args.timeout)
 79 |     client.start(args.container_name, docker_client=d)
 80 | 
 81 | 
 82 | def stop_container(args):
 83 |     """
 84 |     Stop a docker container.
 85 |     """
 86 |     d = utils.docker_client(args.docker_url, args.timeout)
 87 |     client.stop(args.container_name, docker_client=d)
 88 | 
 89 | 
 90 | def remove_container(args):
 91 |     """
 92 |     Remove a docker container, stop the container
 93 |     before removing.
 94 |     """
 95 |     d = utils.docker_client(args.docker_url, args.timeout)
 96 |     client.remove(args.container_name, docker_client=d)
 97 | 
 98 | 
 99 | def parser(subparsers):
100 |     parser_containers = subparsers.add_parser(
101 |         'containers',
102 |         help='Create docker container from export jobs',
103 |         description='Command line tools for creating a docker container'
104 |         'containing the results of a research export. Please first '
105 |         'authenticate with the OAuth2 client before making requests ('
106 |         'courseraoauth2client config authorize --app manage-research-exports)',
107 |         epilog='Please file bugs on github at: '
108 |         'https://github.com/coursera/courseraresearchexports/issues. If you '
109 |         'would like to contribute to this tool\'s development, check us out '
110 |         'at: https://github.com/coursera/courseraresarchexports',
111 |         parents=[utils.docker_client_arg_parser()])
112 | 
113 |     containers_subparsers = parser_containers.add_subparsers()
114 | 
115 |     parser_create = containers_subparsers.add_parser(
116 |         'create',
117 |         help=create_container.__doc__,
118 |         description=create_container.__doc__)
119 |     parser_create.set_defaults(func=create_container)
120 | 
121 |     source_subparser = parser_create.add_mutually_exclusive_group(
122 |         required=True)
123 | 
124 |     source_subparser.add_argument(
125 |         '--export_request_id',
126 |         help='Export job to download and create containers')
127 |     source_subparser.add_argument(
128 |         '--export_data_folder',
129 |         help='Location of already downloaded export data')
130 | 
131 |     parser_create.add_argument(
132 |         '--container_name',
133 |         help='Name for docker container.')
134 |     parser_create.add_argument(
135 |         '--database_name',
136 |         help='Name for database inside container.')
137 | 
138 |     parser_list = containers_subparsers.add_parser(
139 |         'list',
140 |         help=list_containers.__doc__)
141 |     parser_list.set_defaults(func=list_containers)
142 | 
143 |     parser_stop = containers_subparsers.add_parser(
144 |         'stop',
145 |         help=stop_container.__doc__)
146 |     parser_stop.add_argument(
147 |         'container_name',
148 |         help='Name of the container to stop.')
149 |     parser_stop.set_defaults(func=stop_container)
150 | 
151 |     parser_start = containers_subparsers.add_parser(
152 |         'start',
153 |         help=start_container.__doc__)
154 |     parser_start.add_argument(
155 |         'container_name',
156 |         help='Name of the container to start.')
157 |     parser_start.set_defaults(func=start_container)
158 | 
159 |     parser_remove = containers_subparsers.add_parser(
160 |         'remove',
161 |         help=remove_container.__doc__)
162 |     parser_remove.add_argument(
163 |         'container_name',
164 |         help='Name of the container to remove.')
165 |     parser_remove.set_defaults(func=remove_container)
166 | 
167 |     return parser_containers
168 | 


--------------------------------------------------------------------------------
/courseraresearchexports/models/ExportRequestWithMetadata.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2016 Coursera
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | from datetime import datetime
 16 | import time
 17 | 
 18 | from courseraresearchexports.models.ExportRequest import ExportRequest
 19 | 
 20 | 
 21 | class ExportRequestMetadata:
 22 |     """Metadata about the internal timings of the export request"""
 23 | 
 24 |     def __init__(self, created_by=None, created_at=None, started_at=None,
 25 |                  completed_at=None, snapshot_at=None, **kwargs):
 26 |         self._created_by = created_by
 27 |         self._created_at = created_at
 28 |         self._started_at = started_at
 29 |         self._completed_at = completed_at
 30 |         self._snapshot_at = snapshot_at
 31 | 
 32 |     def to_json(self):
 33 |         """
 34 |         Serialize metadata from json object.
 35 |         :return json_metadata:
 36 |         """
 37 |         json_metadata = {}
 38 |         if self._created_by:
 39 |             json_metadata['createdBy'] = self._created_by
 40 |         if self._created_at:
 41 |             json_metadata['createdAt'] = datetime_to_unix_ms(self._created_at)
 42 |         if self._started_at:
 43 |             json_metadata['startedAt'] = datetime_to_unix_ms(self._started_at)
 44 |         if self._completed_at:
 45 |             json_metadata['completedAt'] = datetime_to_unix_ms(
 46 |                 self._completed_at)
 47 |         if self._snapshot_at:
 48 |             json_metadata['snapshotAt'] = datetime_to_unix_ms(
 49 |                 self._snapshot_at)
 50 | 
 51 |         return json_metadata
 52 | 
 53 |     @classmethod
 54 |     def from_json(cls, json_metadata):
 55 |         """
 56 |         Deserialize ExportRequestMetaData from json object.
 57 |         :param json_metadata:
 58 |         :return export_request_metadata: ExportRequestMetadata
 59 |         """
 60 |         if json_metadata:
 61 |             kwargs = {}
 62 |             if json_metadata.get('createdBy'):
 63 |                 kwargs['created_by'] = json_metadata['createdBy']
 64 |             if json_metadata.get('createdAt'):
 65 |                 kwargs['created_at'] = unix_ms_to_datetime(
 66 |                     json_metadata['createdAt'])
 67 |             if json_metadata.get('completedAt'):
 68 |                 kwargs['completed_at'] = unix_ms_to_datetime(
 69 |                     json_metadata['completedAt'])
 70 |             if json_metadata.get('startedAt'):
 71 |                 kwargs['started_at'] = unix_ms_to_datetime(
 72 |                     json_metadata['startedAt'])
 73 |             if json_metadata.get('snapshotAt'):
 74 |                 kwargs['snapshot_at'] = unix_ms_to_datetime(
 75 |                     json_metadata['snapshotAt'])
 76 |             return cls(**kwargs)
 77 | 
 78 |         else:
 79 |             return None
 80 | 
 81 | 
 82 | class ExportRequestWithMetadata(ExportRequest):
 83 |     """
 84 |     Class representing a export request from Coursera's research data export
 85 |     service with metadata about its status.
 86 |     """
 87 | 
 88 |     def __init__(self, course_id=None, partner_id=None, group_id=None,
 89 |                  export_type=None, anonymity_level=None,
 90 |                  statement_of_purpose=None, schema_names=None,
 91 |                  interval=None, ignore_existing=None, id=None,
 92 |                  status=None, download_link=None, metadata=None, **kwargs):
 93 |         ExportRequest.__init__(
 94 |             self, course_id=course_id, partner_id=partner_id,
 95 |             group_id=group_id, export_type=export_type,
 96 |             anonymity_level=anonymity_level,
 97 |             statement_of_purpose=statement_of_purpose,
 98 |             schema_names=schema_names, interval=interval,
 99 |             ignore_existing=ignore_existing)
100 |         self._id = id
101 |         self._status = status
102 |         self._download_link = download_link
103 |         self._metadata = metadata
104 | 
105 |     def to_json(self):
106 |         """
107 |         Serialize ExportRequestWithMetadata to json object
108 |         :return json_request:
109 |         """
110 |         json_request = ExportRequest.to_json(self)
111 | 
112 |         if self._id:
113 |             json_request['id'] = self._id
114 |         if self._status:
115 |             json_request['status'] = self._status
116 |         if self._download_link:
117 |             json_request['downloadLink'] = self._download_link
118 |         if self._metadata:
119 |             json_request['metadata'] = self._metadata.to_json()
120 | 
121 |         return json_request
122 | 
123 |     @classmethod
124 |     def from_export_request(cls, export_request, id=None, status=None,
125 |                             download_link=None, metadata=None, **kwargs):
126 |         """
127 |         Create an object of class ExportRequestWithMetadata from an object of
128 |         class ExportRequest.
129 |         :param export_request: ExportRequest, parent object
130 |         :param id:
131 |         :param status:
132 |         :param download_link:
133 |         :param metadata:
134 |         :param kwargs:
135 |         :return export_request_with_metadata: ExportRequestWithMetadata
136 |         """
137 |         return cls(
138 |             course_id=export_request._course_id,
139 |             partner_id=export_request._partner_id,
140 |             group_id=export_request._group_id,
141 |             export_type=export_request._export_type,
142 |             anonymity_level=export_request._anonymity_level,
143 |             statement_of_purpose=export_request._statement_of_purpose,
144 |             schema_names=export_request._schema_names,
145 |             interval=export_request._interval,
146 |             ignore_existing=export_request._ignore_existing,
147 |             id=id,
148 |             status=status,
149 |             download_link=download_link,
150 |             metadata=metadata)
151 | 
152 |     @classmethod
153 |     def from_json(cls, json_request):
154 |         """
155 |         Deserialize ExportRequestWithMetadata from json object.
156 |         :param json_request:
157 |         :return export_request: ExportRequestWithMetadata
158 |         """
159 |         export_request = ExportRequest.from_json(json_request)
160 | 
161 |         return cls.from_export_request(
162 |             export_request=export_request,
163 |             id=json_request.get('id'),
164 |             status=json_request.get('status'),
165 |             download_link=json_request.get('downloadLink'),
166 |             metadata=ExportRequestMetadata.from_json(
167 |                 json_request.get('metadata')))
168 | 
169 |     @classmethod
170 |     def from_response(cls, response):
171 |         """
172 |         Instantiate a list of ExportRequestWithMeta objects from
173 |         API call response.
174 |         :param response:
175 |         :return export_request_with_metadata_list: [ExportRequestWithMetadata]
176 |         """
177 |         return [cls.from_json(export_request)
178 |                 for export_request in response.json()['elements']]
179 | 
    @property
    def id(self):
        """Read-only accessor for the value stored in ``self._id``."""
        return self._id
183 | 
    @property
    def status(self):
        """Read-only accessor for the value stored in ``self._status``."""
        return self._status
187 | 
    @property
    def download_link(self):
        """Read-only accessor for the value stored in ``self._download_link``."""
        return self._download_link
191 | 
    @property
    def metadata(self):
        """Read-only accessor for the value stored in ``self._metadata``."""
        return self._metadata
195 | 
196 |     @property
197 |     def created_at(self):
198 |         if self._metadata and self._metadata._created_at:
199 |             return self._metadata._created_at
200 |         else:
201 |             return datetime.fromtimestamp(0)
202 | 
203 | 
def datetime_to_unix_ms(dt):
    """
    Convert datetime object to timestamp in milliseconds.
    The datetime is interpreted in local time (time.mktime semantics).
    :param dt: datetime
    :return unix_ms: int, milliseconds since the epoch
    """
    # timetuple() has one-second resolution, so the sub-second part has to be
    # added back from dt.microsecond; otherwise the result is always a whole
    # second and a round trip through unix_ms_to_datetime loses precision.
    whole_seconds = int(time.mktime(dt.timetuple()))
    return whole_seconds * 1000 + dt.microsecond // 1000
207 | 
208 | 
def unix_ms_to_datetime(unix_ms):
    """
    Convert timestamp in milliseconds to datetime object (local time, as
    produced by datetime.fromtimestamp).
    """
    seconds_since_epoch = unix_ms / 1000.0
    return datetime.fromtimestamp(seconds_since_epoch)
212 | 


--------------------------------------------------------------------------------
/courseraresearchexports/containers/client.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2016 Coursera
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | """
 16 | Coursera's tools for managing docker containers configured with a
 17 | postgres database.
 18 | """
 19 | 
 20 | import logging
 21 | import os
 22 | import shutil
 23 | import time
 24 | 
 25 | from courseraresearchexports import exports
 26 | from courseraresearchexports.constants.api_constants import \
 27 |     EXPORT_TYPE_TABLES
 28 | from courseraresearchexports.constants.container_constants import \
 29 |     COURSERA_DOCKER_LABEL, COURSERA_LOCAL_FOLDER, POSTGRES_DOCKER_IMAGE, \
 30 |     POSTGRES_INIT_MSG, POSTGRES_READY_MSG
 31 | from courseraresearchexports.containers import utils as container_utils
 32 | from courseraresearchexports.exports import utils as export_utils
 33 | from courseraresearchexports.models.ContainerInfo import ContainerInfo
 34 | 
 35 | 
 36 | def list_all(docker_client):
 37 |     """
 38 |     Return all containers that have Coursera label
 39 |     :param docker_client:
 40 |     :return containers_info: [ContainerInfo]
 41 |     """
 42 |     return [ContainerInfo.from_container(container['Id'], docker_client)
 43 |             for container in docker_client.containers(
 44 |             all=True, filters={'label': COURSERA_DOCKER_LABEL})]
 45 | 
 46 | 
 47 | def start(container_name, docker_client):
 48 |     """
 49 |     Start a docker container containing a research export database. Waits until
 50 |     """
 51 |     try:
 52 |         logging.debug('Starting container {}...'.format(container_name))
 53 |         docker_client.start(container_name)
 54 | 
 55 |         # poll logs to see if database is ready to accept connections
 56 |         while POSTGRES_READY_MSG not in docker_client.logs(
 57 |                 container_name, tail=4):
 58 | 
 59 |             logging.debug('Polling container for database connection...')
 60 |             if not container_utils.is_container_running(
 61 |                     container_name, docker_client):
 62 |                 raise RuntimeError('Container failed to start.')
 63 | 
 64 |             time.sleep(10)
 65 | 
 66 |         logging.info('Started container {}.'.format(container_name))
 67 | 
 68 |     except:
 69 |         logging.error(
 70 |             """Container failed to start, check log for errors:\n{}"""
 71 |             .format(docker_client.logs(container_name, tail=20)))
 72 |         raise
 73 | 
 74 | 
 75 | def stop(container_name, docker_client):
 76 |     """
 77 |     Stops a docker container
 78 |     """
 79 |     docker_client.stop(container_name)
 80 | 
 81 | 
 82 | def remove(container_name, docker_client):
 83 |     """
 84 |     Remove a stopped container
 85 |     """
 86 |     docker_client.remove_container(container_name)
 87 | 
 88 | 
 89 | def initialize(container_name, docker_client):
 90 |     """
 91 |     Initialize a docker container. Polls database for completion of
 92 |     entrypoint tasks.
 93 |     """
 94 |     try:
 95 |         logging.info('Initializing container {}...'.format(
 96 |             container_name))
 97 | 
 98 |         docker_client.start(container_name)
 99 |         while POSTGRES_INIT_MSG not in docker_client.logs(
100 |                 container_name, tail=20):
101 | 
102 |             logging.debug('Polling data for entrypoint initialization...')
103 |             if not container_utils.is_container_running(container_name,
104 |                                                         docker_client):
105 |                 raise RuntimeError('Container initialization failed.')
106 | 
107 |             time.sleep(10)
108 | 
109 |         logging.info('Initialized container {}.'.format(container_name))
110 | 
111 |     except:
112 |         logging.error(
113 |             """Container initialization failed, check log for errors:\n{}"""
114 |             .format(docker_client.logs(container_name, tail=20)))
115 |         logging.error(
116 |             """If error persists, consider restarting your docker engine.""")
117 |         raise
118 | 
119 | 
def create_from_folder(export_data_folder, docker_client,
                       container_name='coursera-exports',
                       database_name='coursera-exports',
                       database_password=''):
    """
    Create a postgres docker container from a folder holding a Coursera
    research export, load the export into a database inside it and start
    the container.
    :param export_data_folder: folder where export data/scripts is stored
    :param docker_client:
    :param container_name:
    :param database_name:
    :param database_password: when empty, the container is configured with
        POSTGRES_HOST_AUTH_METHOD=trust instead of a password
    :return container_id:
    """
    logging.debug('Creating containers from {folder}'.format(
        folder=export_data_folder))

    if database_password:
        env = {'POSTGRES_PASSWORD': database_password}
    else:
        env = {'POSTGRES_HOST_AUTH_METHOD': 'trust'}

    # export folder is mounted read-only; postgres is published on localhost
    # on the next port reported free for the existing Coursera containers
    export_bind = '{}:/mnt/exportData:ro'.format(export_data_folder)
    host_port = container_utils.get_next_available_port(
        list_all(docker_client))
    host_config = docker_client.create_host_config(
        binds=[export_bind],
        port_bindings={5432: ('127.0.0.1', host_port)})

    container = create_postgres_container(
        docker_client, container_name, database_name,
        {
            'environment': env,
            'volumes': ['/mnt/exportData'],
            'host_config': host_config,
        })
    container_id = container['Id']

    # copy containers initialization script to entrypoint
    database_setup_script = """
        createdb -U {user} {db}
        cd /mnt/exportData
        psql -e -U {user} -d {db} -f setup.sql
        psql -e -U {user} -d {db} -f load.sql
    """.format(user='postgres', db=database_name)
    setup_archive = container_utils.create_tar_archive(
        database_setup_script, name='init-user-db.sh')

    docker_client.put_archive(
        container_id,  # using a named argument causes NullResource error
        path='/docker-entrypoint-initdb.d/',
        data=setup_archive)

    logging.info('Created container with id: {}'.format(container_id))

    initialize(container_id, docker_client)

    return container_id
175 | 
176 | 
def create_postgres_container(docker_client, container_name, database_name,
                              create_container_args):
    """
    Create (but do not start) a postgres container carrying the Coursera
    label. Fetches the postgres image if it is not available locally and
    replaces any existing container that has exactly the requested name.
    :param docker_client:
    :param container_name: name for the new container
    :param database_name: recorded in the 'database_name' container label
    :param create_container_args: dict of extra keyword arguments for
        docker_client.create_container; it is not modified
    :return container: result of docker_client.create_container
    """
    if not docker_client.images(name=POSTGRES_DOCKER_IMAGE):
        logging.info('Downloading image: {}'.format(POSTGRES_DOCKER_IMAGE))
        docker_client.import_image(image=POSTGRES_DOCKER_IMAGE)

    # Docker's `name` filter matches on any part of a container name, so the
    # candidates are narrowed to exact matches before anything is removed.
    # Container names are reported with a leading slash.
    exact_name = '/{}'.format(container_name)
    for existing_container in docker_client.containers(
            all=True, filters={'name': container_name}):
        reported_names = existing_container.get('Names')
        # If the daemon reports no names at all, keep the previous behaviour
        # and treat the filter result as a match.
        if reported_names is not None and exact_name not in reported_names:
            continue
        logging.info('Removing existing container with name: {}'.format(
            container_name))
        docker_client.stop(existing_container)
        docker_client.remove_container(existing_container)

    # Work on a copy so the caller's dict is left untouched.
    container_args = dict(create_container_args)
    container_args['image'] = POSTGRES_DOCKER_IMAGE
    container_args['name'] = container_name
    container_args['labels'] = {
        COURSERA_DOCKER_LABEL: None,
        'database_name': database_name
    }
    return docker_client.create_container(**container_args)
196 | 
197 | 
def create_from_export_request_id(export_request_id, docker_client,
                                  container_name=None,
                                  database_name=None,
                                  database_password=''):
    """
    Create a docker container containing the export data from a given
    export request. Container and database name will be inferred as the
    course slug or partner short name from export_request if not provided.
    The downloaded export is extracted under COURSERA_LOCAL_FOLDER and
    removed again once the container has been created or creation failed.
    :param export_request_id:
    :param docker_client:
    :param container_name:
    :param database_name:
    :param database_password:
    :return container_id:
    :raises ValueError: if the export request is not a tables export
    """
    export_request = exports.api.get(export_request_id)[0]

    if export_request.export_type != EXPORT_TYPE_TABLES:
        raise ValueError('Invalid Export Type. (Only tables exports supported.'
                         ' Given [{}])'.format(export_request.export_type))

    logging.info('Downloading export {}'.format(export_request_id))
    downloaded_files = export_utils.download(
        export_request, dest=COURSERA_LOCAL_FOLDER)
    dest = os.path.join(COURSERA_LOCAL_FOLDER, export_request_id)

    try:
        for f in downloaded_files:
            container_utils.extract_zip_archive(
                archive=f,
                dest=dest,
                delete_archive=True)

        # scope_name may perform a remote lookup, so resolve it at most once
        # and only when a name actually has to be inferred.
        if not (database_name and container_name):
            inferred_name = export_request.scope_name
            database_name = database_name or inferred_name
            container_name = container_name or inferred_name

        container_id = create_from_folder(
            export_data_folder=dest,
            docker_client=docker_client,
            database_name=database_name,
            container_name=container_name,
            database_password=database_password or ''
        )
    finally:
        # Do not leave extracted export data behind when extraction or
        # container creation fails.
        if os.path.isdir(dest):
            shutil.rmtree(dest)

    return container_id
243 | 


--------------------------------------------------------------------------------
/courseraresearchexports/models/ExportRequest.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2016 Coursera
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | from courseraresearchexports.constants.api_constants import \
 16 |     ANONYMITY_LEVEL_COORDINATOR, ANONYMITY_LEVEL_ISOLATED, EXPORT_TYPE_TABLES,\
 17 |     EXPORT_TYPE_CLICKSTREAM, EXPORT_TYPE_GRADEBOOK, SCHEMA_NAMES
 18 | from courseraresearchexports.models import utils
 19 | import re
 20 | import string
 21 | 
 22 | 
 23 | class ExportRequest:
 24 |     """
 25 |     Represents a export request for Coursera's research data export
 26 |     service and provides methods for serialization.
 27 |     """
 28 | 
 29 |     def __init__(self, course_id=None, partner_id=None, group_id=None,
 30 |                  export_type=None, anonymity_level=None,
 31 |                  statement_of_purpose=None, schema_names=None,
 32 |                  interval=None, ignore_existing=None, **kwargs):
 33 |         self._course_id = course_id
 34 |         if partner_id is not None:
 35 |             self._partner_id = int(partner_id)
 36 |         else:
 37 |             self._partner_id = partner_id
 38 |         self._group_id = group_id
 39 |         self._export_type = export_type
 40 |         self._anonymity_level = anonymity_level
 41 |         self._statement_of_purpose = statement_of_purpose
 42 |         self._schema_names = schema_names
 43 |         self._interval = interval
 44 |         self._ignore_existing = ignore_existing
 45 | 
 46 |     def to_json(self):
 47 |         """
 48 |         Serialize ExportRequest to a dictionary representing a json object.
 49 |         No validation is done with the exception that only specification of
 50 |         scope is used (course/partner/group).
 51 |         :return json_request:
 52 |         """
 53 |         json_request = {}
 54 | 
 55 |         if self._course_id:
 56 |             json_request['scope'] = {
 57 |                 'typeName': 'courseContext',
 58 |                 'definition': {
 59 |                     'courseId': self._course_id
 60 |                 }}
 61 |         elif self._partner_id:
 62 |             json_request['scope'] = {
 63 |                 'typeName': 'partnerContext',
 64 |                 'definition': {
 65 |                     'partnerId': {
 66 |                         'maestroId': self._partner_id
 67 |                     }}}
 68 |         elif self._group_id:
 69 |             json_request['scope'] = {
 70 |                 'typeName': 'groupContext',
 71 |                 'definition': {
 72 |                     'groupId': self._group_id
 73 |                 }}
 74 |         if self._export_type:
 75 |             json_request['exportType'] = self._export_type
 76 |         if self._anonymity_level:
 77 |             json_request['anonymityLevel'] = self._anonymity_level
 78 |         if self._statement_of_purpose:
 79 |             json_request['statementOfPurpose'] = self._statement_of_purpose
 80 |         if self._schema_names:
 81 |             json_request['schemaNames'] = self._schema_names
 82 |         if self._interval:
 83 |             json_request['interval'] = {
 84 |                 'start': self._interval[0], 'end': self._interval[1]}
 85 |         if self._ignore_existing:
 86 |             json_request['ignoreExisting'] = self._ignore_existing
 87 | 
 88 |         return json_request
 89 | 
 90 |     @classmethod
 91 |     def from_args(cls, **kwargs):
 92 |         """
 93 |         Create a ExportResource object using the parameters required. Performs
 94 |         course_id/partner_id inference if possible.
 95 |         :param kwargs:
 96 |         :return export_request: ExportRequest
 97 |         """
 98 |         if kwargs.get('course_slug') and not kwargs.get('course_id'):
 99 |             kwargs['course_id'] = utils.lookup_course_id_by_slug(
100 |                 kwargs['course_slug'])
101 |         elif kwargs.get('partner_short_name') and not kwargs.get('partner_id'):
102 |             kwargs['partner_id'] = utils.lookup_partner_id_by_short_name(
103 |                 kwargs['partner_short_name'])
104 | 
105 |         if kwargs.get('user_id_hashing'):
106 |             if kwargs['user_id_hashing'] == 'linked':
107 |                 kwargs['anonymity_level'] = ANONYMITY_LEVEL_COORDINATOR
108 |             elif kwargs['user_id_hashing'] == 'isolated':
109 |                 kwargs['anonymity_level'] = ANONYMITY_LEVEL_ISOLATED
110 | 
111 |         return cls(**kwargs)
112 | 
113 |     @classmethod
114 |     def from_json(cls, json_request):
115 |         """
116 |         Deserialize ExportRequest from json object.
117 |         :param json_request:
118 |         :return export_request: ExportRequest
119 |         """
120 |         kwargs = {}
121 |         request_scope = json_request['scope']
122 |         request_scope_context = request_scope['typeName']
123 | 
124 |         if request_scope_context == 'courseContext':
125 |             kwargs['course_id'] = request_scope['definition']['courseId']
126 |         elif request_scope_context == 'partnerContext':
127 |             kwargs['partner_id'] = \
128 |                 request_scope['definition']['partnerId']['maestroId']
129 |         elif request_scope_context == 'groupContext':
130 |             kwargs['group_id'] = request_scope['definition']['groupId']
131 | 
132 |         if json_request.get('interval'):
133 |             kwargs['interval'] = [
134 |                 json_request['interval']['start'],
135 |                 json_request['interval']['end']
136 |             ]
137 | 
138 |         return cls(
139 |             export_type=json_request.get('exportType'),
140 |             anonymity_level=json_request.get('anonymityLevel'),
141 |             statement_of_purpose=json_request.get('statementOfPurpose'),
142 |             schema_names=json_request.get('schemaNames'),
143 |             ignore_existing=json_request.get('ignoreExisting'),
144 |             **kwargs)
145 | 
146 |     @property
147 |     def course_id(self):
148 |         return self._course_id
149 | 
150 |     @property
151 |     def partner_id(self):
152 |         return self._partner_id
153 | 
154 |     @property
155 |     def export_type(self):
156 |         return self._export_type
157 | 
158 |     @property
159 |     def export_type_display(self):
160 |         if self._export_type == EXPORT_TYPE_GRADEBOOK:
161 |             return 'GRADEBOOK'
162 |         elif self._export_type == EXPORT_TYPE_CLICKSTREAM:
163 |             return 'CLICKSTREAM'
164 |         elif self._export_type == EXPORT_TYPE_TABLES:
165 |             return 'TABLES'
166 |         else:
167 |             return self._export_type
168 | 
169 |     @property
170 |     def anonymity_level(self):
171 |         return self._anonymity_level
172 | 
173 |     @property
174 |     def formatted_anonymity_level(self):
175 |         if self.anonymity_level == ANONYMITY_LEVEL_COORDINATOR:
176 |             return 'Linked'
177 |         elif self.anonymity_level == ANONYMITY_LEVEL_ISOLATED:
178 |             return 'Isolated'
179 |         else:
180 |             return 'Unknown'
181 | 
182 |     @property
183 |     def statement_of_purpose(self):
184 |         return self._statement_of_purpose
185 | 
186 |     @property
187 |     def interval(self):
188 |         return self._interval
189 | 
190 |     @property
191 |     def ignore_existing(self):
192 |         return self._ignore_existing
193 | 
194 |     @property
195 |     def scope_context(self):
196 |         """
197 |         Context for this ExportRequest, assume that only one identifier for
198 |         partner/course/group is defined.
199 |         """
200 |         if self._course_id:
201 |             return 'COURSE'
202 |         elif self._partner_id:
203 |             return 'PARTNER'
204 |         elif self._group_id:
205 |             return 'GROUP'
206 |         else:
207 |             return None
208 | 
209 |     @property
210 |     def scope_id(self):
211 |         """
212 |         Identifier for the scope, assume that only one of course/partner/group
213 |         is defined for a valid request.
214 |         :return scope_id:
215 |         """
216 |         return self._course_id or self._partner_id or self._group_id
217 | 
218 |     @property
219 |     def scope_name(self):
220 |         """
221 |         Human readable name for this scope context. Partner short names for
222 |         partners, but only group ids for groups and course ids for courses(apis
223 |         are not open)
224 |         :return:
225 |         """
226 |         if self._course_id:
227 |             try: 
228 |                 return utils.lookup_course_slug_by_id(self._course_id)
229 |             except:
230 |                 print("couldn't create human readable course name, using alphanumeric characters of course_id")
231 |                 chars = re.escape(string.punctuation)
232 |                 return  re.sub(r'['+chars+']', '', self._course_id)
233 |         elif self._partner_id:
234 |             try:
235 |                 return utils.lookup_partner_short_name_by_id(self._partner_id)
236 |             except:
237 |                 print("couldn't create human readable partner name, using course_id")
238 |                 return self._partner_id
239 |         elif self._group_id:
240 |             return self._group_id
241 |         else:
242 |             return 'UNKNOWN'
243 | 
244 |     @property
245 |     def schema_names(self):
246 |         return self._schema_names
247 | 
248 |     @property
249 |     def schema_names_display(self):
250 |         """
251 |         Display only property for schemas names.
252 |         :return schemas:
253 |         """
254 |         if self._schema_names:
255 |             if set(self._schema_names) == set(SCHEMA_NAMES):
256 |                 return 'all'
257 |             else:
258 |                 return ','.join(self._schema_names)
259 |         else:
260 |             return None
261 | 
262 |     def __eq__(self, other):
263 |         """
264 |         Override for internal equality checks as suggested at:
265 |         http://stackoverflow.com/a/390640
266 |         """
267 |         if type(other) is type(self):
268 |             return self.__dict__ == other.__dict__
269 |         return False
270 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "{}"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright {yyyy} {name of copyright owner}
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/courseraresearchexports/commands/jobs.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2016 Coursera
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | from __future__ import print_function
 16 | 
 17 | import json
 18 | import logging
 19 | 
 20 | import argparse
 21 | from tabulate import tabulate
 22 | 
 23 | from courseraresearchexports.exports import api
 24 | from courseraresearchexports.constants.api_constants import \
 25 |     ANONYMITY_LEVEL_COORDINATOR, EXPORT_TYPE_CLICKSTREAM, \
 26 |     EXPORT_TYPE_TABLES, SCHEMA_NAMES
 27 | from courseraresearchexports.models.ClickstreamDownloadLinksRequest import \
 28 |     ClickstreamDownloadLinksRequest
 29 | from courseraresearchexports.models.ExportRequest import ExportRequest
 30 | from courseraresearchexports.exports import utils
 31 | 
 32 | 
 33 | def request_clickstream(args):
 34 |     """
 35 |     Create and send an clickstream data export request with Coursera. Only
 36 |     available for data coordinators.
 37 |     """
 38 |     export_request = ExportRequest.from_args(
 39 |         course_id=args.course_id,
 40 |         course_slug=args.course_slug,
 41 |         partner_id=args.partner_id,
 42 |         partner_short_name=args.partner_short_name,
 43 |         group_id=args.group_id,
 44 |         anonymity_level=ANONYMITY_LEVEL_COORDINATOR,
 45 |         statement_of_purpose=args.purpose,
 46 |         export_type=EXPORT_TYPE_CLICKSTREAM,
 47 |         interval=args.interval,
 48 |         ignore_existing=args.ignore_existing)
 49 | 
 50 |     export_request_with_metadata = api.post(export_request)[0]
 51 | 
 52 |     logging.info('Successfully created clickstream export request {id}.'
 53 |                  .format(id=export_request_with_metadata.id))
 54 |     logging.debug('Request created with json body:\n{json}'
 55 |                   .format(json=json.dumps(
 56 |                       export_request_with_metadata.to_json(), indent=2)))
 57 | 
 58 | 
 59 | def request_tables(args):
 60 |     """
 61 |     Create and send a tables data export request with Coursera.
 62 |     """
 63 |     export_request = ExportRequest.from_args(
 64 |         course_id=args.course_id,
 65 |         course_slug=args.course_slug,
 66 |         partner_id=args.partner_id,
 67 |         partner_short_name=args.partner_short_name,
 68 |         group_id=args.group_id,
 69 |         user_id_hashing=args.user_id_hashing,
 70 |         statement_of_purpose=args.purpose,
 71 |         export_type=EXPORT_TYPE_TABLES,
 72 |         schema_names=args.schemas)
 73 | 
 74 |     export_request_with_metadata = api.post(export_request)[0]
 75 | 
 76 |     logging.info('Successfully created tables export request {id}.'
 77 |                  .format(id=export_request_with_metadata.id))
 78 |     logging.debug('Request created with json body:\n{json}'
 79 |                   .format(json=json.dumps(
 80 |                       export_request_with_metadata.to_json(), indent=2)))
 81 | 
 82 | 
 83 | def get(args):
 84 |     """
 85 |     Get the details and status of a data export request using a job id.
 86 |     """
 87 |     export_request = api.get(args.id)[0]
 88 | 
 89 |     export_request_info = [
 90 |         ['Export Job Id:', export_request.id],
 91 |         ['Export Type:', export_request.export_type_display],
 92 |         ['Status:', export_request.status],
 93 |         ['Scope Context:', export_request.scope_context],
 94 |         ['Scope Id:', export_request.scope_id],
 95 |         ['Scope Name:', export_request.scope_name],
 96 |         ['User id Hashing: ', export_request.formatted_anonymity_level],
 97 |         ['Created:', export_request.created_at.strftime('%c')]]
 98 | 
 99 |     if export_request.schema_names:
100 |         export_request_info.append(
101 |             ['Schemas:', export_request.schema_names_display])
102 | 
103 |     if export_request.download_link:
104 |         export_request_info.append(
105 |             ['Download Link:', export_request.download_link])
106 | 
107 |     if export_request.interval:
108 |         export_request_info.append(
109 |             ['Interval:', ' to '.join(export_request.interval)])
110 | 
111 |     print(tabulate(export_request_info, tablefmt="plain"))
112 | 
113 | 
def get_all(args):
    """
    Get the details and status of your data export requests.
    """
    # The docstring is reused as the CLI help text for `jobs get_all`.
    # `args` is accepted for the common command signature but is not read.
    header = ['Created', 'Request Id', 'Status', 'Type',
              'User Id Hashing', 'Scope', 'Schemas']

    # Oldest request first.
    chronological = sorted(
        api.get_all(), key=lambda request: request.created_at)

    rows = [
        [
            request.created_at.strftime('%Y-%m-%d %H:%M'),
            request.id,
            request.status,
            request.export_type_display,
            request.formatted_anonymity_level,
            request.scope_id,
            request.schema_names_display,
        ]
        for request in chronological
    ]

    # 'firstrow' tells tabulate to render the first row as column headers.
    print(tabulate([header] + rows, headers='firstrow'))
133 | 
134 | 
def download(args):
    """
    Download a data export job using a request id.
    """
    # The docstring is reused as the CLI help text for `jobs download`.
    #
    # Both the lookup and the download sit inside the try block so that a
    # failure in either one is logged before being re-raised to the caller.
    try:
        request = api.get(args.id)[0]
        target_folder = args.dest
        utils.download(request, target_folder)
    except Exception as failure:
        message = 'Download failed with exception:\n{}'.format(failure)
        logging.error(message)
        raise
146 | 
147 | 
def get_clickstream_links(args):
    """
    Print the download links of clickstream exports for the requested scope,
    one link per line.
    """
    # Scope options and the optional interval come straight from the parsed
    # command line.
    links_request = ClickstreamDownloadLinksRequest.from_args(
        course_id=args.course_id,
        course_slug=args.course_slug,
        partner_id=args.partner_id,
        partner_short_name=args.partner_short_name,
        group_id=args.group_id,
        interval=args.interval)

    links = api.get_clickstream_download_links(links_request)

    # TODO: add more descriptive information or option write to text file
    # Each link becomes its own single-column row.
    single_column_rows = [[link] for link in links]
    print(tabulate(single_column_rows, tablefmt="plain"))
167 | 
168 | 
def parser(subparsers):
    """
    Register the ``jobs`` command and all of its subcommands.

    :param subparsers: an argparse sub-parsers object (anything exposing
        ``add_parser``) on which the ``jobs`` command is created.
    :return: the parser created for the ``jobs`` command.
    """
    parser_jobs = subparsers.add_parser(
        'jobs',
        help='Get status of current/completed research export job(s)',
        # The app name matches the one used in the README setup section.
        description='Command line tools for requesting and reviewing the '
        'status of Coursera research data exports. Please first authenticate '
        'with the OAuth2 client before making requests (courseraoauth2client '
        'config authorize --app manage_research_exports).',
        epilog='Please file bugs on github at: '
        'https://github.com/coursera/courseraresearchexports/issues. If you '
        'would like to contribute to this tool\'s development, check us out '
        'at: https://github.com/coursera/courseraresearchexports')

    jobs_subparsers = parser_jobs.add_subparsers()

    # `jobs request {tables,clickstream}`
    create_request_parser(jobs_subparsers)

    # `jobs get_all`
    parser_get_all = jobs_subparsers.add_parser(
        'get_all',
        help=get_all.__doc__,
        description=get_all.__doc__)
    parser_get_all.set_defaults(func=get_all)

    # `jobs get <id>`
    parser_get = jobs_subparsers.add_parser(
        'get',
        help=get.__doc__,
        description=get.__doc__)
    parser_get.set_defaults(func=get)

    parser_get.add_argument(
        'id',
        help='Export request ID')

    # `jobs download <id> [--dest DIR]`
    parser_download = jobs_subparsers.add_parser(
        'download',
        help=download.__doc__,
        description=download.__doc__)
    parser_download.set_defaults(func=download)

    parser_download.add_argument(
        'id',
        help='Export request ID')

    parser_download.add_argument(
        '--dest',
        default='.',
        help='Destination folder')

    # `jobs clickstream_download_links <scope> [--interval START END]`
    parser_clickstream_links = jobs_subparsers.add_parser(
        'clickstream_download_links',
        help='Get download links for completed eventing exports.')
    parser_clickstream_links.set_defaults(func=get_clickstream_links)

    create_scope_subparser(parser_clickstream_links)

    parser_clickstream_links.add_argument(
        '--interval',
        nargs=2,
        metavar=('START', 'END'),
        help='Interval of exported clickstream data, inclusive. '
        '(i.e. 2016-08-01 2016-08-04).')

    return parser_jobs
232 | 
233 | 
def create_scope_subparser(parser):
    """
    Add the export scope options to ``parser``.

    The options are placed in a required, mutually exclusive group, so exactly
    one of ``--course_id``, ``--course_slug``, ``--partner_id``,
    ``--partner_short_name`` or ``--group_id`` must be given.

    :param parser: the argparse parser that receives the options.
    """
    scope_subparser = parser.add_mutually_exclusive_group(
        required=True)
    scope_subparser.add_argument(
        '--course_id',
        help='Export rows corresponding to learners within a course according '
        'to the unique id assigned by Coursera.')
    scope_subparser.add_argument(
        '--course_slug',
        help='Export rows corresponding to learners within a course according '
        'to the unique name of your course defined as the part after '
        '/learn in the course url. (e.g. machine-learning for '
        'https://www.coursera.org/learn/machine-learning).')
    scope_subparser.add_argument(
        '--partner_id',
        type=int,
        help='Export rows corresponding to learners within a partner.')
    scope_subparser.add_argument(
        '--partner_short_name',
        help='Export rows corresponding to learners within a partner.')
    scope_subparser.add_argument(
        '--group_id',
        help='Export rows corresponding to learners within a group.')
257 | 
258 | 
def create_request_parser(subparsers):
    """
    Register the ``request`` command with its ``tables`` and ``clickstream``
    subcommands.

    :param subparsers: an argparse sub-parsers object (anything exposing
        ``add_parser``) on which the ``request`` command is created.
    """
    parser_request = subparsers.add_parser(
        'request',
        help='Create and send a data export request with Coursera.',
        description='Create and send a data export request with Coursera. '
        'Use subcommands to specify the export request type.')
    request_subparsers = parser_request.add_subparsers()

    # common arguments between schema and eventing exports
    # (add_help=False because this parser is only used as a `parents` entry;
    # the child parsers add their own -h/--help)
    request_args_parser = argparse.ArgumentParser(add_help=False)

    create_scope_subparser(request_args_parser)

    request_args_parser.add_argument(
        '--purpose',
        required=True,
        help='Please let us know how you plan to use the '
        'data, what types of research questions you\'re asking, who will '
        'be working with the data primarily, and with whom you plan to '
        'share it.')

    # tables subcommand
    parser_tables = request_subparsers.add_parser(
        'tables',
        help=request_tables.__doc__,
        description=request_tables.__doc__,
        parents=[request_args_parser])
    parser_tables.set_defaults(func=request_tables)

    parser_tables.add_argument(
        '--user_id_hashing',
        choices=['linked', 'isolated'],
        default='isolated',
        help='The level of user_id hashing in the data export. With \'linked\''
        ' user_id hashing, users can be identified between table schemas. '
        'With \'isolated\' user_id hashing, users have independent ids in '
        'different schemas and cannot be linked. Only data coordinators have '
        'access to \'linked\' user_ids to restrict PII.')

    parser_tables.add_argument(
        '--schemas',
        choices=SCHEMA_NAMES,
        nargs='+',
        default=SCHEMA_NAMES,
        help='Data schemas to export. Any combination of: {}. By default this '
        'will be all available schemas.'.format(
            ', '.join(SCHEMA_NAMES)))

    # clickstream subcommand
    parser_clickstream = request_subparsers.add_parser(
        'clickstream',
        help=request_clickstream.__doc__,
        description=request_clickstream.__doc__,
        parents=[request_args_parser])
    parser_clickstream.set_defaults(func=request_clickstream)

    parser_clickstream.add_argument(
        '--interval',
        nargs=2,
        metavar=('START', 'END'),
        help='Interval of clickstream data to be exported '
        '(i.e. 2016-08-01 2016-08-04). By default this will be the past day.')

    parser_clickstream.add_argument(
        '--ignore_existing',
        action='store_true',
        help='If flag is set, we will recompute clickstream data for all dates '
        'in the interval. Otherwise, previously computed days are skipped.')
327 | 


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
  1 | courseraresearchexports
  2 | =======================
  3 | 
  4 | .. image:: https://travis-ci.org/coursera/courseraresearchexports.svg
  5 |     :target: https://travis-ci.org/coursera/courseraresearchexports
  6 | 
  7 | This project is a library consisting of a command line interface and a client
  8 | for interacting with Coursera's research exports. Up to date documentation
  9 | of the data provided by Coursera for research purposes is available in the Partner Resource Center:
 10 | `Coursera Data Exports Guide <https://partner.coursera.help/hc/articles/360021121132/>`_.
 11 | 
 12 | Installation
 13 | ------------
 14 | 
 15 | To install this package, execute::
 16 | 
 17 |     pip install courseraresearchexports
 18 | 
 19 | `pip <https://pip.pypa.io/en/latest/index.html>`_ is a python package manager.
 20 | 
 21 | If you do not have ``pip`` installed on your machine, please follow the
 22 | `installation instructions <https://pip.pypa.io/en/latest/installing.html#install-or-upgrade-pip>`_ for your platform.
 23 | 
 24 | If you experience issues installing with ``pip``, we recommend that you use the
 25 | python 2.7 distribution of `Anaconda <https://docs.conda.io/en/latest/miniconda.html>`_ and try the above
 26 | command again, or use a `virtualenv <https://pypi.python.org/pypi/virtualenv>`_
 27 | for installation::
 28 | 
 29 |     virtualenv venv -p python2.7
 30 |     source venv/bin/activate
 31 |     pip install courseraresearchexports
 32 | 
 33 | Note: the ``containers`` subcommand requires ``docker`` to already be installed
 34 | on your machine. Please see the `docker installation instructions <http://docs.docker.com/index.html>`_ for platform
 35 | specific information.
 36 | 
 37 | Refer to `Issues`_ section for additional debugging around installation.
 38 | 
 39 | autocomplete
 40 | ^^^^^^^^^^^^
 41 | 
 42 | To enable tab autocomplete, please install `argcomplete <https://github.com/kislyuk/argcomplete>`_ using
 43 | ``pip install argcomplete`` and execute ``activate-global-python-argcomplete``. Open a new shell and
 44 | press tab for autocomplete functionality.
 45 | 
 46 | See the argcomplete documentation for more details.
 47 | 
 48 | Setup
 49 | -----
 50 | 
 51 | Authorize your application using `courseraoauth2client <https://github.com/coursera/courseraoauth2client>`_::
 52 | 
 53 |     courseraoauth2client config authorize --app manage_research_exports
 54 | 
 55 | To use the ``containers`` functionality, a docker instance must be running.
 56 | Please see the docker `getting started guide <https://docs.docker.com/engine/getstarted/>`_
 57 | for installation instructions for your platform.
 58 | 
 59 | Upgrade
 60 | -------
 61 | 
 62 | If you have a previously installed version of ``courseraresearchexports``, execute::
 63 | 
 64 |     pip install courseraresearchexports --upgrade
 65 | 
 66 | This will upgrade your installation to the newest version.
 67 | 
 68 | Command Line Interface
 69 | ----------------------
 70 | 
 71 | The project includes a command line tool. Run::
 72 | 
 73 |     courseraresearchexports -h
 74 | 
 75 | for a complete list of features, flags, and documentation.  Similarly,
 76 | documentation for the subcommands listed below is also available (e.g. for
 77 | ``jobs``) by running::
 78 | 
 79 |     courseraresearchexports jobs -h
 80 | 
 81 | jobs
 82 | ^^^^
 83 | Submit a research export request or retrieve the status of pending and
 84 | completed export jobs.
 85 | 
 86 | request
 87 | ~~~~~~~
 88 | Creates a data export job request and returns the export request id. To create a
 89 | data export request for all available tables for a course::
 90 | 
 91 |     courseraresearchexports jobs request tables --course_id $COURSE_ID \
 92 |         --purpose "testing data export"
 93 | 
 94 | In order to know your course_id, you can take advantage
 95 | of our COURSE API, putting in the appropriate course_slug. 
 96 | 
 97 | For example,
 98 | if the course_slug is `developer-iot`, you can query the course_id by making the request in your browser logged in session::
 99 | 
100 |     https://api.coursera.org/api/onDemandCourses.v1?q=slug&slug=developer-iot
101 | 
102 | The response will be a JSON object containing an id field with the value::
103 | 
104 |     iRl53_BWEeW4_wr--Yv6Aw
105 | 
106 | **Note**: The course slug is the part after
107 | ``/learn`` in your course url. For ``https://www.coursera.org/learn/machine-learning``,
108 | the slug is `machine-learning`
109 | 
110 | If you have a publicly available course, you can request the export using::
111 | 
112 |     courseraresearchexports jobs request tables --course_slug $COURSE_SLUG \
113 |         --purpose "testing data export"
114 | 
115 | Replace ``$COURSE_SLUG`` with your course slug (The course slug is the part after
116 | ``/learn`` in the url. For ``https://www.coursera.org/learn/machine-learning``,
117 | the slug is `machine-learning`).
118 | 
119 | If a more limited set of data is required, you can specify which schemas are
120 | included with the export. (e.g. for the demographics and notebooks tables)::
121 | 
122 |     courseraresearchexports jobs request tables --course_id $COURSE_ID \
123 |         --schemas demographics notebooks --purpose "testing data export"
124 | 
125 | You can look at all the possible ways to export using::
126 | 
127 |     courseraresearchexports jobs request tables -h
128 | 
129 | **Recommendations**
130 | 
131 | 
132 | 1. Always request only the specific schemas that you need by passing the ``--schemas`` option when requesting exports.
133 |    For more information on the available tables/schemas, please refer to the
134 |    `Coursera Data Exports Guide <https://partner.coursera.help/hc/articles/360021121132/>`_.
135 | 
136 | 2. When requesting exports for all courses in your institution, it is recommended to use the partner level export
137 |    rather than requesting individual course level exports. You can use the command::
138 | 
139 |     courseraresearchexports jobs request tables --partner_short_name $PARTNER_SHORT_NAME \
140 |         --schemas demographics notebooks --purpose "testing data export"
141 | 
142 | Your partner_short_name can be found in the University Assets section of your institution setting.
143 |  
144 | Note that the above command is available for only publicly available partners.
145 | If you have your partnerID, you can request the export using::
146 | 
147 |     courseraresearchexports jobs request tables --partner_id $PARTNER_ID \
148 |         --schemas demographics notebooks --purpose "testing data export"
149 | 
150 | You can find your partner_id by opening the following API URL in a logged-in browser session:
151 | ``https://www.coursera.org/api/partners.v1?q=shortName&shortName=$PARTNER_SHORT_NAME``
152 | 
153 | If you are a data coordinator, you can request that user ids are linked between
154 | domains of the data export::
155 | 
156 |     courseraresearchexports jobs request tables --course_id $COURSE_ID \
157 |         --purpose "testing data export" --user_id_hashing linked
158 | 
159 | Data coordinators can also request clickstream exports::
160 | 
161 |     courseraresearchexports jobs request clickstream --course_id $COURSE_ID \
162 |         --interval 2016-09-01 2016-09-02 --purpose "testing data export"
163 | 
164 | By default, clickstream exports will cache results for days already exported. To ignore the cache and request exports for the entire date range, pass in the flag ``--ignore_existing``.
165 | 
166 | Rate limits
167 | ~~~~~~~~~~~
168 | We have rate limits enabled for the number of exports that can be performed. The underlying export API returns the rate limit error message, 
169 | which is printed when the command fails. The error message reflects the reason why you might be rate limited.
170 | 
171 | get_all
172 | ~~~~~~~
173 | Lists the details and status of all data export requests that you have made::
174 | 
175 |     courseraresearchexports jobs get_all
176 | 
177 | get
178 | ~~~
179 | Retrieve the details and status of an export request::
180 | 
181 |     courseraresearchexports jobs get $EXPORT_REQUEST_ID
182 | 
183 | download
184 | ~~~~~~~~
185 | Download a completed table or clickstream to your local destination::
186 | 
187 |     courseraresearchexports jobs download $EXPORT_REQUEST_ID
188 | 
189 | clickstream_download_links
190 | ~~~~~~~~~~~~~~~~~~~~~~~~~~
191 | Due to the size of clickstream exports, we persist download links for completed
192 | clickstream export requests on Amazon S3. The clickstream data for each day is
193 | saved into a separate file and download links to these files can be retrieved
194 | by running::
195 | 
196 |     courseraresearchexports jobs clickstream_download_links --course_id $COURSE_ID
197 | 
198 | containers
199 | ^^^^^^^^^^
200 | 
201 | create
202 | ~~~~~~
203 | Creates a docker container using the postgres image and loads export data
204 | into a postgres database on the container.  To create a docker container
205 | from an export, first ``request`` an export using the ``jobs`` command.  Then,
206 | using the ``$EXPORT_REQUEST_ID``, create a docker container with::
207 | 
208 |     courseraresearchexports containers create --export_request_id $EXPORT_REQUEST_ID
209 | 
210 | This will download the data export and load all the data into the database
211 | running on the container. This may take some time depending on the size of
212 | your export. To create a docker container with an already downloaded export
213 | (please decompress the archive first)::
214 | 
215 |     courseraresearchexports containers create --export_data_folder /path/to/data_export/
216 | 
217 | After creation use the ``list`` command to check the status of the
218 | container and view the container name, database name, address and port to
219 | connect to the database. Use the `db connect $CONTAINER_NAME` command to open
220 | a psql shell.
221 | 
222 | list
223 | ~~~~
224 | Lists the details of all the containers created by ``courseraresearchexports``::
225 | 
226 |     courseraresearchexports containers list
227 | 
228 | start
229 | ~~~~~
230 | Start a container::
231 | 
232 |     courseraresearchexports containers start $CONTAINER_NAME
233 | 
234 | stop
235 | ~~~~
236 | Stop a container::
237 | 
238 |     courseraresearchexports containers stop $CONTAINER_NAME
239 | 
240 | remove
241 | ~~~~~~
242 | Remove a container::
243 | 
244 |     courseraresearchexports containers remove $CONTAINER_NAME
245 | 
246 | db
247 | ^^
248 | 
249 | connect
250 | ~~~~~~~
251 | Open a shell to a postgres database::
252 | 
253 |     courseraresearchexports db connect $CONTAINER_NAME
254 | 
255 | create_view
256 | ~~~~~~~~~~~
257 | Create a view in the postgres database. We are planning to include commonly
258 | used denormalized views as part of this project. To create one of these views
259 | (i.e. for the demographic_survey view)::
260 | 
261 |     courseraresearchexports db create_view $CONTAINER_NAME --view_name demographic_survey
262 | 
263 | If you have your own sql script that you'd like to create as a view run::
264 | 
265 |     courseraresearchexports db create_view $CONTAINER_NAME --sql_file /path/to/sql/file/new_view.sql
266 | 
267 | This will create a view using the name of the file as the name of the view, in this case "new_view".
268 | 
269 | Note: as `user_id` columns vary with partner and user id hashing, please refer
270 | to the exports guide for SQL formatting guidelines.
271 | 
272 | unload_to_csv
273 | ~~~~~~~~~~~~~
274 | Export a table or view to a csv file.  For example, if the ``demographic_survey``
275 | view was created in the above section, use this command to create a csv::
276 | 
277 |     courseraresearchexports db unload_to_csv $CONTAINER_NAME --relation demographic_survey --dest /path/to/dest/
278 | 
279 | list_tables
280 | ~~~~~~~~~~~
281 | List all the tables present inside a dockerized database::
282 | 
283 |     courseraresearchexports db list_tables $CONTAINER_NAME
284 | 
285 | list_views
286 | ~~~~~~~~~~
287 | List all the views present inside a dockerized database::
288 | 
289 |     courseraresearchexports db list_views $CONTAINER_NAME
290 |     
291 | Using `courseraresearchexports` on a machine without a browser
292 | --------------------------------------------------------------
293 | Sometimes, a browser is not available, making the oauth flow not possible. Commonly, this occurs when users want to automate the data export process by using an external machine.
294 | 
295 | To get around this, you may generate the access token initially on a machine with browser access (e.g. your laptop). The access token is serialized in your local file system at ``~/.coursera/manage_research_exports_oauth2_cache.pickle``.
296 | 
297 | Requests after the first can use the refresh token flow, which does not require a browser. By copying the initial pickled access token to a remote machine, that machine can continue to request updated data. 
298 | 
299 | 
300 | 
301 | Bugs / Issues / Feature Requests
302 | --------------------------------
303 | 
304 | Please use the GitHub issue tracker to document any bugs or other issues you
305 | encounter while using this tool.
306 | 
307 | 
308 | Developing / Contributing
309 | -------------------------
310 | 
311 | We recommend developing ``courseraresearchexports`` within a python
312 | `virtualenv <https://pypi.python.org/pypi/virtualenv>`_.
313 | To get your environment set up properly, do the following::
314 | 
315 |     virtualenv venv
316 |     source venv/bin/activate
317 |     python setup.py develop
318 |     pip install -r test_requirements.txt
319 | 
320 | Tests
321 | ^^^^^
322 | 
323 | To run tests, simply run: ``nosetests``, or ``tox``.
324 | 
325 | Code Style
326 | ^^^^^^^^^^
327 | 
328 | Code should conform to pep8 style requirements. To check, simply run::
329 | 
330 |     pep8 courseraresearchexports tests
331 | 
332 | 
333 | Issues
334 | -------
335 | If you face the following error when installing the psycopg2 package on a Mac::
336 | 
337 |     ld: library not found for -lssl
338 |     clang: error: linker command failed with exit code 1 (use -v to see invocation)
339 |     error: command 'gcc' failed with exit status 1
340 | 
341 | Install openssl package if not installed::
342 | 
343 |     brew install openssl
344 |     export LDFLAGS="-L/usr/local/opt/openssl/lib"
345 |     or 
346 |     export LDFLAGS=-L/usr/local/opt/openssl@3/lib
347 | 
348 | 


--------------------------------------------------------------------------------