├── .circleci └── config.yml ├── .flake8 ├── .gitattributes ├── .github ├── ISSUE_TEMPLATE │ └── general.md └── pull_request_template.md ├── .gitignore ├── .readthedocs.yaml ├── CHANGELOG.md ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE.md ├── MANIFEST.in ├── README.rst ├── docs ├── _static │ ├── civis.ico │ ├── civis.svg │ └── custom.css ├── cli.rst ├── client.rst ├── conf.py ├── index.rst ├── io.rst ├── ml.rst ├── parallel.rst ├── requirements.txt ├── responses.rst ├── user_guide.rst └── utils.rst ├── examples ├── CivisML.ipynb ├── CivisML_parallel_training.ipynb ├── Introducing_CivisML_v2.ipynb └── PythonAPIClientDemo.ipynb ├── pyproject.toml ├── src └── civis │ ├── __init__.py │ ├── _camel_to_snake.py │ ├── _deprecation.py │ ├── _utils.py │ ├── base.py │ ├── cli │ ├── __init__.py │ ├── __main__.py │ └── _cli_commands.py │ ├── client.py │ ├── client.pyi │ ├── futures.py │ ├── io │ ├── __init__.py │ ├── _databases.py │ ├── _files.py │ ├── _tables.py │ └── _utils.py │ ├── loggers.py │ ├── ml │ ├── __init__.py │ ├── _helper.py │ └── _model.py │ ├── parallel.py │ ├── polling.py │ ├── py.typed │ ├── resources │ ├── __init__.py │ ├── _api_spec.py │ ├── _client_pyi.py │ ├── _resources.py │ └── civis_api_spec.json │ ├── response.py │ ├── run_joblib_func.py │ ├── service_client.py │ ├── tests │ ├── __init__.py │ └── mocks.py │ ├── utils │ ├── __init__.py │ └── _jobs.py │ └── workflows │ ├── __init__.py │ ├── _schemas.py │ └── _validate.py ├── tests ├── petstore.json ├── test_base.py ├── test_camel_to_snake.py ├── test_cli.py ├── test_client.py ├── test_deprecate.py ├── test_futures.py ├── test_io.py ├── test_jobs.py ├── test_loggers.py ├── test_ml │ ├── __init__.py │ ├── test_helper.py │ └── test_model.py ├── test_mocks.py ├── test_parallel.py ├── test_polling.py ├── test_resources.py ├── test_response.py ├── test_run_joblib_func.py ├── test_service_client.py ├── test_utils.py ├── test_version.py └── test_workflows │ └── test_validate.py └── tools ├── 
check_if_civis_api_spec_has_updated.py ├── smoke_tests.py └── update_civis_api_spec.py /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2.1 2 | 3 | orbs: 4 | win: circleci/windows@5.0 5 | 6 | jobs: 7 | pre-build: 8 | description: A check that doesn't need every supported Python version (e.g., code style checks) 9 | parameters: 10 | command-run: 11 | type: string 12 | docker: 13 | # Pick the highest Python 3.x version that civis-python is known to support 14 | - image: cimg/python:3.13 15 | steps: 16 | - checkout 17 | - run: 18 | command: << parameters.command-run >> 19 | 20 | build-python: 21 | parameters: 22 | python-version: 23 | type: string 24 | docker: 25 | - image: cimg/python:<< parameters.python-version >> 26 | environment: 27 | CIVIS_API_KEY: FOOBAR 28 | steps: 29 | - checkout 30 | - run: 31 | # Intentionally not using CircleCI's "python/install-packages" step, 32 | # because we don't want to install packages from any requirements.txt/pyproject.toml 33 | # just yet. 34 | # Test that we can build a source distribution that can correctly 35 | # install from clean slate. 
36 | # "python -m build" creates dist/civis-x.y.z-py3-none-any.whl 37 | name: Build source distribution and install package from it 38 | command: | 39 | pip install --progress-bar off --upgrade pip setuptools build && \ 40 | python -m build && \ 41 | pip install dist/`ls dist/ | grep .whl` 42 | - run: 43 | name: Install the full development requirements 44 | command: pip install --progress-bar off -e ".[dev-core,dev-civisml]" 45 | - run: 46 | name: Show installed Python packages 47 | command: pip list -v 48 | - run: 49 | name: Run python tests 50 | command: pytest --junitxml=/tmp/testxml/report.xml --durations=0 51 | - store_test_results: 52 | path: /tmp/testxml/ 53 | 54 | build-python-win: 55 | executor: 56 | name: win/default 57 | shell: powershell.exe 58 | steps: 59 | - checkout 60 | - run: systeminfo 61 | - run: 62 | name: Run tests on Windows 63 | shell: bash.exe 64 | command: | 65 | python --version && \ 66 | python.exe -m pip install --upgrade pip setuptools wheel && \ 67 | python.exe -m pip install ".[dev-core,dev-civisml]" && \ 68 | python.exe -m pip list && \ 69 | CIVIS_API_KEY=foobar pytest 70 | 71 | workflows: 72 | version: 2 73 | build-and-test: 74 | jobs: 75 | - pre-build: 76 | name: bandit 77 | command-run: | 78 | pip install --progress-bar off -e ".[dev-core]" && \ 79 | bandit --version && \ 80 | bandit -r src -x tests 81 | - pre-build: 82 | name: black 83 | command-run: | 84 | pip install --progress-bar off -e ".[dev-core]" && \ 85 | black --check src tools tests docs/conf.py 86 | - pre-build: 87 | name: flake8 88 | command-run: | 89 | pip install --progress-bar off -e ".[dev-core]" && \ 90 | flake8 src tools tests docs/conf.py 91 | - pre-build: 92 | name: pip-audit 93 | command-run: | 94 | pip install --progress-bar off -r docs/requirements.txt && \ 95 | pip install --progress-bar off -e ".[dev-core,dev-civisml]" && \ 96 | pip-audit --version && \ 97 | pip-audit --skip-editable 98 | - pre-build: 99 | name: twine 100 | command-run: | 101 | pip 
install --progress-bar off -e ".[dev-core]" && \ 102 | python -m build && \ 103 | twine check dist/`ls dist/ | grep .tar.gz` && \ 104 | twine check dist/`ls dist/ | grep .whl` 105 | - build-python: 106 | requires: 107 | - bandit 108 | - black 109 | - flake8 110 | - pip-audit 111 | - twine 112 | matrix: 113 | parameters: 114 | python-version: ["3.10", "3.11", "3.12", "3.13"] 115 | - build-python-win: 116 | requires: 117 | - bandit 118 | - black 119 | - flake8 120 | - pip-audit 121 | - twine 122 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 88 3 | extend-ignore = E203 4 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | /CHANGELOG.md merge=union 2 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/general.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: General 3 | about: Ask a question, report a potential issue, etc. 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Note:** Civis employees should _not_ use the GitHub Issues feature at the public "civis-python" codebase 11 | to file a ticket, and should instead use the internal ticketing system. 
12 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | --- 4 | 5 | - [ ] (For Civis employees only) Reference to a relevant ticket in the pull request title 6 | - [ ] Changelog entry added to `CHANGELOG.md` at the repo's root level 7 | - [ ] Description of change in the pull request description 8 | - [ ] If applicable, unit tests have been added and/or updated 9 | - [ ] The CircleCI builds have all passed 10 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | eggs/ 16 | .eggs/ 17 | parts/ 18 | sdist/ 19 | *.egg-info/ 20 | *.egg 21 | 22 | # Unit test / coverage reports 23 | htmlcov/ 24 | .tox/ 25 | .coverage 26 | .coverage.* 27 | .cache 28 | nosetests.xml 29 | coverage.xml 30 | *,cover 31 | 32 | # Django stuff: 33 | *.log 34 | 35 | # Internal 36 | swagger.json 37 | 38 | # vim 39 | *.swp 40 | 41 | # Sphinx docs 42 | docs/generated/ 43 | docs/api_*.rst 44 | docs/build/ 45 | 46 | # IDEs 47 | .idea 48 | 49 | .DS_Store 50 | .vscode 51 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file for Sphinx projects 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 3 | 4 | version: 2 5 | 6 | build: 7 | os: ubuntu-22.04 8 | tools: 9 | python: "3.12" 10 | 11 | sphinx: 12 | builder: html 13 | configuration: docs/conf.py 14 | 15 | # Declare the full Python requirements required for 
stability. 16 | # See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html 17 | python: 18 | install: 19 | - requirements: docs/requirements.txt 20 | # Install the `civis` package from the local directory. 21 | - method: pip 22 | path: . 23 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Code of Conduct 2 | 3 | As contributors and maintainers of this project, and in the interest of 4 | fostering an open and welcoming community, we pledge to respect all people who 5 | contribute through reporting issues, posting feature requests, updating 6 | documentation, submitting pull requests or patches, and other activities. 7 | 8 | We are committed to making participation in this project a harassment-free 9 | experience for everyone, regardless of level of experience, gender, gender 10 | identity and expression, sexual orientation, disability, personal appearance, 11 | body size, race, ethnicity, age, religion, or nationality. 12 | 13 | Examples of unacceptable behavior by participants include: 14 | 15 | * The use of sexualized language or imagery 16 | * Personal attacks 17 | * Trolling or insulting/derogatory comments 18 | * Public or private harassment 19 | * Publishing other's private information, such as physical or electronic 20 | addresses, without explicit permission 21 | * Other unethical or unprofessional conduct 22 | 23 | Project maintainers have the right and responsibility to remove, edit, or 24 | reject comments, commits, code, wiki edits, issues, and other contributions 25 | that are not aligned to this Code of Conduct, or to ban temporarily or 26 | permanently any contributor for other behaviors that they deem inappropriate, 27 | threatening, offensive, or harmful. 
28 | 29 | By adopting this Code of Conduct, project maintainers commit themselves to 30 | fairly and consistently applying these principles to every aspect of managing 31 | this project. Project maintainers who do not follow or enforce the Code of 32 | Conduct may be permanently removed from the project team. 33 | 34 | This Code of Conduct applies both within project spaces and in public spaces 35 | when an individual is representing the project or its community. 36 | 37 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 38 | reported by contacting a project maintainer at opensource@civisanalytics.com. 39 | All complaints will be reviewed and investigated and will result in a response 40 | that is deemed necessary and appropriate to the circumstances. Maintainers are 41 | obligated to maintain confidentiality with regard to the reporter of an 42 | incident. 43 | 44 | 45 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 46 | version 1.3.0, available at 47 | [http://contributor-covenant.org/version/1/3/0/][version] 48 | 49 | [homepage]: http://contributor-covenant.org 50 | [version]: http://contributor-covenant.org/version/1/3/0/ 51 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to civis-python 2 | 3 | We welcome bug reports and pull requests from everyone! 4 | This project is intended to be a safe, welcoming space for collaboration, and 5 | contributors are expected to adhere to the 6 | [Contributor Covenant](http://contributor-covenant.org) code of conduct. 7 | 8 | 9 | ## Filing a Ticket 10 | 11 | If you'd like to add or update a feature in civis-python, 12 | it is recommended that you first file a ticket to discuss your proposed changes 13 | and check their compatibility with Civis Platform before making a pull request. 
14 | 15 | To file a ticket: 16 | 17 | * [For non-Civis employees only] Please create a [GitHub issue](https://github.com/civisanalytics/civis-python/issues). 18 | * [For Civis employees only] Please file an internal ticket. 19 | 20 | 21 | ## Local Development Set-up 22 | 23 | These set-up steps need to be done only once per machine / OS. 24 | 25 | 1. Locally, create an isolated Python environment and activate it 26 | (e.g., using the built-in [venv](https://docs.python.org/3/tutorial/venv.html)). 27 | For the Python version, use the latest Python 3.x that civis-python supports, 28 | as indicated in `pyproject.toml` at the repo's top level. 29 | 2. [For non-Civis employees only] Fork the civis-python repo ( https://github.com/civisanalytics/civis-python/fork ). 30 | 3. Clone the civis-python repo to your local drive: 31 | 32 | ```bash 33 | # For non-Civis employees -- replace <github-username> with your own, as you're cloning from your fork 34 | git clone https://github.com/<github-username>/civis-python.git 35 | 36 | # For Civis employees -- you should already have your SSH key set up locally and need git@ to push to this repo directly 37 | git clone git@github.com:civisanalytics/civis-python.git 38 | ``` 39 | 40 | 4. Use the name `upstream` to point to the upstream source repo `civisanalytics/civis-python` in `git remote`: 41 | 42 | ```bash 43 | # For non-Civis employees: 44 | git remote add upstream https://github.com/civisanalytics/civis-python.git 45 | 46 | # For Civis employees -- git uses `origin` by default, so change it into `upstream` 47 | git remote rename origin upstream 48 | ``` 49 | 50 | 5. Install civis-python in editable mode, and install the development dependencies as well. 51 | 52 | ```bash 53 | cd civis-python 54 | pip install -e ".[dev-core,dev-civisml]" 55 | ``` 56 | 57 | ## Making Changes 58 | 59 | Follow these steps each time you plan to make a pull request to civis-python: 60 | 61 | 1. 
At your local civis-python copy, make sure the `main` branch is in sync with the 62 | `main` at the upstream repo (`git checkout main && git pull upstream main`). 63 | 2. Make sure you are able to run the test suite locally (`pytest`). 64 | 3. Create a feature branch (`git checkout -b my-new-feature`). 65 | 4. Make your change. Don't forget to add or update tests (under `tests/`). 66 | 5. Make sure the test suite, including your new tests, passes 67 | (`pytest && flake8 src tools tests && black --check src tools tests`). 68 | 6. Commit your changes (`git commit -am 'Add some feature'`). 69 | 7. Push to a branch on GitHub: 70 | 71 | ```bash 72 | # For non-Civis employees -- your branch will be at your fork 73 | git push origin my-new-feature 74 | 75 | # For Civis employees -- your branch will be at the upstream repo 76 | git push upstream my-new-feature 77 | ``` 78 | 79 | 8. Create a new pull request on the GitHub interface. 80 | A civis-python maintainer will be automatically notified and start the code review process. 81 | 9. If the build fails, address any issues. 82 | 83 | ## Tips 84 | 85 | - All pull requests must include test coverage. If you’re not sure how to test 86 | your changes, feel free to ask for help. 87 | - Contributions must conform to the guidelines encoded by `flake8`, based on 88 | PEP-8. 89 | - Don’t forget to add your change to the [CHANGELOG](CHANGELOG.md). See 90 | [Keep a CHANGELOG](http://keepachangelog.com/) for guidelines. 91 | 92 | Thank you for taking the time to contribute! 93 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016, Civis Analytics 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. 
Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | 2. Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | 3. Neither the name of the copyright holder nor the names of its contributors 15 | may be used to endorse or promote products derived from this software 16 | without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include CHANGELOG.md 2 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Civis API Python Client 2 | ======================= 3 | 4 | .. start-include-marker-introductory-paragraph 5 | 6 | |PyPI| |PyVersions| |CircleCI| |Documentation| 7 | 8 | .. 
|CircleCI| image:: https://circleci.com/gh/civisanalytics/civis-python.svg?style=shield 9 | :target: https://circleci.com/gh/civisanalytics/civis-python 10 | :alt: CircleCI build status 11 | 12 | .. |PyPI| image:: https://img.shields.io/pypi/v/civis.svg 13 | :target: https://pypi.org/project/civis/ 14 | :alt: Latest version on PyPI 15 | 16 | .. |PyVersions| image:: https://img.shields.io/pypi/pyversions/civis.svg 17 | :target: https://pypi.org/project/civis/ 18 | :alt: Supported python versions for civis-python 19 | 20 | .. |Documentation| image:: https://readthedocs.org/projects/civis-python/badge/?version=latest 21 | :target: https://civis-python.readthedocs.io/en/latest/?badge=latest 22 | :alt: Documentation Status 23 | 24 | The Civis API Python client is a Python package that helps analysts 25 | and developers interact with Civis Platform programmatically. The package includes a set of 26 | tools around common workflows as well as a convenient interface to make 27 | requests directly to the Civis API. 28 | 29 | .. end-include-marker-introductory-paragraph 30 | 31 | Please see the 32 | `full documentation `_ for more details. 33 | 34 | .. start-include-marker-api-keys-section 35 | 36 | API Keys 37 | -------- 38 | 39 | In order to make requests to the Civis API, 40 | you will need a Civis Platform API key that is unique to you. 41 | Instructions for creating a new key are found 42 | `here `_. 43 | API keys have a set expiration date and new keys will need to be created at 44 | least every 30 days. The API client will look for a ``CIVIS_API_KEY`` 45 | environment variable to access your API key, so after creating a new API key, 46 | follow the steps below for your operating system to set up your environment. 47 | 48 | Linux / MacOS 49 | ~~~~~~~~~~~~~ 50 | 51 | 1. Add the following to your shell configuration file (``~/.zshrc`` for MacOS or ``~/.bashrc`` for Linux, by default):: 52 | 53 | export CIVIS_API_KEY="alphaNumericApiK3y" 54 | 55 | 2. 
Source your shell configuration file (or restart your terminal). 56 | 57 | Windows 58 | ~~~~~~~ 59 | 60 | 1. Navigate to "Settings" -> type "environment" in search bar -> 61 | "Edit environment variables for your account". This can also be found 62 | in "System Properties" -> "Advanced" -> "Environment Variables...". 63 | 2. In the user variables section, if ``CIVIS_API_KEY`` already exists in 64 | the list of environment variables, click on it and press "Edit...". 65 | Otherwise, click "New...". 66 | 3. Enter ``CIVIS_API_KEY`` as the "Variable name". 67 | 4. Enter your API key as the "Variable value". Your API key should look 68 | like a long string of letters and numbers. 69 | 70 | .. end-include-marker-api-keys-section 71 | 72 | .. start-include-marker-installation-section 73 | 74 | Installation 75 | ------------ 76 | 77 | After creating an API key and setting the ``CIVIS_API_KEY`` environment 78 | variable, install the Python package ``civis`` with the recommended method via ``pip``:: 79 | 80 | pip install civis 81 | 82 | Alternatively, if you are interested in the latest functionality not yet released through ``pip``, 83 | you may clone the code from GitHub and build from source (``git`` assumed to be available): 84 | 85 | .. code-block:: bash 86 | 87 | pip install git+https://github.com/civisanalytics/civis-python.git 88 | 89 | You can test your installation by running 90 | 91 | .. code-block:: python 92 | 93 | import civis 94 | client = civis.APIClient() 95 | print(client.users.list_me()['username']) 96 | 97 | If ``civis`` was installed correctly, this will print your Civis 98 | Platform username. 99 | 100 | The client has a soft dependency on ``pandas`` to support features such as 101 | data type parsing. If you are using the ``io`` namespace to read or write 102 | data from Civis, it is highly recommended that you install ``pandas`` and 103 | set ``use_pandas=True`` in functions that accept that parameter. To install 104 | ``pandas``: 105 | 106 | .. 
code-block:: bash 107 | 108 | pip install pandas 109 | 110 | Machine learning features in the ``ml`` namespace have a soft dependency on 111 | ``scikit-learn`` and ``pandas``. Install ``scikit-learn`` to 112 | export your trained models from the Civis Platform or to 113 | provide your own custom models. Use ``pandas`` to download model predictions 114 | from the Civis Platform. The ``civis.ml`` code 115 | optionally uses the `feather `_ 116 | format to transfer data from your local computer to Civis 117 | Platform. Install these dependencies with 118 | 119 | .. code-block:: bash 120 | 121 | pip install scikit-learn 122 | pip install pandas 123 | pip install feather-format 124 | 125 | 126 | Some CivisML models have open-source dependencies in 127 | addition to ``scikit-learn``, which you may need if you want to 128 | download the model object. These dependencies are 129 | ``civisml-extensions``, ``glmnet``, and ``muffnn``. Install these 130 | dependencies with 131 | 132 | .. code-block:: bash 133 | 134 | pip install civisml-extensions 135 | pip install glmnet 136 | pip install muffnn 137 | 138 | .. end-include-marker-installation-section 139 | 140 | Usage 141 | ----- 142 | 143 | ``civis`` includes a number of wrappers around the Civis API for 144 | common workflows. 145 | 146 | .. code-block:: python 147 | 148 | import civis 149 | df = civis.io.read_civis(table="my_schema.my_table", 150 | database="database", 151 | use_pandas=True) 152 | 153 | The Civis API may also be directly accessed via the ``APIClient`` class. 154 | 155 | .. code-block:: python 156 | 157 | import civis 158 | client = civis.APIClient() 159 | database = client.databases.list() 160 | 161 | See the `documentation `_ for a more 162 | complete user guide. 163 | 164 | 165 | Building Documentation 166 | ---------------------- 167 | 168 | Background: 169 | 170 | * We use the Sphinx framework. The documentation source files are in ``docs/``. 
171 | * All auto-generated files, including the HTML pages, are explicitly not versioned 172 | (see ``.gitignore``). 173 | 174 | For the public documentation at https://civis-python.readthedocs.io: 175 | 176 | * The doc build is configured by ``.readthedocs.yaml``. 177 | Normally, even when we need to update the documentation or make a new release of civis-python, 178 | neither this configuration YAML file nor Civis's account on the Read the Docs site need 179 | any updates. 180 | * To update the documentation, the files under ``docs/`` can be updated as needed. 181 | If the "API Resources" pages need to be updated because the upstream Civis API has been updated, 182 | then the following need to happen: 183 | (i) the new Civis API updates must be accessible by a "standard" Civis Platform user, 184 | i.e., not behind a feature flag, and 185 | (ii) you'll need to locally run ``python tools/update_civis_api_spec.py`` to update 186 | ``civis_api_spec.json`` inside the ``civis`` Python package codebase. 187 | It is this JSON file that's the basis for the Civis API information on the "API Resources" pages. 188 | Regardless of which Civis API key you use to run ``python tools/update_civis_api_spec.py``, 189 | the updated ``civis_api_spec.json`` only contains Civis API information available to 190 | a standard Civis Platform user. 191 | * The URL https://civis-python.readthedocs.io auto-redirects to 192 | the "stable" URL https://civis-python.readthedocs.io/en/stable/ which reflects 193 | the most recent released civis-python version 194 | (every GitHub release with the tag "vX.Y.Z" triggers a new "stable" doc build 195 | on the Read The Docs site). 196 | In contrast, the "latest" URL https://civis-python.readthedocs.io/en/latest/ reflects 197 | the most recent commit to the upstream ``main`` branch of the civis-python codebase on GitHub. 
198 | If there are doc changes (e.g., new or removed Civis API methods) that we'd really like to 199 | show up at the "stable" URL sooner rather than waiting for the next release with other code changes, 200 | we can make a patch release (i.e., increment the "Z" in "vX.Y.Z"). 201 | 202 | The doc build has its full dependencies listed in ``docs/requirements.txt``. 203 | To update this file: 204 | 205 | * Install the latest version of ``pip-tools``: ``pip install --upgrade pip-tools``. 206 | * Run the ``pip-compile`` command at the top of ``docs/requirements.txt``, with the flag 207 | ``--upgrade`` added to upgrade all transitive dependencies as well. 208 | 209 | To build the documentation locally, for testing and development: 210 | 211 | * Install the full doc-related dependencies: ``pip install -r docs/requirements.txt``. 212 | * Run ``sphinx-build -b html docs docs/build``. 213 | In case you would like for the "API Resources" page to locally show what a specific 214 | Civis Platform user would see from the Civis API 215 | (rather than use the available ``civis_api_spec.json`` for a standard Civis Platform user), 216 | set the environment variable ``CIVIS_API_KEY`` to this user's key 217 | and prepend this command with ``FETCH_REMOTE_RESOURCES=true``. 218 | 219 | 220 | Command-line Interface (CLI) 221 | ---------------------------- 222 | 223 | After installing the Python package, you'll also have a ``civis`` command accessible from your shell. It surfaces a commandline interface to all of the regular Civis API endpoints, plus a few helpers. To get started, run ``civis --help``. 224 | Please see the `CLI documentation `_ for more details. 225 | 226 | 227 | Contributing 228 | ------------ 229 | 230 | See `CONTRIBUTING.md `_ for information about contributing to this project. 231 | 232 | 233 | License 234 | ------- 235 | 236 | BSD-3 237 | 238 | See `LICENSE.md `_ for details. 
239 | 240 | 241 | For Maintainers 242 | --------------- 243 | 244 | The `tools `_ directory contains scripts that civis-python maintainers can 245 | use (and maintain...). Please see their docstrings for usage. 246 | Non-public information can be found by searching the internal documentation system 247 | or consulting the current maintainers. 248 | -------------------------------------------------------------------------------- /docs/_static/civis.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/civisanalytics/civis-python/899fdf5eb470d36a473842242e6b22011b7ec071/docs/_static/civis.ico -------------------------------------------------------------------------------- /docs/_static/civis.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 9 | civis-logo-color-rgb 10 | Created with Sketch. 11 | 12 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /docs/_static/custom.css: -------------------------------------------------------------------------------- 1 | /* Background color behind the Civis logo at the top-left corner. */ 2 | .wy-side-nav-search { 3 | background: #22556F; 4 | } 5 | 6 | /* In the API resource pages, use less vertical space for readability, especially for long methods. */ 7 | dl.py.method dl.simple dd { 8 | margin-bottom: 2px; 9 | } 10 | dl.py.method dl.simple dt { 11 | margin-bottom: 2px; 12 | } 13 | -------------------------------------------------------------------------------- /docs/cli.rst: -------------------------------------------------------------------------------- 1 | Command Line Interface 2 | ====================== 3 | 4 | A command line interface (CLI) to Civis is provided. This can be invoked by 5 | typing the command ``civis`` in the shell (sh, bash, zsh, etc.). It can also 6 | be used in Civis container scripts where the Docker image has this client 7 | installed. 
Here's a simple example of printing the types of scripts. 8 | 9 | .. code-block:: bash 10 | 11 | > civis scripts list-types 12 | - name: sql 13 | - name: python3 14 | - name: javascript 15 | - name: r 16 | - name: containers 17 | 18 | Not all API endpoints are available through the CLI since some take complex 19 | data types (e.g., arrays, objects/dictionaries) as input. However, 20 | functionality is available for getting information about scripts, logs, etc., 21 | as well as executing already created scripts. 22 | 23 | The default output format is YAML, but the ``--json-output`` option allows you to 24 | get output in JSON. 25 | 26 | You can find out more information about a command by adding a ``--help`` option, 27 | like ``civis scripts list --help``. 28 | 29 | The logging level of the CLI can be configured by setting the ``CIVIS_LOG_LEVEL`` 30 | environment variable, e.g., ``CIVIS_LOG_LEVEL=DEBUG civis users list-me``. 31 | 32 | Files 33 | ----- 34 | 35 | There are a few extra, CLI-only commands that wrap the Files API 36 | endpoints to make uploading and downloading files easier: 37 | 38 | - ``civis files upload $PATH`` 39 | 40 | - ``civis files download $FILEID $PATH`` 41 | 42 | Job Logs 43 | -------- 44 | 45 | These commands show job run logs in the format: "datetime message\\n" where 46 | datetime is in ISO8601 format, like "2020-02-14T20:28:18.722Z". 47 | If the job is still running, this command will continue outputting logs 48 | until the run is done and then exit. If the run is already finished, it 49 | will output all the logs from that run and then exit. 50 | 51 | NOTE: These commands could miss some log entries from a currently-running 52 | job. They do not re-fetch logs that might have been saved out of order, in order to 53 | preserve the chronological order of the logs and avoid duplication. 54 | 55 | - ``civis jobs follow-log $JOB_ID`` 56 | 57 | Output live log from the most recent job run for the given job ID. 
58 | 59 | - ``civis jobs follow-run-log $JOB_ID $RUN_ID`` 60 | 61 | Output live log from the given job and run ID. 62 | 63 | Notebooks 64 | --------- 65 | 66 | The following CLI-only commands make it easier to use Civis Platform as a 67 | backend for your Jupyter notebooks. 68 | 69 | - ``civis notebooks download $NOTEBOOK_ID $PATH`` 70 | 71 | Download a notebook from Civis Platform to the requested file on the local filesystem. 72 | 73 | - ``civis notebooks new [$LANGUAGE] [--mem $MEMORY] [--cpu $CPU]`` 74 | 75 | Create a new notebook, allocate resources for it, and open it in a tab 76 | of your default web browser. This command is the most similar to ``jupyter notebook``. 77 | By default, Civis Platform will create a Python 3 notebook, but you can 78 | request any other language. Optional resource parameters let you allocate 79 | more memory or CPU to your notebook. 80 | 81 | - ``civis notebooks up $NOTEBOOK_ID [--mem $MEMORY] [--cpu $CPU]`` 82 | 83 | Allocate resources for a notebook which already exists in Civis Platform 84 | and open it in a tab of your default browser. Optional resource 85 | arguments allow you to change resources allocated to your notebook 86 | (default to using the same resources as the previous run). 87 | 88 | - ``civis notebooks down $NOTEBOOK_ID`` 89 | 90 | Stop a running notebook and free up the resources allocated to it. 91 | 92 | - ``civis notebooks open $NOTEBOOK_ID`` 93 | 94 | Open an existing notebook (which may or may not be running) in your default browser. 95 | 96 | SQL 97 | --- 98 | 99 | The Civis CLI allows for easy running of SQL queries on Civis Platform 100 | through the following commands: 101 | 102 | - ``civis sql [-n $MAX_LINES] -d $DATABASE_NAME -f $FILE_NAME`` 103 | 104 | Read a SQL query from a text file and run it on the specified database. 105 | The results of the query, if any, will be shown after it completes 106 | (up to a maximum of $MAX_LINES rows, defaulting to 100). 
107 | 108 | - ``civis sql [-n $MAX_LINES] -d $DATABASE_NAME -c [$SQL_QUERY]`` 109 | 110 | Instead of reading from a file, read query text from a command line 111 | argument. If you do not provide a query on the command line, 112 | the query text will be taken from stdin. 113 | 114 | - ``civis sql -d $DATABASE_NAME [-f $SQL_FILE_NAME] -o $OUTPUT_FILE_NAME`` 115 | 116 | With the `-o` or `--output` option specified, the complete results 117 | of the query will be downloaded to a CSV file at the requested location 118 | after the query completes. 119 | -------------------------------------------------------------------------------- /docs/client.rst: -------------------------------------------------------------------------------- 1 | API Client 2 | ========== 3 | 4 | :class:`~civis.APIClient` is a class for handling requests to the Civis API. 5 | An instantiated :class:`~civis.APIClient` contains a set of resources 6 | (listed in :ref:`api_resources`) where each resource is an object with methods. By convention, 7 | an instantiated :class:`~civis.APIClient` object is named ``client`` and API 8 | requests are made with the following syntax: 9 | 10 | .. code-block:: python 11 | 12 | client = civis.APIClient() 13 | response = client.resource.method(params) 14 | 15 | 16 | .. toctree:: 17 | :maxdepth: 1 18 | 19 | api_resources 20 | responses 21 | 22 | 23 | Dynamically Created Resources and Methods 24 | ----------------------------------------- 25 | 26 | The methods on :class:`~civis.APIClient` are created dynamically at runtime 27 | by parsing an :class:`python:collections.OrderedDict` representation of the 28 | Civis API specification. 29 | The methods are generated based on the path and HTTP method used with each 30 | endpoint. For example, ``GET /workflows/1`` can be accessed with 31 | ``client.workflows.get(1)``. ``GET`` endpoints that don’t end in a parameter 32 | use a ``list`` method instead. 
33 | Below are examples of endpoints and how they map to API Client methods: 34 | 35 | +-----------------------------------+-------------------------------------------+ 36 | | Endpoint | API Client Method | 37 | +===================================+===========================================+ 38 | | ``GET /workflows`` | ``client.workflows.list()`` | 39 | +-----------------------------------+-------------------------------------------+ 40 | | ``GET /workflows/1`` | ``client.workflows.get(1)`` | 41 | +-----------------------------------+-------------------------------------------+ 42 | | ``GET /workflows/1/executions`` | ``client.workflows.list_executions(1)`` | 43 | +-----------------------------------+-------------------------------------------+ 44 | | ``PATCH /workflows/1`` | ``client.workflows.patch(1, ...)`` | 45 | +-----------------------------------+-------------------------------------------+ 46 | | ``POST /workflows/1/executions`` | ``client.workflows.post_executions(1)`` | 47 | +-----------------------------------+-------------------------------------------+ 48 | | ``GET /workflows/1/executions/2`` | ``client.workflows.get_executions(1, 2)`` | 49 | +-----------------------------------+-------------------------------------------+ 50 | 51 | If your code editor has auto-completion functionality (as many heavy IDEs do), 52 | typing ``client.`` or ``client.workflows.`` should trigger the display of 53 | the available resources or methods, respectively. 54 | If you're running Python interactively 55 | (e.g., the regular Python interactive shell, IPython, or a Jupyter notebook), 56 | Python's built-in ``help`` function can be used to see lists of 57 | available endpoints for a resource (e.g., ``help(client.workflows)``) or to get 58 | documentation for a specific endpoint function (e.g., 59 | ``help(client.workflows.list)``). 
The ``?`` operator in IPython (e.g., ``?client.workflows``) and the ``shift-tab`` 60 | hotkey in a Jupyter notebook also cause documentation to be displayed. 61 | 62 | By default, the Civis API specification is downloaded from 63 | the ``/endpoints`` endpoint the first time an :class:`~civis.APIClient` object is 64 | instantiated. 65 | To reduce overhead due to re-downloading the same API spec multiple times when multiple 66 | ``client`` instances are created, this API spec is cached in memory for a set amount of time. 67 | If you're running Python interactively 68 | (e.g., the regular Python interactive shell, IPython, or a Jupyter notebook), 69 | the cached spec expires in 15 minutes, 70 | and if you're running a script, the spec expires in 24 hours. 71 | When a cached spec expires and a new ``client`` instance is created, 72 | a new spec is downloaded from the Civis API 73 | (so that updates to the Civis API, if any, are available to the new ``client``). 74 | If you want to force a new spec to be downloaded, you can pass 75 | ``force_refresh_api_spec=True`` to the :class:`~civis.APIClient` constructor. 76 | Note that for a given :class:`~civis.APIClient` object, the auto-generated resources and methods 77 | attached to it are never refreshed, even if the Civis API is updated during the lifetime of this object. 78 | 79 | In some circumstances, it may be useful to use a local cache of the API 80 | specification rather than downloading the spec. This can be done by passing 81 | the specification to the client through the parameter ``local_api_spec`` as 82 | either the :class:`python:collections.OrderedDict` or a filename where the 83 | specification has been saved. 84 | 85 | .. 
code-block:: python 86 | 87 | api_key = os.environ['CIVIS_API_KEY'] 88 | spec = civis.resources.get_api_spec(api_key) 89 | 90 | # From OrderedDict 91 | client = civis.APIClient(local_api_spec=spec) 92 | 93 | # From file 94 | with open('local_api_spec.json', 'w') as f: 95 | json.dump(spec, f) 96 | client = civis.APIClient(local_api_spec='local_api_spec.json') 97 | 98 | 99 | .. _retries: 100 | 101 | Retries 102 | ------- 103 | 104 | The API client will automatically retry for certain API error responses. 105 | 106 | If the error is one of [413, 429, 503] and the API client is told how long it needs 107 | to wait before it's safe to retry (this is always the case with 429s, which are 108 | rate limit errors), then the client will wait the specified amount of time 109 | before retrying the request. 110 | 111 | If the error is one of [429, 502, 503, 504] and the request is not a ``patch*`` or ``post*`` 112 | method, then the API client will retry the request several times, with an exponential delay, 113 | to see if it will succeed. If the request is of type ``post*`` it will retry with the same parameters 114 | for error codes [429, 503]. 115 | 116 | While the conditions under which retries are attempted are set as described above, 117 | the behavior of the retries is customizable by passing in a :class:`tenacity.Retrying` instance 118 | to the ``retries`` kwarg of :class:`civis.APIClient`. 119 | 120 | 121 | Object Reference 122 | ---------------- 123 | 124 | .. currentmodule:: civis 125 | 126 | .. autoclass:: civis.APIClient 127 | :members: 128 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. Civis Client documentation master file 2 | 3 | Civis API Python Client 4 | ======================= 5 | 6 | .. 
meta:: 7 | :description: 8 | The Civis API Python Client provides an interface in Python to interact with Civis Platform programmatically. 9 | :keywords: 10 | civis, civis platform, civis analytics, civis api, civis python, 11 | data science, data engineering, data analysis, data analytics, machine learning 12 | 13 | .. include:: ../README.rst 14 | :start-after: start-include-marker-introductory-paragraph 15 | :end-before: end-include-marker-introductory-paragraph 16 | 17 | 18 | .. include:: ../README.rst 19 | :start-after: start-include-marker-api-keys-section 20 | :end-before: end-include-marker-api-keys-section 21 | 22 | 23 | .. include:: ../README.rst 24 | :start-after: start-include-marker-installation-section 25 | :end-before: end-include-marker-installation-section 26 | 27 | 28 | User Guide 29 | ---------- 30 | 31 | For a more detailed walkthrough, see the :ref:`user_guide`. 32 | 33 | 34 | Table of Contents 35 | ----------------- 36 | 37 | .. toctree:: 38 | :maxdepth: 1 39 | 40 | user_guide 41 | io 42 | ml 43 | parallel 44 | client 45 | cli 46 | utils 47 | 48 | 49 | Indices and tables 50 | ------------------ 51 | 52 | * :ref:`genindex` 53 | * :ref:`search` 54 | -------------------------------------------------------------------------------- /docs/io.rst: -------------------------------------------------------------------------------- 1 | Data Import and Export 2 | ====================== 3 | 4 | The ``civis.io`` namespace provides several functions for moving data in and 5 | out of Civis. 6 | 7 | Tables 8 | ------ 9 | 10 | Often, your data will be in structured format like a table in a relational 11 | database, a CSV, or a dataframe. The following functions handle moving 12 | structured data to and from Civis. When using these functions, it is 13 | recommended to have ``pandas`` installed and to pass ``use_pandas=True`` in 14 | the appropriate functions. If ``pandas`` is not installed, data returned 15 | from Civis will all be treated as strings. 16 | 17 | .. 
currentmodule:: civis.io 18 | 19 | .. autosummary:: 20 | :toctree: generated 21 | 22 | civis_to_csv 23 | civis_to_multifile_csv 24 | civis_file_to_table 25 | csv_to_civis 26 | dataframe_to_civis 27 | read_civis 28 | read_civis_sql 29 | export_to_civis_file 30 | split_schema_tablename 31 | 32 | Files 33 | ----- 34 | 35 | These functions will pass flat files to and from Civis. This is useful 36 | if you have data stored in binary or JSON format. Any type of file can 37 | be stored in platform via the files endpoint. 38 | 39 | .. currentmodule:: civis.io 40 | 41 | .. autosummary:: 42 | :toctree: generated 43 | 44 | civis_to_file 45 | dataframe_to_file 46 | file_id_from_run_output 47 | file_to_civis 48 | file_to_dataframe 49 | file_to_json 50 | json_to_file 51 | 52 | Databases 53 | --------- 54 | 55 | These functions move data from one database to another and expose an interface 56 | to run SQL in the database. Use :func:`~civis.io.query_civis` when you need to 57 | execute SQL that does not return data (for example, a ``GRANT`` or 58 | ``DROP TABLE`` statement). 59 | 60 | .. currentmodule:: civis.io 61 | 62 | .. 
autosummary:: 63 | :toctree: generated 64 | 65 | transfer_table 66 | query_civis 67 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile with Python 3.12 3 | # by the following command: 4 | # 5 | # pip-compile --extra=docs --output-file=docs/requirements.txt pyproject.toml 6 | # 7 | alabaster==1.0.0 8 | # via sphinx 9 | attrs==25.2.0 10 | # via 11 | # jsonschema 12 | # referencing 13 | babel==2.17.0 14 | # via sphinx 15 | certifi==2025.1.31 16 | # via requests 17 | charset-normalizer==3.4.1 18 | # via requests 19 | click==8.1.8 20 | # via civis (pyproject.toml) 21 | cloudpickle==3.1.1 22 | # via civis (pyproject.toml) 23 | docutils==0.21.2 24 | # via 25 | # sphinx 26 | # sphinx-rtd-theme 27 | idna==3.10 28 | # via requests 29 | imagesize==1.4.1 30 | # via sphinx 31 | jinja2==3.1.6 32 | # via sphinx 33 | joblib==1.4.2 34 | # via civis (pyproject.toml) 35 | jsonref==1.1.0 36 | # via civis (pyproject.toml) 37 | jsonschema==4.23.0 38 | # via civis (pyproject.toml) 39 | jsonschema-specifications==2024.10.1 40 | # via jsonschema 41 | markupsafe==3.0.2 42 | # via jinja2 43 | numpydoc==1.8.0 44 | # via civis (pyproject.toml) 45 | packaging==24.2 46 | # via sphinx 47 | pygments==2.19.1 48 | # via sphinx 49 | pyyaml==6.0.2 50 | # via civis (pyproject.toml) 51 | referencing==0.36.2 52 | # via 53 | # jsonschema 54 | # jsonschema-specifications 55 | requests==2.32.3 56 | # via 57 | # civis (pyproject.toml) 58 | # sphinx 59 | rpds-py==0.23.1 60 | # via 61 | # jsonschema 62 | # referencing 63 | snowballstemmer==2.2.0 64 | # via sphinx 65 | sphinx==8.1.3 66 | # via 67 | # civis (pyproject.toml) 68 | # numpydoc 69 | # sphinx-rtd-theme 70 | # sphinxcontrib-jquery 71 | sphinx-rtd-theme==3.0.2 72 | # via civis (pyproject.toml) 73 | sphinxcontrib-applehelp==2.0.0 74 | # via sphinx 75 | 
sphinxcontrib-devhelp==2.0.0 76 | # via sphinx 77 | sphinxcontrib-htmlhelp==2.1.0 78 | # via sphinx 79 | sphinxcontrib-jquery==4.1 80 | # via sphinx-rtd-theme 81 | sphinxcontrib-jsmath==1.0.1 82 | # via sphinx 83 | sphinxcontrib-qthelp==2.0.0 84 | # via sphinx 85 | sphinxcontrib-serializinghtml==2.0.0 86 | # via sphinx 87 | tabulate==0.9.0 88 | # via numpydoc 89 | tenacity==9.0.0 90 | # via civis (pyproject.toml) 91 | typing-extensions==4.12.2 92 | # via referencing 93 | urllib3==2.3.0 94 | # via requests 95 | -------------------------------------------------------------------------------- /docs/responses.rst: -------------------------------------------------------------------------------- 1 | .. _responses: 2 | 3 | Responses 4 | ========= 5 | 6 | A Civis API call from ``client.resource.method`` returns a :class:`civis.Response` object 7 | (or a :class:`civis.PaginatedResponse` object, if ``method`` is a "list" call with ``iterator=True``): 8 | 9 | .. code-block:: python 10 | 11 | >>> import civis 12 | >>> client = civis.APIClient() 13 | >>> response = client.scripts.get(12345) 14 | >>> response 15 | Response({'id': 12345, 16 | 'name': 'some script name', 17 | 'created_at': '2018-06-11T20:43:07.000Z', 18 | 'updated_at': '2018-06-11T20:43:19.000Z', 19 | 'author': Response({'id': 67890, 20 | 'name': 'Platform User Name', 21 | 'username': 'platformusername', 22 | 'initials': 'PUN', 23 | 'online': False}), 24 | ... 25 | 26 | To retrieve information from a :class:`civis.Response` object, 27 | use the attribute syntax: 28 | 29 | .. 
code-block:: python 30 | 31 | >>> response.id 32 | 12345 33 | >>> response.name 34 | 'some script name' 35 | >>> response.author 36 | Response({'id': 67890, 37 | 'name': 'Platform User Name', 38 | 'username': 'platformusername', 39 | 'initials': 'PUN', 40 | 'online': False}) 41 | >>> response.author.username 42 | 'platformusername' 43 | 44 | :class:`civis.APIClient` is type-annotated for the returned :class:`civis.Response` object 45 | of a given Civis API endpoint's method, including the expected attributes. 46 | These type annotations facilitate code development and testing: 47 | 48 | * If your IDE has auto-complete support, typing ``response.`` from the example above 49 | prompts possible attributes ``{id, name, author, ...}``. 50 | * Type checking (by tools such as ``mypy``) in test suites and continuous integration 51 | helps to catch issues such as typos and unexpected attributes. 52 | 53 | Alternatively, the "getitem" syntax can also be used: 54 | 55 | .. code-block:: python 56 | 57 | >>> response['id'] 58 | 12345 59 | >>> response['author'] 60 | Response({'id': 67890, 61 | 'name': 'Platform User Name', 62 | 'username': 'platformusername', 63 | 'initials': 'PUN', 64 | 'online': False}) 65 | 66 | Although the "getitem" syntax would lose the benefits of the attribute syntax 67 | listed above, the "getitem" syntax is more user-friendly when an attribute name 68 | is available programmatically, 69 | e.g., ``response[foo]`` versus ``getattr(response, foo)``. 70 | 71 | Note that :class:`civis.Response` objects are read-only. 72 | If you need to modify information from a response object, 73 | call :func:`civis.Response.json` to get a dictionary representation of the response object. 74 | You can then modify this dictionary as needed: 75 | 76 | .. code-block:: python 77 | 78 | >>> response.arguments = ... # !!! Raises CivisImmutableResponseError 79 | >>> response['arguments'] = ... # !!! 
Raises CivisImmutableResponseError 80 | >>> 81 | >>> response_json = response.json() 82 | >>> response_json['arguments'] = {'new_arg_for_a_similar_script': 'some_value'} 83 | >>> # use response_json downstream, e.g., to create a new Civis Platform script 84 | 85 | Response Types 86 | -------------- 87 | 88 | .. autoclass:: civis.Response 89 | :members: 90 | 91 | .. autoclass:: civis.PaginatedResponse 92 | :members: 93 | 94 | .. autoclass:: civis.futures.CivisFuture 95 | :members: 96 | 97 | Helper Functions 98 | ---------------- 99 | 100 | .. autofunction:: civis.find 101 | .. autofunction:: civis.find_one 102 | -------------------------------------------------------------------------------- /docs/utils.rst: -------------------------------------------------------------------------------- 1 | Running Jobs and Templates 2 | ========================== 3 | 4 | The ``civis.utils`` namespace provides several functions for running jobs 5 | and templates on the Civis Platform. 6 | 7 | .. currentmodule:: civis.utils 8 | 9 | .. 
autosummary:: 10 | :toctree: generated 11 | 12 | run_job 13 | run_template 14 | job_logs -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools >= 69.5.1", "wheel"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "civis" 7 | version = "2.5.0" 8 | description = "Civis API Python Client" 9 | readme = "README.rst" 10 | requires-python = ">= 3.10" 11 | authors = [ { name = "Civis Analytics", email = "opensource@civisanalytics.com" } ] 12 | license = { text = "BSD-3-Clause" } 13 | dependencies = [ 14 | "click >= 6.0", 15 | "cloudpickle >= 0.2", 16 | "joblib >= 1.3.0", 17 | "jsonref >= 0.1", 18 | "jsonschema >= 2.5.1", 19 | "PyYAML >= 3.0", 20 | "requests >= 2.32.3", 21 | "tenacity >= 6.2", 22 | ] 23 | classifiers = [ 24 | "Development Status :: 5 - Production/Stable", 25 | "License :: OSI Approved :: BSD License", 26 | "Programming Language :: Python", 27 | "Programming Language :: Python :: 3", 28 | "Programming Language :: Python :: 3 :: Only", 29 | "Programming Language :: Python :: 3.10", 30 | "Programming Language :: Python :: 3.11", 31 | "Programming Language :: Python :: 3.12", 32 | "Programming Language :: Python :: 3.13", 33 | ] 34 | 35 | [project.urls] 36 | Homepage = "https://www.civisanalytics.com" 37 | Source = "https://github.com/civisanalytics/civis-python" 38 | 39 | [project.scripts] 40 | civis = "civis.cli.__main__:main" 41 | civis_joblib_worker = "civis.run_joblib_func:main" 42 | 43 | [project.optional-dependencies] 44 | dev-core = [ 45 | "bandit", # Install the latest version. 46 | "black == 24.10.0", 47 | "build == 1.2.1", 48 | "flake8 == 7.1.1", 49 | "pandas == 2.2.3", 50 | "pip-audit", # Install the latest version. 
def _lazy_import(name):
    """Import module ``name`` lazily and register it in ``sys.modules``.

    The module object is created immediately, but its code is only executed
    the first time one of its attributes is accessed.

    Parameters
    ----------
    name : str
        Absolute, dotted name of the module to import.

    Returns
    -------
    module
        The lazily loaded module, also stored as ``sys.modules[name]``.

    Raises
    ------
    ModuleNotFoundError
        If no module named ``name`` can be found.
    """
    # Import the submodule explicitly: a bare ``import importlib`` does not
    # guarantee that ``importlib.util`` has been loaded.
    import importlib.util

    # https://docs.python.org/3/library/importlib.html#implementing-lazy-imports
    spec = importlib.util.find_spec(name)
    if spec is None:
        # Fail the same way a regular ``import`` statement would, instead of
        # an AttributeError on ``None.loader`` below.
        raise ModuleNotFoundError("No module named {!r}".format(name), name=name)
    loader = importlib.util.LazyLoader(spec.loader)
    spec.loader = loader
    module = importlib.util.module_from_spec(spec)
    sys.modules[name] = module
    loader.exec_module(module)
    return module
def deprecate_param(version_removed, parameter_name, *additional_names):
    """Create a decorator which warns of parameter deprecation

    Use this to create a decorator which will watch for use of a
    deprecated parameter and issue a ``FutureWarning`` if the parameter
    is used. (Use a ``FutureWarning`` because Python does not display
    ``DeprecationWarning`` by default.) The decorator introspects the
    wrapped function's signature so that it catches both keyword
    and positional argument use. The default value of the parameter
    will not be affected.

    Parameters
    ----------
    version_removed: str
        The version in which this parameter will no longer be an allowed
        input to the function, e.g. "v2.0.0".
    parameter_name: str
        The name of the parameter to be deprecated, as it appears in the
        function signature.
    *additional_names
        Use additional positional arguments to indicate multiple parameters
        to deprecate.

    Returns
    -------
    A decorator function

    Raises
    ------
    ValueError
        If any of the named parameters is not
        an argument of the wrapped function

    Examples
    --------
    >>> @deprecate_param('v2.0.0', 'param2')
    ... def adder(param1, param2=0, param3=0):
    ...     return param1 + param2 + param3
    >>> adder(1, 2, 3)
    /Users/username/src/civis-python/civis/utils/deprecation.py:68:
    FutureWarning: The "param2" parameter of "__main__.adder" is deprecated
    and will be removed in v2.0.0.
      FutureWarning)
    6
    >>> adder(1, param3=13)
    14
    """
    all_names = [parameter_name] + list(additional_names)

    def decorator(func):
        # Introspect the wrapped function so that we can find
        # where each parameter is in the order of the function's inputs.
        # Signature.parameters preserves declaration order.
        param_order = list(signature(func).parameters)
        i_args = []
        for name in all_names:
            if name not in param_order:
                raise ValueError(
                    '"{}" is not a parameter of ' "{}.".format(name, str(func))
                )
            # Look up *this* name's position (not the first name's), so that
            # positional use is detected per deprecated parameter.
            i_args.append(param_order.index(name))

        @wraps(func)
        def wrapper(*args, **kwargs):
            warn_list = []
            for name, i_arg in zip(all_names, i_args):
                # The len(args) check looks to see if the user has tried
                # to call the deprecated parameter as a positional argument.
                if len(args) > i_arg or name in kwargs:
                    f_name = "{}.{}".format(func.__module__, func.__name__)
                    msg = (
                        'The "{}" parameter of "{}" is deprecated and '
                        "will be removed in {}.".format(name, f_name, version_removed)
                    )
                    warn_list.append(msg)
            if warn_list:
                # One combined warning, one line per deprecated parameter used.
                warnings.warn("\n".join(warn_list), FutureWarning)
            return func(*args, **kwargs)

        return wrapper

    return decorator
17 | DEFAULT_RETRYING_STR = """ 18 | tenacity.Retrying( 19 | wait=tenacity.wait_random_exponential(multiplier=2, max=60), 20 | stop=(tenacity.stop_after_delay(600) | tenacity.stop_after_attempt(10)), 21 | retry_error_callback=lambda retry_state: retry_state.outcome.result(), 22 | ) 23 | """ 24 | 25 | # Explicitly set the available globals and locals 26 | # to mitigate risk of unwanted code execution 27 | DEFAULT_RETRYING = eval( # nosec 28 | DEFAULT_RETRYING_STR, 29 | {"tenacity": tenacity, "__builtins__": {}}, # globals 30 | {}, # locals 31 | ) 32 | 33 | 34 | def get_api_key(api_key): 35 | """Pass-through if `api_key` is not None otherwise tries the CIVIS_API_KEY 36 | environment variable. 37 | """ 38 | if api_key is not None: # always prefer user given one 39 | return api_key 40 | api_key = os.environ.get("CIVIS_API_KEY", None) 41 | if api_key is None: 42 | raise EnvironmentError( 43 | "No Civis API key found. Please store in " 44 | "CIVIS_API_KEY environment variable" 45 | ) 46 | return api_key 47 | 48 | 49 | def retry_request(method, prepared_req, session, retrying=None): 50 | retry_conditions = None 51 | retrying = retrying if retrying else DEFAULT_RETRYING 52 | 53 | def _make_request(req, sess): 54 | """send the prepared session request""" 55 | response = sess.send(req) 56 | return response 57 | 58 | if method.upper() == "POST": 59 | retry_conditions = tenacity.retry_if_result( 60 | lambda res: res.status_code in _POST_RETRY_CODES 61 | ) 62 | elif method.upper() in _RETRY_VERBS: 63 | retry_conditions = tenacity.retry_if_result( 64 | lambda res: res.status_code in _RETRY_CODES 65 | ) 66 | 67 | if retry_conditions: 68 | retrying.retry = retry_conditions 69 | retrying.wait = wait_for_retry_after_header(fallback=retrying.wait) 70 | response = retrying(_make_request, prepared_req, session) 71 | return response 72 | 73 | response = _make_request(prepared_req, session) 74 | return response 75 | 76 | 77 | class wait_for_retry_after_header(wait_base): 78 | """Wait 
strategy that first looks for Retry-After header. If not 79 | present it uses the fallback strategy as the wait param""" 80 | 81 | def __init__(self, fallback): 82 | self.fallback = fallback 83 | 84 | def __call__(self, retry_state): 85 | # retry_state is an instance of tenacity.RetryCallState. 86 | # The .outcome property contains the result/exception 87 | # that came from the underlying function. 88 | result_headers = retry_state.outcome._result.headers 89 | retry_after = result_headers.get("Retry-After") or result_headers.get( 90 | "retry-after" 91 | ) 92 | 93 | try: 94 | log.info("Retrying after {} seconds".format(retry_after)) 95 | return int(retry_after) 96 | except (TypeError, ValueError): 97 | pass 98 | return self.fallback(retry_state) 99 | -------------------------------------------------------------------------------- /src/civis/cli/__init__.py: -------------------------------------------------------------------------------- 1 | # This init file seems necessary to have _cli.py:main as an entry point. 2 | -------------------------------------------------------------------------------- /src/civis/cli/_cli_commands.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | Additional commands to add to the CLI beyond the OpenAPI spec. 5 | """ 6 | import functools 7 | import os 8 | import sys 9 | 10 | import click 11 | import requests 12 | import webbrowser 13 | 14 | import civis 15 | from civis.io import file_to_civis, civis_to_file 16 | from civis.utils import job_logs 17 | 18 | 19 | # From http://patorjk.com/software/taag/#p=display&f=3D%20Diagonal&t=CIVIS 20 | _CIVIS_ASCII_ART = r""" 21 | ,----.. ,---, ,---, .--.--. 22 | / / \ ,`--.' | ,---.,`--.' | / / '. 23 | | : :| : : /__./|| : :| : /`. / 24 | . | ;. /: | ' ,---.; ; |: | '; | |--` 25 | . ; /--` | : |/___/ \ | || : || : ;_ 26 | ; | ; ' ' ;\ ; \ ' |' ' ; \ \ `. 27 | | : | | | | \ \ \: || | | `----. \ 28 | . | '___ ' : ; ; \ ' .' 
@click.command("upload")
@click.argument("path")
@click.option(
    "--name",
    type=str,
    default=None,
    help="A name for the Civis File (defaults to the base file name)",
)
@click.option(
    "--expires-at",
    type=str,
    default=None,
    help=(
        "The date and time the file will expire "
        '(ISO-8601 format, e.g., "2017-01-15" or '
        '"2017-01-15T15:25:10Z"). '
        'Set "never" for the file to not expire. '
        "The default is the default in Civis (30 days)."
    ),
)
@click.option(
    "--description",
    type=str,
    default=None,
    help="Description (max length: 512 characters) of the file object",
)
def files_upload_cmd(path, name, expires_at, description):
    """Upload a local file to Civis and get back the File ID."""
    # NOTE: the docstring above doubles as the CLI help text, so details
    # are documented in comments instead.
    #
    # path: local file to upload.
    # name: name of the Civis File; defaults to the base name of ``path``.
    # expires_at: ISO-8601 timestamp, "never" (any case), or None to use
    #     the Civis Platform default.
    # description: optional description passed through to file_to_civis.
    # Prints the new file ID to stdout.

    if name is None:
        name = os.path.basename(path)

    kwargs = {"description": description}

    # Add to ``kwargs`` rather than rebinding it, so that ``description``
    # is not dropped when an expiration is given.
    if expires_at is None:
        # Let file_to_civis use the default in Civis platform (30 days).
        pass
    elif expires_at.lower() == "never":
        kwargs["expires_at"] = None
    else:
        kwargs["expires_at"] = expires_at

    with open(path, "rb") as f:
        file_id = file_to_civis(f, name=name, **kwargs)
    print(file_id)
This also allows use of a heredoc 149 | lines = [] 150 | while True: 151 | try: 152 | _i = input() 153 | except (KeyboardInterrupt, EOFError): 154 | # The end of a heredoc produces an EOFError. 155 | break 156 | if not _i: 157 | break 158 | else: 159 | lines.append(_i) 160 | sql = "\n".join(lines) 161 | else: 162 | sql = command 163 | 164 | if not sql: 165 | # If the user didn't enter a query, exit. 166 | if not quiet: 167 | print("ERROR: Did not receive a SQL query.", file=sys.stderr) 168 | return 169 | 170 | if not quiet: 171 | print("\nExecuting query...", file=sys.stderr) 172 | if output: 173 | fut = civis.io.civis_to_csv(output, sql, database=dbname) 174 | fut.result() # Block for completion and raise exceptions if any 175 | if not quiet: 176 | print("Downloaded the result of the query to %s." % output, file=sys.stderr) 177 | else: 178 | fut = civis.io.query_civis( 179 | sql, database=dbname, preview_rows=n, polling_interval=3 180 | ) 181 | cols = fut.result()["result_columns"] 182 | rows = fut.result()["result_rows"] 183 | if not quiet: 184 | print("...Query complete.\n", file=sys.stderr) 185 | print(_str_table_result(cols, rows)) 186 | 187 | 188 | def _str_table_result(cols, rows): 189 | """Turn a Civis Query result into a readable table.""" 190 | str_rows = [["" if _v is None else _v for _v in row] for row in rows] 191 | # Determine the maximum width of each column. 192 | # First find the width of each element in each row, then find the max 193 | # width in each position. 
194 | max_len = functools.reduce( 195 | lambda x, y: [max(z) for z in zip(x, y)], 196 | [[len(_v) for _v in _r] for _r in [cols] + str_rows], 197 | ) 198 | 199 | header_str = " | ".join( 200 | "{0:<{width}}".format(_v, width=_l) for _l, _v in zip(max_len, cols) 201 | ) 202 | tb_strs = [header_str, len(header_str) * "-"] 203 | for row in str_rows: 204 | tb_strs.append( 205 | " | ".join( 206 | "{0:>{width}}".format(_v, width=_l) for _l, _v in zip(max_len, row) 207 | ) 208 | ) 209 | return "\n".join(tb_strs) 210 | 211 | 212 | @click.command( 213 | "follow-log", 214 | help="Output live log from the most recent job run." + _FOLLOW_LOG_NOTE, 215 | ) 216 | @click.argument("id", type=int) 217 | def jobs_follow_log(id): 218 | client = civis.APIClient() 219 | runs = client.jobs.list_runs(id, limit=1, order="id", order_dir="desc") 220 | if not runs: 221 | raise click.ClickException("No runs found for that job ID.") 222 | run_id = runs[0].id 223 | print("Run ID: " + str(run_id)) 224 | _jobs_follow_run_log(id, run_id) 225 | 226 | 227 | @click.command("follow-run-log", help="Output live run log." 
+ _FOLLOW_LOG_NOTE) 228 | @click.argument("id", type=int) 229 | @click.argument("run_id", type=int) 230 | def jobs_follow_run_log(id, run_id): 231 | _jobs_follow_run_log(id, run_id) 232 | 233 | 234 | def _jobs_follow_run_log(id, run_id): 235 | for log in job_logs(id, run_id): 236 | print(" ".join((log["createdAt"], log["message"].rstrip())), flush=True) 237 | 238 | 239 | @click.command("download") 240 | @click.argument("notebook_id", type=int) 241 | @click.argument("path") 242 | def notebooks_download_cmd(notebook_id, path): 243 | """Download a notebook to a specified local path.""" 244 | client = civis.APIClient() 245 | info = client.notebooks.get(notebook_id) 246 | response = requests.get(info["notebook_url"], stream=True, timeout=60) 247 | response.raise_for_status() 248 | chunk_size = 32 * 1024 249 | chunked = response.iter_content(chunk_size) 250 | with open(path, "wb") as f: 251 | for lines in chunked: 252 | f.write(lines) 253 | 254 | 255 | @click.command("new") 256 | @click.argument("language", type=click.Choice(["python3", "r"]), default="python3") 257 | @click.option( 258 | "--mem", type=int, default=None, help="Memory allocated for this notebook in MiB." 
259 | ) 260 | @click.option( 261 | "--cpu", 262 | type=int, 263 | default=None, 264 | help="CPU available for this notebook in 1/1000 of a core.", 265 | ) 266 | def notebooks_new_cmd(language="python3", mem=None, cpu=None): 267 | """Create a new notebook and open it in the browser.""" 268 | client = civis.APIClient() 269 | kwargs = {"memory": mem, "cpu": cpu} 270 | kwargs = {k: v for k, v in kwargs.items() if v is not None} 271 | new_nb = client.notebooks.post(language=language, **kwargs) 272 | print( 273 | "Created new {language} notebook with ID {id} in Civis Platform" 274 | " (https://platform.civisanalytics.com/#/notebooks/{id}).".format( 275 | language=language, id=new_nb.id 276 | ) 277 | ) 278 | _notebooks_up(new_nb.id) 279 | _notebooks_open(new_nb.id) 280 | 281 | 282 | @click.command("up") 283 | @click.argument("notebook_id", type=int) 284 | @click.option( 285 | "--mem", type=int, default=None, help="Memory allocated for this notebook in MiB." 286 | ) 287 | @click.option( 288 | "--cpu", 289 | type=int, 290 | default=None, 291 | help="CPU available for this notebook in 1/1000 of a core.", 292 | ) 293 | def notebooks_up(notebook_id, mem=None, cpu=None): 294 | """Start an existing notebook and open it in the browser.""" 295 | client = civis.APIClient() 296 | kwargs = {"memory": mem, "cpu": cpu} 297 | kwargs = {k: v for k, v in kwargs.items() if v is not None} 298 | client.notebooks.patch(notebook_id, **kwargs) 299 | _notebooks_up(notebook_id) 300 | _notebooks_open(notebook_id) 301 | 302 | 303 | def _notebooks_up(notebook_id): 304 | client = civis.APIClient() 305 | return client.notebooks.post_deployments(notebook_id) 306 | 307 | 308 | @click.command("down") 309 | @click.argument("notebook_id", type=int) 310 | def notebooks_down(notebook_id): 311 | """Shut down a running notebook.""" 312 | client = civis.APIClient() 313 | nb = client.notebooks.get(notebook_id) 314 | state = nb["most_recent_deployment"]["state"] 315 | if state not in ["running", "pending"]: 316 | 
print('Notebook is in state "{}" and can\'t be stopped.'.format(state)) 317 | deployment_id = nb["most_recent_deployment"]["deploymentId"] 318 | client.notebooks.delete_deployments(notebook_id, deployment_id) 319 | 320 | 321 | @click.command("open") 322 | @click.argument("notebook_id", type=int) 323 | def notebooks_open(notebook_id): 324 | """Open an existing notebook in the browser.""" 325 | _notebooks_open(notebook_id) 326 | 327 | 328 | def _notebooks_open(notebook_id): 329 | url = "https://platform.civisanalytics.com/#/notebooks/{}?fullscreen=true" 330 | url = url.format(notebook_id) 331 | webbrowser.open(url, new=2, autoraise=True) 332 | 333 | 334 | @click.command("civis", help="Print Civis") 335 | def civis_ascii_art(): 336 | print(_CIVIS_ASCII_ART) 337 | -------------------------------------------------------------------------------- /src/civis/io/__init__.py: -------------------------------------------------------------------------------- 1 | from civis.io._databases import query_civis, transfer_table 2 | from civis.io._files import ( 3 | civis_to_file, 4 | dataframe_to_file, 5 | file_id_from_run_output, 6 | file_to_civis, 7 | file_to_dataframe, 8 | file_to_json, 9 | json_to_file, 10 | ) 11 | from civis.io._tables import ( 12 | civis_file_to_table, 13 | civis_to_csv, 14 | civis_to_multifile_csv, 15 | csv_to_civis, 16 | dataframe_to_civis, 17 | export_to_civis_file, 18 | read_civis, 19 | read_civis_sql, 20 | split_schema_tablename, 21 | ) 22 | 23 | 24 | __all__ = [ 25 | # from _databases.py 26 | "query_civis", 27 | "transfer_table", 28 | # From _files.py 29 | "civis_to_file", 30 | "dataframe_to_file", 31 | "file_id_from_run_output", 32 | "file_to_civis", 33 | "file_to_dataframe", 34 | "file_to_json", 35 | "json_to_file", 36 | # From _tables.py 37 | "civis_file_to_table", 38 | "civis_to_csv", 39 | "civis_to_multifile_csv", 40 | "csv_to_civis", 41 | "dataframe_to_civis", 42 | "export_to_civis_file", 43 | "read_civis", 44 | "read_civis_sql", 45 | 
"split_schema_tablename", 46 | ] 47 | -------------------------------------------------------------------------------- /src/civis/io/_databases.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from civis import APIClient 4 | from civis.io._utils import maybe_get_random_name 5 | from civis.futures import CivisFuture 6 | 7 | log = logging.getLogger(__name__) 8 | 9 | 10 | def query_civis( 11 | sql, 12 | database, 13 | client=None, 14 | credential_id=None, 15 | preview_rows=10, 16 | polling_interval=None, 17 | hidden=True, 18 | ): 19 | """Execute a SQL statement as a Civis query. 20 | 21 | Run a query that may return no results or where only a small 22 | preview is required. To execute a query that returns a large number 23 | of rows, see :func:`~civis.io.read_civis_sql`. 24 | 25 | Parameters 26 | ---------- 27 | sql : str 28 | The SQL statement to execute. 29 | database : str or int 30 | The name or ID of the database. 31 | client : :class:`civis.APIClient`, optional 32 | If not provided, an :class:`civis.APIClient` object will be 33 | created from the :envvar:`CIVIS_API_KEY`. 34 | credential_id : str or int, optional 35 | The ID of the database credential. If ``None``, the default 36 | credential will be used. 37 | preview_rows : int, optional 38 | The maximum number of rows to return. No more than 100 rows can be 39 | returned at once. 40 | polling_interval : int or float, optional 41 | Number of seconds to wait between checks for query completion. 42 | hidden : bool, optional 43 | If ``True`` (the default), this job will not appear in the Civis UI. 44 | 45 | Returns 46 | ------- 47 | results : :class:`~civis.futures.CivisFuture` 48 | A `CivisFuture` object. 
49 | 50 | Examples 51 | -------- 52 | >>> import civis 53 | >>> run = civis.io.query_civis(sql="DELETE schema.table", database='database') 54 | >>> run.result() # Wait for query to complete 55 | """ 56 | if client is None: 57 | client = APIClient() 58 | database_id = client.get_database_id(database) 59 | cred_id = credential_id or client.default_database_credential_id 60 | resp = client.queries.post( 61 | database_id, sql, preview_rows, credential=cred_id, hidden=hidden 62 | ) 63 | return CivisFuture( 64 | client.queries.get, 65 | (resp.id,), 66 | polling_interval, 67 | client=client, 68 | poll_on_creation=False, 69 | ) 70 | 71 | 72 | def transfer_table( 73 | source_db, 74 | dest_db, 75 | source_table, 76 | dest_table, 77 | job_name=None, 78 | client=None, 79 | source_credential_id=None, 80 | dest_credential_id=None, 81 | polling_interval=None, 82 | **advanced_options, 83 | ): 84 | """Transfer a table from one location to another. 85 | 86 | Parameters 87 | ---------- 88 | source_db : str or int 89 | The name of the database where the source table is located. 90 | Optionally, could be the database ID. 91 | dest_db : str or int 92 | The name of the database where the table will be transferred. 93 | Optionally, could be the database ID. 94 | source_table : str 95 | Full name of the table to transfer, e.g., ``'schema.table'``. 96 | dest_table : str 97 | Full name of the table in the destination database, e.g., 98 | ``'schema.table'``. 99 | job_name : str, optional 100 | A name to give the job. If omitted, a random job name will be 101 | used. 102 | client : :class:`civis.APIClient`, optional 103 | If not provided, an :class:`civis.APIClient` object will be 104 | created from the :envvar:`CIVIS_API_KEY`. 105 | source_credential_id : str or int, optional 106 | Optional credential ID for the source database. If ``None``, the 107 | default credential will be used. 108 | dest_credential_id : str or int, optional 109 | Optional credential ID for the destination database. 
If ``None``, 110 | the default credential will be used. 111 | polling_interval : int or float, optional 112 | Number of seconds to wait between checks for job completion. 113 | **advanced_options : kwargs 114 | Extra keyword arguments will be passed to the import sync job. See 115 | :func:`~civis.resources._resources.Imports.post_syncs`. 116 | 117 | Returns 118 | ------- 119 | results : :class:`~civis.futures.CivisFuture` 120 | A `CivisFuture` object. 121 | 122 | Examples 123 | -------- 124 | >>> import civis 125 | >>> civis.io.transfer_table(source_db='Cluster A', dest_db='Cluster B', 126 | ... source_table='schma.tbl', dest_table='schma.tbl') 127 | """ 128 | if client is None: 129 | client = APIClient() 130 | source_cred_id = source_credential_id or client.default_database_credential_id 131 | dest_cred_id = dest_credential_id or client.default_database_credential_id 132 | job_name = maybe_get_random_name(job_name) 133 | source = { 134 | "remote_host_id": client.get_database_id(source_db), 135 | "credential_id": source_cred_id, 136 | } 137 | destination = { 138 | "remote_host_id": client.get_database_id(dest_db), 139 | "credential_id": dest_cred_id, 140 | } 141 | job_id = client.imports.post( 142 | job_name, "Dbsync", True, source=source, destination=destination 143 | ).id 144 | 145 | client.imports.post_syncs( 146 | id=job_id, 147 | source={"path": source_table}, 148 | destination={"path": dest_table}, 149 | advanced_options=advanced_options, 150 | ) 151 | run_id = client.imports.post_runs(id=job_id).run_id 152 | log.debug("Started run %d of sync for import %d", run_id, job_id) 153 | fut = CivisFuture( 154 | client.imports.get_files_runs, 155 | (job_id, run_id), 156 | polling_interval=polling_interval, 157 | client=client, 158 | poll_on_creation=False, 159 | ) 160 | return fut 161 | -------------------------------------------------------------------------------- /src/civis/io/_utils.py: 
-------------------------------------------------------------------------------- 1 | import uuid 2 | 3 | 4 | def maybe_get_random_name(name): 5 | if not name: 6 | name = uuid.uuid4().hex 7 | return name 8 | -------------------------------------------------------------------------------- /src/civis/loggers.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import sys 4 | 5 | 6 | class _LogFilter(logging.Filter): 7 | def __init__(self, mode, level): 8 | super().__init__() 9 | if mode not in ("at_or_below", "above"): 10 | raise ValueError(f"mode must be one of {{at_or_below, above}}): {mode}") 11 | self.mode = mode 12 | self.level = level 13 | 14 | def filter(self, record): 15 | if self.mode == "at_or_below": 16 | return record.levelno <= self.level 17 | else: 18 | return record.levelno > self.level 19 | 20 | 21 | def civis_logger(name=None, level=None, fmt="%(message)s"): 22 | """Return a logger for Civis Platform jobs. 23 | 24 | The logs of Civis Platform jobs format stdout in black and stderr in red. 25 | This logger sends INFO-level (or below) logging to stdout (black), 26 | and other levels' logging (WARNING, etc.) to stderr (red). 27 | 28 | Parameters 29 | ---------- 30 | name : str, optional 31 | Logger name, to be passed into :func:`logging.getLogger`. 32 | If ``None`` or not provided, ``__name__`` of the module where 33 | this logger is instantiated is used. 34 | level : int or str, optional 35 | Level from which logging is done, 36 | see https://docs.python.org/3/library/logging.html#logging-levels. 37 | If ``None`` or not provided, the level specified by the environment 38 | variable ``CIVIS_LOG_LEVEL`` is used 39 | (e.g., ``export CIVIS_LOG_LEVEL=DEBUG``). 40 | If this environment variable is also not given, 41 | the logging level defaults to ``logging.INFO``. 42 | fmt : str or logging.Formatter, optional 43 | Logging format. The default is ``"%(message)s"``. 
44 | For the attributes that can be formatted, see: 45 | https://docs.python.org/3/library/logging.html#logrecord-objects 46 | Alternatively, you may pass in a :class:`logging.Formatter` instance 47 | for more custom formatting. 48 | 49 | Returns 50 | ------- 51 | :class:`logging.Logger` 52 | """ 53 | logger = logging.getLogger(name if name is not None else globals()["__name__"]) 54 | 55 | if level is None: 56 | logger.setLevel(os.getenv("CIVIS_LOG_LEVEL") or logging.INFO) 57 | else: 58 | logger.setLevel(level) 59 | 60 | # When running on Civis Platform (as opposed to unit tests in CI), 61 | # we don't want to propagate log records to the root logger 62 | # in order to avoid duplicate logs. 63 | logger.propagate = False 64 | 65 | if isinstance(fmt, logging.Formatter): 66 | platform_fmt = fmt 67 | else: 68 | platform_fmt = logging.Formatter(fmt) 69 | 70 | at_or_below_info_hdlr = logging.StreamHandler(sys.stdout) 71 | at_or_below_info_hdlr.addFilter(_LogFilter("at_or_below", logging.INFO)) 72 | at_or_below_info_hdlr.setFormatter(platform_fmt) 73 | logger.addHandler(at_or_below_info_hdlr) 74 | 75 | above_info_hdlr = logging.StreamHandler(sys.stderr) 76 | above_info_hdlr.addFilter(_LogFilter("above", logging.INFO)) 77 | above_info_hdlr.setFormatter(platform_fmt) 78 | logger.addHandler(above_info_hdlr) 79 | 80 | return logger 81 | -------------------------------------------------------------------------------- /src/civis/ml/__init__.py: -------------------------------------------------------------------------------- 1 | """Machine learning in Civis""" 2 | 3 | from civis.ml._model import * # NOQA 4 | from civis.ml._helper import * # NOQA 5 | -------------------------------------------------------------------------------- /src/civis/polling.py: -------------------------------------------------------------------------------- 1 | import time 2 | import threading 3 | 4 | from civis.base import CivisJobFailure, CivisAsyncResultBase, FAILED, DONE 5 | from civis.response import 
Response 6 | 7 | 8 | _MAX_POLLING_INTERVAL = 15 9 | 10 | 11 | class _ResultPollingThread(threading.Thread): 12 | """Poll a function until it returns a Response with a DONE state""" 13 | 14 | # Inspired by `threading.Timer` 15 | 16 | def __init__(self, pollable_result): 17 | super().__init__(daemon=True) 18 | self.pollable_result = pollable_result 19 | self.finished = threading.Event() 20 | 21 | def cancel(self): 22 | """Stop the poller if it hasn't finished yet.""" 23 | self.finished.set() 24 | 25 | def join(self, timeout=None): 26 | """Shut down the polling when the thread is terminated.""" 27 | self.cancel() 28 | super().join(timeout=timeout) 29 | 30 | def run(self): 31 | """Poll until done.""" 32 | while not self.finished.wait(self.pollable_result._next_polling_interval): 33 | # Spotty internet connectivity can result in polling functions 34 | # returning None. This treats None responses like responses which 35 | # have a non-DONE state. 36 | poller_result = self.pollable_result._check_result() 37 | if poller_result is not None and poller_result.state in DONE: 38 | self.finished.set() 39 | 40 | 41 | class PollableResult(CivisAsyncResultBase): 42 | """A class for tracking pollable results. 43 | 44 | This class will begin polling immediately upon creation, and poll for 45 | job completion once every `polling_interval` seconds until the job 46 | completes in Civis. 47 | 48 | Parameters 49 | ---------- 50 | poller : func 51 | A function which returns an object that has a ``state`` attribute. 52 | poller_args : tuple 53 | The arguments with which to call the poller function. 54 | polling_interval : int or float, optional 55 | The number of seconds between API requests to check whether a result 56 | is ready. If an integer or float is provided, this number will be used 57 | as the polling interval. If ``None`` (the default), the polling interval will 58 | start at 1 second and increase geometrically up to 15 seconds. 
The ratio of 59 | the increase is 1.2, resulting in polling intervals in seconds of 60 | 1, 1.2, 1.44, 1.728, etc. This default behavior allows for a faster return for 61 | a short-running job and a capped polling interval for longer-running jobs. 62 | client : :class:`civis.APIClient`, optional 63 | If not provided, an :class:`civis.APIClient` object will be 64 | created from the :envvar:`CIVIS_API_KEY`. 65 | poll_on_creation : bool, optional 66 | If ``True`` (the default), it will poll upon calling ``result()`` the 67 | first time. If ``False``, it will wait the number of seconds specified 68 | in `polling_interval` from object creation before polling. 69 | 70 | Examples 71 | -------- 72 | >>> client = civis.APIClient() 73 | >>> database_id = client.get_database_id("my_database") 74 | >>> cred_id = client.default_database_credential_id 75 | >>> sql = "SELECT 1" 76 | >>> preview_rows = 10 77 | >>> response = client.queries.post(database_id, sql, preview_rows, 78 | ... credential=cred_id) 79 | >>> job_id = response.id 80 | >>> 81 | >>> poller = client.queries.get 82 | >>> poller_args = (job_id, ) # (job_id, run_id) if poller requires run_id 83 | >>> polling_interval = 10 84 | >>> poll = PollableResult(poller, poller_args, polling_interval) 85 | """ 86 | 87 | # this may not be friendly to a rate-limited api 88 | # Implementation notes: The `PollableResult` depends on some private 89 | # features of the `concurrent.futures.Future` class, so it's possible 90 | # that future versions of Python could break something here. 91 | # (It works under at least 3.6) 92 | # We use the following `Future` implementation details 93 | # - The `Future` checks its state against predefined strings. We use 94 | # `STATE_TRANS` to translate from the Civis platform states to `Future` 95 | # states. 
96 | # - `Future` uses a `_state` attribute to check its current condition 97 | # - `Future` handles event notification through `set_result` and 98 | # `set_exception`, which we call from `_check_result`. 99 | # - We use the `Future` thread lock called `_condition` 100 | # - We assume that results of the Future are stored in `_result`. 101 | def __init__( 102 | self, 103 | poller, 104 | poller_args, 105 | polling_interval=None, 106 | client=None, 107 | poll_on_creation=True, 108 | ): 109 | super().__init__() 110 | 111 | self.poller = poller 112 | self.poller_args = poller_args 113 | self.polling_interval = polling_interval 114 | self.client = client 115 | self.poll_on_creation = poll_on_creation 116 | 117 | if self.polling_interval is not None and self.polling_interval <= 0: 118 | raise ValueError("The polling interval must be positive.") 119 | 120 | self._next_polling_interval = 1 121 | self._use_geometric_polling = True 122 | 123 | # Polling arguments. Never poll more often than the requested interval. 124 | if poll_on_creation: 125 | self._last_polled = None 126 | else: 127 | self._last_polled = time.time() 128 | self._last_result = None 129 | 130 | self._begin_tracking() 131 | 132 | def _begin_tracking(self, start_thread=False): 133 | """Start monitoring the Civis Platform job""" 134 | with self._condition: 135 | if getattr(self, "poller", None) is None: 136 | raise RuntimeError( 137 | "Internal error: Must set polling " 138 | "function before initializing thread." 139 | ) 140 | self._reset_polling_thread(self.polling_interval, start_thread) 141 | 142 | def _check_result(self): 143 | """Return the job result from Civis. Once the job completes, store the 144 | result and never poll again.""" 145 | with self._condition: 146 | # Start a single thread continuously polling. 147 | # It will stop once the job completes. 
148 | if not self._polling_thread.is_alive() and self._result is None: 149 | self._polling_thread.start() 150 | 151 | if self._result is not None: 152 | # If the job is already completed, just return the stored 153 | # result. 154 | return self._result 155 | 156 | # Check to see if the job has finished, but don't poll more 157 | # frequently than the requested polling frequency. 158 | now = time.time() 159 | if ( 160 | not self._last_polled 161 | or (now - self._last_polled) >= self._next_polling_interval 162 | ): 163 | if self._use_geometric_polling: 164 | # Choosing a common ratio of 1.2 for these polling intervals: 165 | # 1, 1.2, 1.44, 1.73, 2.07, 2.49, 2.99, ..., and capped at 15. 166 | # Within the first 15 secs by wall time, we call the poller 7 times, 167 | # which gives a short-running job's future.result() 168 | # a higher chance to return faster. 169 | # For longer running jobs, the polling interval will be capped 170 | # at 15 secs when by wall time 87 secs have passed. 171 | self._next_polling_interval *= 1.2 172 | if self._next_polling_interval > _MAX_POLLING_INTERVAL: 173 | self._next_polling_interval = _MAX_POLLING_INTERVAL 174 | self._use_geometric_polling = False 175 | # Poll for a new result 176 | self._last_polled = now 177 | try: 178 | self._last_result = self.poller(*self.poller_args) 179 | except Exception as e: 180 | # The _poller can raise API exceptions 181 | # Set those directly as this Future's exception 182 | self._set_api_exception(exc=e) 183 | else: 184 | # If the job has finished, then register completion and 185 | # store the results. Because of the `if self._result` check 186 | # up top, we will never get here twice. 
187 | self._set_api_result(self._last_result) 188 | 189 | return self._last_result 190 | 191 | def _set_api_result(self, result): 192 | with self._condition: 193 | if result.state in FAILED: 194 | try: 195 | err_msg = str(result["error"]) 196 | except: # NOQA 197 | err_msg = str(result) 198 | job_id = getattr(self, "job_id", None) 199 | run_id = getattr(self, "run_id", None) 200 | self._set_api_exception( 201 | exc=CivisJobFailure(err_msg, result, job_id, run_id), 202 | result=result, 203 | ) 204 | elif result.state in DONE: 205 | self.set_result(result) 206 | self.cleanup() 207 | 208 | def _set_api_exception(self, exc, result=None): 209 | with self._condition: 210 | if result is None: 211 | result = Response({"state": FAILED[0]}) 212 | self._result = result 213 | self._last_result = self._result 214 | self.set_exception(exc) 215 | self.cleanup() 216 | 217 | def cleanup(self): 218 | # This gets called after the result is set. 219 | # Ensure that the polling thread shuts down when it's no longer needed. 
220 | with self._condition: 221 | if self._polling_thread.is_alive(): 222 | self._polling_thread.cancel() 223 | 224 | def _reset_polling_thread(self, polling_interval, start_thread=False): 225 | with self._condition: 226 | if ( 227 | getattr(self, "_polling_thread", None) is not None 228 | and self._polling_thread.is_alive() 229 | ): 230 | self._polling_thread.cancel() 231 | self.polling_interval = polling_interval 232 | self._next_polling_interval = 1 if (pi := polling_interval) is None else pi 233 | self._use_geometric_polling = polling_interval is None 234 | self._polling_thread = _ResultPollingThread(self) 235 | if start_thread: 236 | self._polling_thread.start() 237 | -------------------------------------------------------------------------------- /src/civis/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/civisanalytics/civis-python/899fdf5eb470d36a473842242e6b22011b7ec071/src/civis/py.typed -------------------------------------------------------------------------------- /src/civis/resources/__init__.py: -------------------------------------------------------------------------------- 1 | from ._resources import ( 2 | generate_classes, 3 | get_api_spec, 4 | generate_classes_maybe_cached, 5 | CACHED_SPEC_PATH, 6 | cache_api_spec, 7 | API_SPEC_PATH, 8 | ) 9 | 10 | __all__ = [ 11 | "generate_classes", 12 | "get_api_spec", 13 | "generate_classes_maybe_cached", 14 | "CACHED_SPEC_PATH", 15 | "cache_api_spec", 16 | "API_SPEC_PATH", 17 | ] 18 | -------------------------------------------------------------------------------- /src/civis/resources/_api_spec.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | from inspect import signature, isfunction 3 | 4 | import civis 5 | from civis.resources import generate_classes_maybe_cached 6 | 7 | 8 | def download_latest_api_spec(path): 9 | client = civis.APIClient() 10 | try: 11 
| job = client.scripts.post_custom(from_template_id=13448) 12 | except civis.base.CivisAPIError as e: 13 | if e.status_error == 404: 14 | raise EnvironmentError( 15 | "This script can only be run by a Civis employee with their " 16 | "regular Civis Platform account's API key." 17 | ) 18 | else: 19 | raise 20 | fut = civis.utils.run_job(job.id, client=client, polling_interval=5) 21 | fut.result() 22 | print(f"custom script {fut.job_id} run {fut.run_id} has succeeded") 23 | outputs = client.scripts.list_custom_runs_outputs(fut.job_id, fut.run_id) 24 | file_id = civis.find_one(outputs, name="civis_api_spec.json").object_id 25 | with open(path, "wb") as f: 26 | civis.io.civis_to_file(file_id, f, client=client) 27 | 28 | 29 | def _get_methods(cls) -> dict: 30 | return { 31 | method_name: method 32 | for method_name, method in vars(cls).items() 33 | if not method_name.startswith("_") and isfunction(method) 34 | } 35 | 36 | 37 | def _compare(reference: dict, compared: dict) -> tuple[dict, dict]: 38 | new = { 39 | "endpoints": set(), 40 | "methods": defaultdict(set), 41 | } 42 | changed = { 43 | "method parameters": defaultdict(set), 44 | "method docstrings": defaultdict(set), 45 | } 46 | for endpoint_name in set(compared.keys()) - set(reference.keys()): 47 | new["endpoints"].add(endpoint_name) 48 | for endpoint_name in set(compared.keys()) & set(reference.keys()): 49 | methods_compared = _get_methods(compared[endpoint_name]) 50 | methods_reference = _get_methods(reference[endpoint_name]) 51 | if meth_names := (set(methods_compared.keys()) - set(methods_reference.keys())): 52 | for meth_name in meth_names: 53 | new["methods"][endpoint_name].add(meth_name) 54 | for meth_name in set(methods_compared.keys()) & set(methods_reference.keys()): 55 | method_compared = methods_compared[meth_name] 56 | method_reference = methods_reference[meth_name] 57 | if ( 58 | signature(method_compared).parameters 59 | != signature(method_reference).parameters 60 | ): 61 | changed["method 
parameters"][endpoint_name].add(meth_name) 62 | if method_compared.__doc__ != method_reference.__doc__: 63 | changed["method docstrings"][endpoint_name].add(meth_name) 64 | # Convert defaultdicts to regular dicts for nicer pprinting. 65 | new["methods"] = dict(new["methods"]) 66 | changed["method parameters"] = dict(changed["method parameters"]) 67 | changed["method docstrings"] = dict(changed["method docstrings"]) 68 | return new, changed 69 | 70 | 71 | def compare_api_specs(path_current: str, path_upstream: str) -> tuple[dict, dict, dict]: 72 | """Compare two Civis API specs for whether there's a difference. 73 | 74 | Parameters 75 | ---------- 76 | path_current : str 77 | Path of the current Civis API spec versioned in the civis-python codebase. 78 | path_upstream : str 79 | Path of the latest Civis API spec fetched from upstream. 80 | 81 | Returns 82 | ------- 83 | tuple[dict, dict, dict] 84 | Dicts of added, removed, and changed endpoints and methods. 85 | """ 86 | endpoints_current = generate_classes_maybe_cached( 87 | path_current, api_key="no_key_needed", api_version="1.0" 88 | ) 89 | endpoints_upstream = generate_classes_maybe_cached( 90 | path_upstream, api_key="no_key_needed", api_version="1.0" 91 | ) 92 | added, changed = _compare(endpoints_current, endpoints_upstream) 93 | removed, _ = _compare(endpoints_upstream, endpoints_current) 94 | return added, removed, changed 95 | -------------------------------------------------------------------------------- /src/civis/resources/_client_pyi.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | import os 3 | import textwrap 4 | import typing 5 | 6 | from civis.resources import generate_classes_maybe_cached 7 | from civis.response import Response 8 | 9 | 10 | CLIENT_PYI_PATH = os.path.join( 11 | os.path.dirname(os.path.dirname(os.path.realpath(__file__))), 12 | "client.pyi", 13 | ) 14 | 15 | 16 | def _get_endpoint_class_name(endpoint_name): 17 | # Factor 
out this helper function for consistency. 18 | return f"_{endpoint_name.title()}" 19 | 20 | 21 | def _get_annotation(param): 22 | if param.name == "self": 23 | return "" 24 | elif param.kind == inspect.Parameter.POSITIONAL_OR_KEYWORD: 25 | return param.annotation 26 | else: 27 | return f"{param.annotation} | None" 28 | 29 | 30 | def _extract_nested_response_classes(response_classes, return_type): 31 | response_classes[return_type.__name__] = return_type 32 | for typ in return_type.__annotations__.values(): 33 | if isinstance(typ, str): 34 | continue 35 | if isinstance(typ, typing._GenericAlias): 36 | typ = typing.get_args(typ)[0] 37 | response_classes = _extract_nested_response_classes(response_classes, typ) 38 | return response_classes 39 | 40 | 41 | def generate_client_pyi(client_pyi_path, api_spec_path): 42 | classes = generate_classes_maybe_cached( 43 | api_spec_path, api_key="not_needed", api_version="1.0" 44 | ) 45 | 46 | with open(client_pyi_path, "w") as f: 47 | f.write( 48 | """# This file is auto-generated by tools/update_civis_api_spec.py. 49 | # Do not edit it by hand. 
50 | 51 | from collections import OrderedDict 52 | from collections.abc import Iterator 53 | from typing import Any, List 54 | 55 | from civis.response import Response 56 | 57 | """ 58 | ) 59 | 60 | response_classes = {} 61 | 62 | for endpoint_name, endpoint_class in classes.items(): 63 | f.write(f"class {_get_endpoint_class_name(endpoint_name)}:\n") 64 | method_defs = [] 65 | for method_name, method in vars(endpoint_class).items(): 66 | method_def = "" 67 | if method_name.startswith("_"): 68 | continue 69 | signature = inspect.signature(method) 70 | return_type = signature.return_annotation 71 | if return_type is not Response: 72 | if return_type.__name__ == "Iterator": 73 | response_classes = _extract_nested_response_classes( 74 | response_classes, typing.get_args(return_type)[0] 75 | ) 76 | else: 77 | response_classes = _extract_nested_response_classes( 78 | response_classes, return_type 79 | ) 80 | params = inspect.signature(method).parameters 81 | method_def += f" def {method_name}(\n" 82 | for param_name, param in params.items(): 83 | annotation = _get_annotation(param) 84 | if param_name == "self": 85 | method_def += " self,\n" 86 | elif param.kind == inspect.Parameter.POSITIONAL_OR_KEYWORD: 87 | method_def += f" {param_name}: {annotation},\n" 88 | else: 89 | method_def += f" {param_name}: {annotation} = ...,\n" 90 | if return_type.__name__ == "Iterator": 91 | return_str = f"Iterator[{typing.get_args(return_type)[0].__name__}]" 92 | else: 93 | return_str = return_type.__name__ 94 | method_def += f" ) -> {return_str}:\n" 95 | method_doc = textwrap.indent(method.__doc__, " " * 8).lstrip() 96 | method_def += f' """{method_doc}\n """\n ...\n' 97 | method_defs.append(method_def) 98 | f.write("\n".join(method_defs)) 99 | f.write("\n") 100 | 101 | for response_class in response_classes.values(): 102 | if len(line1 := f"class {response_class.__name__}(Response):") <= 88: 103 | f.write(f"{line1}\n") 104 | elif len(line2 := f"class {response_class.__name__}(") <= 88: 
105 | f.write(f"{line2}\n Response\n):\n") 106 | else: 107 | f.write( 108 | f"class {response_class.__name__}( # noqa: E501\n" 109 | " Response\n):\n" 110 | ) 111 | for name, anno in response_class.__annotations__.items(): 112 | inner_anno_str = None 113 | if isinstance(anno, str): 114 | anno_str = anno 115 | elif isinstance(anno, typing._GenericAlias): 116 | inner_anno_str = typing.get_args(anno)[0].__name__ 117 | anno_str = f"{anno.__name__}[{inner_anno_str}]" 118 | else: 119 | anno_str = anno.__name__ 120 | if len(line := f" {name}: {anno_str}") <= 88: 121 | f.write(f"{line}\n") 122 | elif anno_str.startswith("List"): 123 | f.write(f" {name}: List[\n {inner_anno_str}\n ]\n") 124 | else: 125 | f.write(f" {name}: (\n {anno_str}\n )\n") 126 | f.write("\n") 127 | 128 | f.write( 129 | """# Need the individual endpoint classes defined first as above, 130 | # before we can define APIClient to use them. 131 | class APIClient: 132 | default_credential: int | None 133 | default_database_credential_id: int | None 134 | username: str 135 | feature_flags: tuple[str] 136 | last_response: Any 137 | def __init__( 138 | self, 139 | api_key: str | None = ..., 140 | return_type: str = ..., 141 | api_version: str = ..., 142 | local_api_spec: OrderedDict | str | None = ..., 143 | force_refresh_api_spec: bool = ..., 144 | ): ... 145 | def get_aws_credential_id( 146 | self, 147 | cred_name: str | int, 148 | owner: str | None = None, 149 | ) -> int: ... 150 | def get_database_credential_id( 151 | self, 152 | username: str | int, 153 | database_name: str | int, 154 | ) -> int: ... 155 | def get_database_id( 156 | self, 157 | database: str | int, 158 | ) -> int: ... 159 | def get_storage_host_id( 160 | self, 161 | storage_host: str | int, 162 | ) -> int: ... 163 | def get_table_id( 164 | self, 165 | table: str, 166 | database: str | int, 167 | ) -> int: ... 
def worker_func(func_file_id):
    """Download a pickled callable, call it, and upload what it produced.

    The callable's return value -- or, if it raised, the exception itself --
    is pickled, uploaded as a Civis File, and attached as a "File" output
    of the current container run. An exception is re-raised after it has
    been recorded.

    Parameters
    ----------
    func_file_id
        ID of the Civis File holding the serialized callable.

    Raises
    ------
    RuntimeError
        If ``CIVIS_JOB_ID`` or ``CIVIS_RUN_ID`` is missing from the environment.
    """
    # The uploaded result File is set to expire 7 days from now.
    expiration = (datetime.now() + timedelta(days=7)).isoformat()

    client = civis.APIClient()
    job_id = os.environ.get("CIVIS_JOB_ID")
    run_id = os.environ.get("CIVIS_RUN_ID")
    if not (job_id and run_id):
        raise RuntimeError("This function must be run inside a Civis container job.")

    def _upload(payload):
        # Pickle the payload into memory, push it to the Files API,
        # then register the new file as an output of this run.
        buffer = BytesIO()
        cloudpickle.dump(payload, buffer, pickle.HIGHEST_PROTOCOL)
        buffer.seek(0)
        file_id = _robust_file_to_civis(
            buffer,
            f"Results from Joblib job {job_id} / run {run_id}",
            n_retries=5,
            delay=0.5,
            expires_at=expiration,
            client=client,
        )
        client.scripts.post_containers_runs_outputs(job_id, run_id, "File", file_id)
        print(f"Results output to file ID: {file_id}")

    outcome = None
    try:
        func, remote_backend = _robust_pickle_download(
            func_file_id, client=client, n_retries=5, delay=0.5
        )
        backend = _setup_remote_backend(remote_backend)
        with parallel_config(backend):
            outcome = func()
    except Exception as exc:
        print("Error! Attempting to record exception.")
        # Keep the exception so the ``finally`` clause uploads it as the result.
        outcome = exc
        raise
    finally:
        # A None outcome (e.g. the callable returned None) leaves nothing
        # to upload.
        if outcome is not None:
            _upload(outcome)
81 | func_file_id = os.environ["JOBLIB_FUNC_FILE_ID"] 82 | worker_func(func_file_id=func_file_id) 83 | 84 | 85 | if __name__ == "__main__": 86 | main() 87 | -------------------------------------------------------------------------------- /src/civis/service_client.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | from functools import lru_cache 3 | import json 4 | from jsonref import JsonRef 5 | import re 6 | import requests 7 | 8 | from civis import APIClient 9 | from civis.base import CivisAPIError, Endpoint, tostr_urljoin 10 | from civis.resources._resources import parse_method 11 | 12 | 13 | _TO_CAMELCASE_REGEX = re.compile(r"(^|_)([a-zA-Z])") 14 | 15 | 16 | def _get_service(client): 17 | if client._api_key: 18 | api_client = APIClient(client._api_key) 19 | else: 20 | api_client = APIClient() 21 | service = api_client.services.get(client._service_id) 22 | return service 23 | 24 | 25 | def auth_service_session(session, client): 26 | service = _get_service(client) 27 | auth_url = service["current_deployment"]["displayUrl"] 28 | # Make request for adding Authentication Cookie to session 29 | session.get(auth_url) 30 | 31 | 32 | def _parse_service_path(path, operations, root_path=None): 33 | """Parse an endpoint into a class where each valid http request 34 | on that endpoint is converted into a convenience function and 35 | attached to the class as a method. 36 | """ 37 | if root_path is not None: 38 | path = path.replace(root_path, "") 39 | path = path.strip("/") 40 | modified_base_path = path.split("/")[0].lower().replace("-", "_") 41 | methods = [] 42 | for verb, op in operations.items(): 43 | method = parse_method(verb, op, path) 44 | if method is None: 45 | continue 46 | methods.append(method) 47 | return modified_base_path, methods 48 | 49 | 50 | def parse_service_api_spec(api_spec, root_path=None): 51 | """Dynamically create classes to interface with a Civis Service API. 
class ServiceEndpoint(Endpoint):
    """Endpoint that sends its requests to a Civis service's own URL.

    URLs are built from the owning client's ``_base_url`` and optional
    ``_root_path``. Every request runs in a fresh ``requests.Session``
    that is first passed through ``auth_service_session``.
    """

    def __init__(self, client, return_type="civis"):
        self._client = client
        self._return_type = return_type

    def _build_path(self, path):
        """Return the full URL for ``path`` (the base URL if ``path`` is empty)."""
        client = self._client
        if not path:
            return client._base_url
        root = client._root_path
        # The root path, when set, sits between the base URL and the endpoint.
        if root:
            pieces = [root.strip("/"), path.strip("/")]
        else:
            pieces = [path.strip("/")]
        return tostr_urljoin(client._base_url, *pieces)

    def _make_request(self, method, path=None, params=None, data=None, **kwargs):
        """Send one request and return the response.

        Raises
        ------
        CivisAPIError
            If the response's ``ok`` attribute is false.
        """
        target_url = self._build_path(path)

        with requests.Session() as session:
            auth_service_session(session, self._client)
            # ``_lock`` is not set in this class; presumably it is
            # provided by ``Endpoint``.
            with self._lock:
                response = session.request(
                    method, target_url, json=data, params=params, **kwargs
                )

        if response.ok:
            return response
        raise CivisAPIError(response)
ServiceClient: 109 | 110 | def __init__( 111 | self, 112 | service_id, 113 | root_path=None, 114 | swagger_path="/endpoints", 115 | api_key=None, 116 | return_type="snake", 117 | local_api_spec=None, 118 | ): 119 | """Create an API Client from a Civis service. 120 | 121 | Parameters 122 | ---------- 123 | service_id : str, required 124 | The Id for the service that will be used to generate the client. 125 | root_path : str, optional 126 | An additional path for APIs that are not hosted on the service's 127 | root level. An example root_path would be '/api' for an app with 128 | resource endpoints that all begin with '/api'. 129 | swagger_path : str, optional 130 | The endpoint path that will be used to download the API Spec. 131 | The default value is '/endpoints' but another common path 132 | might be '/spec'. The API Spec must be compliant with Swagger 133 | 2.0 standards. 134 | api_key : str, optional 135 | Your API key obtained from the Civis Platform. If not given, the 136 | client will use the :envvar:`CIVIS_API_KEY` environment variable. 137 | This API key will need to be authorized to access the service 138 | used for the client. 139 | return_type : str, optional 140 | The following types are implemented: 141 | 142 | - ``'raw'`` Returns the raw :class:`requests:requests.Response` 143 | object. 144 | - ``'snake'`` Returns a :class:`civis.Response` object 145 | for the json-encoded content of a response. This maps the 146 | top-level json keys to snake_case. 147 | local_api_spec : collections.OrderedDict or string, optional 148 | The methods on this class are dynamically built from the Service 149 | API specification, which can be retrieved from the /endpoints 150 | endpoint. When local_api_spec is None, the default, this 151 | specification is downloaded the first time APIClient is 152 | instantiated. Alternatively, a local cache of the specification 153 | may be passed as either an OrderedDict or a filename which 154 | points to a json file. 
def to_camelcase(s):
    """Return ``s`` converted from snake_case to CamelCase.

    Every ASCII letter at the start of the string or directly after an
    underscore is upper-cased, and that underscore is dropped
    (e.g. ``"foo_bar"`` becomes ``"FooBar"``).
    """

    def _upper_letter(match):
        # Group 2 is the letter; group 1 (start of string or "_") is discarded.
        return match.group(2).upper()

    return _TO_CAMELCASE_REGEX.sub(_upper_letter, s)
def create_client_mock(cache=None):
    """Create an APIClient mock from a cache of the API spec

    Parameters
    ----------
    cache : str, optional
        Location of the API spec on the local filesystem.
        If ``None`` or not given, the default API spec will be used.

    Returns
    -------
    mock.Mock
        A `Mock` object which looks like an APIClient and which will
        error if any method calls have non-existent / misspelled parameters
    """
    spec_path = API_SPEC_PATH if cache is None else cache

    # A client built from the spec serves as the template for auto-speccing.
    template_client = _real_client(spec_path)

    # Swap out requests.Session so that auto-speccing cannot reach the real API.
    with mock.patch("requests.Session", mock.MagicMock):
        with warnings.catch_warnings():
            # Ignore deprecation warning from `client.default_credential`.
            warnings.simplefilter("ignore", FutureWarning)
            return mock.create_autospec(template_client, spec_set=True)
@lru_cache(maxsize=1)
def _real_client(local_api_spec):
    """Build an APIClient from a local API spec, with a placeholder API key.

    The most recently built client is cached, so repeated calls with the
    same ``local_api_spec`` return the same object.
    """
    client = APIClient(api_key="none", local_api_spec=local_api_spec)
    # Set the feature flags directly on the new client.
    client._feature_flags = {"noflag": None}
    return client
def run_template(id, arguments, JSONValue=False, client=None):
    """Run a template and return the results.

    Parameters
    ----------
    id: int
        The template id to be run.
    arguments: dict
        Dictionary of arguments to be passed to the template.
    JSONValue: bool, optional
        If True, will return the JSON output of the template.
        If False, will return the file ids associated with the
        output results.
    client: :class:`civis.APIClient`, optional
        If not provided, an :class:`civis.APIClient` object will be
        created from the :envvar:`CIVIS_API_KEY`.

    Returns
    -------
    output: dict or None
        If JSONValue = False, dictionary of file ids with the keys
        being their output names.
        If JSONValue = True, JSON dict containing the results of the
        template run. Expects only a single JSON result. If there is
        no JSON result, a warning is logged and None is returned.
        If there is more than 1 JSON result, a warning is logged and
        only the first one is returned.

    Examples
    --------
    >>> # Run template to return file_ids
    >>> run_template(my_template_id, arguments=my_dict_of_args)
    {'output': 1234567}
    >>> # Run template to return JSON output
    >>> run_template(my_template_id, arguments=my_dict_of_args, JSONValue=True)
    {'result1': 'aaa', 'result2': 123}
    """
    if client is None:
        client = APIClient()
    job = client.scripts.post_custom(id, arguments=arguments)
    run = client.scripts.post_custom_runs(job.id)
    fut = CivisFuture(client.scripts.get_custom_runs, (job.id, run.id), client=client)
    # Wait for the run to finish before listing its outputs.
    fut.result()
    outputs = client.scripts.list_custom_runs_outputs(job.id, run.id)

    if not JSONValue:
        return {o.name: o.object_id for o in outputs}

    json_output = [o.value for o in outputs if o.object_type == "JSONValue"]
    if not json_output:
        log.warning("No JSON output for template {}".format(id))
        return None
    if len(json_output) > 1:
        log.warning(
            "More than 1 JSON output for template {}"
            " -- returning only the first one.".format(id)
        )
    # Note that the cast to a dict is to convert
    # an expected Response object.
    return json_output[0].json()
123 | return datetime.strptime(s, "%Y-%m-%dT%H:%M:%S%z").timestamp() 124 | 125 | 126 | def _compute_effective_max_log_id(logs): 127 | """Find a max log ID use in order to avoid missing late messages. 128 | 129 | The order of log IDs may not be consistent with "created at" times 130 | since log entries are created by Civis Platform as well as the code 131 | for the job itself. This function looks through recent logs 132 | and finds a maximum ID that is at least as old as a set cutoff period, 133 | so that messages with lower IDs that show up a bit late won't be skipped. 134 | With this, it is still theoretically possible but extremely unlikely 135 | for some late log messages to be skipped in the job_logs function. 136 | """ 137 | if not logs: 138 | return 0 139 | 140 | sorted_logs = sorted(logs, key=operator.itemgetter("id")) 141 | 142 | max_created_at_timestamp = _timestamp_from_iso_str(sorted_logs[-1]["createdAt"]) 143 | cutoff = time.time() - _LOG_REFETCH_CUTOFF_SECONDS 144 | if max_created_at_timestamp < cutoff: 145 | return sorted_logs[-1]["id"] 146 | elif len(sorted_logs) >= _LOG_REFETCH_COUNT: 147 | return sorted_logs[-_LOG_REFETCH_COUNT]["id"] 148 | 149 | return 0 150 | 151 | 152 | def _job_finished_past_timeout(job_id, run_id, finished_timeout, raw_client): 153 | """Return true if the run finished more than so many seconds ago.""" 154 | if finished_timeout is None: 155 | return False 156 | 157 | run = raw_client.jobs.get_runs(job_id, run_id) 158 | finished_at = run.json()["finishedAt"] 159 | if finished_at is None: 160 | return False 161 | finished_at_ts = _timestamp_from_iso_str(finished_at) 162 | result = finished_at_ts < time.time() - finished_timeout 163 | return result 164 | 165 | 166 | def job_logs(job_id, run_id=None, finished_timeout=None): 167 | """Return a generator of log message dictionaries for a given run. 168 | 169 | Parameters 170 | ---------- 171 | job_id : int 172 | The ID of the job to retrieve log message for. 
173 | run_id : int or None 174 | The ID of the run to retrieve log messages for. 175 | If None, the ID for the most recent run will be used. 176 | finished_timeout: int or None 177 | If not None, then this function will return once the run has 178 | been finished for the specified number of seconds. 179 | If None, then this function will wait until the API says there 180 | will be no more new log messages, which may take a few minutes. 181 | A timeout of 30-60 seconds is usually enough to retrieve all 182 | log messages. 183 | 184 | Yields 185 | ------ 186 | dict 187 | A log message dictionary with "message", "createdAt" and other attributes 188 | provided by the job logs endpoint. Note that this will block execution 189 | until the job has stopped and all log messages are retrieved. 190 | 191 | Examples 192 | -------- 193 | >>> # Print all log messages from a job's most recent run 194 | >>> for log in job_logs(job_id=123456): 195 | ... print(f"{log['createdAt']}: {log['message']}") 196 | ... 197 | >>> # Get logs from a specific run with a 30 second timeout 198 | >>> for log in job_logs(job_id=123456, run_id=789, finished_timeout=30): 199 | ... print(log['message']) 200 | """ 201 | # The return_type for the client is "raw" in order to check 202 | # the "civis-cache-control" and "civis-max-id" headers when 203 | # list_runs_logs returns an empty list of new messages. 204 | # Caching of the endpoint information in 205 | # civis.resources.generate_classes_maybe_cached avoids extra API calls. 206 | raw_client = APIClient(return_type="raw") 207 | 208 | if run_id is None: 209 | run_id = raw_client.jobs.list_runs( 210 | job_id, limit=1, order="id", order_dir="desc" 211 | ).json()[0]["id"] 212 | 213 | local_max_log_id = 0 214 | continue_polling = True 215 | 216 | known_log_ids = set() 217 | 218 | while continue_polling: 219 | # This call gets a limited number of log messages since last_id, 220 | # ordered by log ID. 
221 | response = raw_client.jobs.list_runs_logs( 222 | job_id, 223 | run_id, 224 | last_id=local_max_log_id, 225 | limit=_LOGS_PER_QUERY, 226 | ) 227 | if "civis-max-id" in response.headers: 228 | remote_max_log_id = int(response.headers["civis-max-id"]) 229 | else: 230 | # Platform hasn't seen any logs at all yet 231 | remote_max_log_id = None 232 | logs = response.json() 233 | if logs: 234 | local_max_log_id = max(log["id"] for log in logs) 235 | logs.sort(key=operator.itemgetter("createdAt", "id")) 236 | for log in logs: 237 | if log["id"] in known_log_ids: 238 | continue 239 | known_log_ids.add(log["id"]) 240 | yield log 241 | 242 | log_finished = response.headers["civis-cache-control"] != "no-store" 243 | 244 | if remote_max_log_id is None: 245 | remote_has_more_logs_to_get_now = False 246 | elif local_max_log_id == remote_max_log_id: 247 | remote_has_more_logs_to_get_now = False 248 | local_max_log_id = _compute_effective_max_log_id(logs) 249 | if log_finished or _job_finished_past_timeout( 250 | job_id, run_id, finished_timeout, raw_client 251 | ): 252 | continue_polling = False 253 | else: 254 | remote_has_more_logs_to_get_now = True 255 | 256 | if continue_polling and not remote_has_more_logs_to_get_now: 257 | time.sleep(_FOLLOW_POLL_INTERVAL_SEC) 258 | -------------------------------------------------------------------------------- /src/civis/workflows/__init__.py: -------------------------------------------------------------------------------- 1 | from ._validate import validate_workflow_yaml, WorkflowValidationError 2 | 3 | 4 | __all__ = ["validate_workflow_yaml", "WorkflowValidationError"] 5 | -------------------------------------------------------------------------------- /src/civis/workflows/_schemas.py: -------------------------------------------------------------------------------- 1 | """Schemas for Civis Platform workflow definitions. 
def _endpoint_method_params(endpoint: str, method: str) -> tuple[list[str], list[str]]:
    """Split the parameters of a client endpoint method by whether they have defaults.

    Parameters
    ----------
    endpoint : str
        Attribute name of the endpoint on the module-level client.
    method : str
        Name of the method on that endpoint.

    Returns
    -------
    tuple[list[str], list[str]]
        ``(required, optional)`` parameter names, each in signature order.
        A parameter is required when it has no default value.
    """
    endpt = getattr(_CLIENT, endpoint)
    meth = getattr(endpt, method)
    required: list[str] = []
    optional: list[str] = []
    for name, param in inspect.signature(meth).parameters.items():
        # ``Parameter.empty`` is a sentinel, so test identity; ``==`` would
        # call the default value's own ``__eq__``.
        if param.default is inspect.Parameter.empty:
            required.append(name)
        else:
            optional.append(name)
    return required, optional
51 | "properties": {name: {} for name in required + optional}, 52 | "required": required, 53 | "additionalProperties": False, 54 | }, 55 | }, 56 | }, 57 | } 58 | 59 | 60 | def _if_then_import() -> dict: 61 | required_post, optional_post = _endpoint_method_params("imports", "post") 62 | required_post_syncs, optional_post_syncs = _endpoint_method_params( 63 | "imports", "post_syncs" 64 | ) 65 | if "name" in required_post: 66 | # Civis Platform allows the workflow task name to be the script name. 67 | required_post.remove("name") 68 | optional_post.append("name") 69 | if "id" in required_post_syncs: 70 | # The "id" will come from the job ID of the "post" call. 71 | required_post_syncs.remove("id") 72 | properties = { 73 | **{name: {} for name in required_post + optional_post}, 74 | "syncs": { 75 | "type": "array", 76 | "items": { 77 | "type": "object", 78 | # Although we have type annotations for each key name, 79 | # leave the value unspecified as {} to allow YAQL expressions. 80 | "properties": { 81 | name: {} for name in required_post_syncs + optional_post_syncs 82 | }, 83 | "required": required_post_syncs, 84 | "additionalProperties": False, 85 | }, 86 | }, 87 | } 88 | return { 89 | "if": {"properties": {"action": {"const": "civis.import"}}}, 90 | "then": { 91 | "properties": { 92 | "input": { 93 | "type": "object", 94 | "properties": properties, 95 | "required": required_post, 96 | "additionalProperties": False, 97 | }, 98 | }, 99 | }, 100 | } 101 | 102 | 103 | def _if_then_execute(action: str, id_name: str) -> dict: 104 | return { 105 | "if": {"properties": {"action": {"const": action}}}, 106 | "then": { 107 | "properties": { 108 | "input": { 109 | "type": "object", 110 | # Although the ID should be an integer, 111 | # leave it unspecified as {} to allow YAQL expressions. 
112 | "properties": {id_name: {}}, 113 | "required": [id_name], 114 | "additionalProperties": False, 115 | }, 116 | }, 117 | }, 118 | } 119 | 120 | 121 | TASK_TRANSITION_SCHEMA = { 122 | "oneOf": [ 123 | # A single task name. 124 | {"type": "string"}, 125 | # A list of either (i) task names to transition to, or 126 | # (ii) task names that each have a YAQL guard expression, or 127 | # a mixture of (i) and (ii). 128 | {"type": "array", "items": {"oneOf": [{"type": "string"}, {"type": "object"}]}}, 129 | # A single task name or a list of task names under the (optional?) key "next". 130 | { 131 | "type": "object", 132 | "properties": { 133 | "next": { 134 | "oneOf": [ 135 | {"type": "string"}, 136 | {"type": "array", "items": {"type": "string"}}, 137 | {"type": "array", "items": {"type": "object"}}, 138 | ], 139 | }, 140 | }, 141 | }, 142 | ], 143 | } 144 | 145 | TASK_SCHEMA = { 146 | "$schema": "https://json-schema.org/draft/2020-12/schema", 147 | "type": "object", 148 | "properties": { 149 | "name": { 150 | "type": "string", 151 | "maxLength": 255, 152 | "not": {"enum": ["noop", "fail", "succeed", "pause"]}, 153 | }, 154 | "description": {"type": "string"}, 155 | "action": { 156 | "type": "string", 157 | "enum": [ 158 | "civis.scripts.python3", 159 | "civis.scripts.r", 160 | "civis.scripts.sql", 161 | "civis.scripts.javascript", 162 | "civis.scripts.container", 163 | "civis.scripts.dbt", 164 | "civis.scripts.custom", 165 | "civis.enhancements.cass_ncoa", 166 | "civis.import", 167 | "civis.run_job", 168 | "civis.workflows.execute", 169 | "std.async_noop", 170 | "std.echo", 171 | "std.fail", 172 | "std.noop", 173 | ], 174 | }, 175 | "input": {"type": "object"}, 176 | "publish": {"type": "object"}, 177 | "publish-on-error": {"type": "object"}, 178 | "on-success": TASK_TRANSITION_SCHEMA, 179 | "on-error": TASK_TRANSITION_SCHEMA, 180 | "on-complete": TASK_TRANSITION_SCHEMA, 181 | "join": { 182 | "oneOf": [ 183 | {"const": "all"}, 184 | {"type": "integer", "minimum": 1}, 
185 | ], 186 | }, 187 | "requires": {"type": "array"}, 188 | "with-items": { 189 | "oneOf": [ 190 | {"type": "string"}, 191 | {"type": "array", "items": {"type": "string"}}, 192 | ], 193 | }, 194 | "keep-result": {"type": "boolean"}, 195 | "target": {"type": "string"}, 196 | "pause-before": {"type": "boolean"}, 197 | "wait-before": {"type": "number", "minimum": 0}, 198 | "wait-after": {"type": "number", "minimum": 0}, 199 | "fail-on": {"type": "string"}, 200 | "timeout": {"type": "number", "minimum": 0}, 201 | "retry": { 202 | "oneOf": [ 203 | {"type": "string"}, 204 | { 205 | "type": "object", 206 | "properties": { 207 | "count": {"type": "number", "minimum": 0}, 208 | "delay": {"type": "number", "minimum": 0}, 209 | "break-on": {"type": "string"}, 210 | "continue-on": {"type": "string"}, 211 | }, 212 | }, 213 | ], 214 | }, 215 | "concurrency": {"type": "number", "minimum": 1}, 216 | "safe-rerun": {"type": "boolean"}, 217 | }, 218 | "required": ["action"], 219 | "allOf": [ 220 | # If "action" is one of the Civis-defined ones, 221 | # then the allowed properties under "input" closely mirror the relevant 222 | # API endpoint method. 
# JSON schema for a whole workflow definition: the "version" key plus exactly one
# other top-level key, whose value is the workflow body.
WORKFLOW_SCHEMA = {
    "$schema": "https://json-schema.org/draft/2020-12/schema",
    "type": "object",
    "properties": {"version": {"const": "2.0"}},
    "patternProperties": {
        # Match every key except the exact string "version".
        # A pattern such as "^(?:(?!version).)*$" would also exclude any key that
        # merely *contains* "version" (e.g., "conversion"), and the body under
        # such a key would then escape validation altogether.
        "^(?!version$).*$": {
            "type": "object",
            "properties": {
                "type": {"type": "string"},
                "description": {"type": "string"},
                "input": {
                    "type": "array",
                    "items": {"oneOf": [{"type": "string"}, {"type": "object"}]},
                },
                "output": {},
                "output-on-error": {},
                # Same as a task, except that no key is mandatory.
                "task-defaults": {
                    k: v for k, v in TASK_SCHEMA.items() if k != "required"
                },
                "tasks": {
                    "type": "object",
                    "patternProperties": {"^.*$": TASK_SCHEMA},
                    "minProperties": 1,
                },
            },
            "required": ["tasks"],
            "additionalProperties": True,  # Allow anchor definitions.
        },
    },
    "required": ["version"],
    # Exactly two top-level keys: "version" and the workflow name.
    "minProperties": 2,
    "maxProperties": 2,
}
def _validate_workflow_tasks(wf: dict) -> None:
    """Custom checks for workflow tasks that aren't amenable to jsonschema validation.

    For every task, the transitions listed under "on-success", "on-error" and
    "on-complete" must not point back at the task itself, and must each name
    either a task defined in the workflow or a task transition engine command.

    Parameters
    ----------
    wf : dict
        Parsed workflow definition.

    Raises
    ------
    WorkflowValidationError
        If no tasks can be found, if a task transitions to itself,
        or if a task transitions to an undefined task.
    """
    # The workflow body sits under the first top-level key other than "version".
    key = next((k for k in wf if k != "version"), None)
    try:
        tasks = wf[key]["tasks"]
    except (KeyError, TypeError):
        # KeyError: no workflow key, or no "tasks" under it.
        # TypeError: the value under the workflow key isn't subscriptable by str.
        raise WorkflowValidationError("No workflow tasks found") from None
    # Identical for every task, so build it once rather than per iteration.
    recognized_task_names = set(tasks) | _TASK_TRANSITION_ENGINE_COMMANDS
    for task_name, task in tasks.items():
        for next_task_group_name in ("on-success", "on-error", "on-complete"):
            next_task_names = _get_next_task_names(task.get(next_task_group_name))
            if task_name in next_task_names:
                raise WorkflowValidationError(
                    "A task cannot transition to itself. "
                    f"{task_name!r} transitions to itself in {next_task_group_name!r}."
                )
            for next_task_name in next_task_names:
                if next_task_name not in recognized_task_names:
                    raise WorkflowValidationError(
                        f"Task {task_name!r} transitions "
                        f"to an undefined task {next_task_name!r} "
                        f"in {next_task_group_name!r}."
                    )
def test_base_url_from_env():
    """CIVIS_API_ENDPOINT overrides the base URL, which gains a trailing slash."""
    endpoint_without_slash = "https://api1.civisanalytics.com"
    env_overrides = {"CIVIS_API_ENDPOINT": endpoint_without_slash}
    with mock.patch.dict("os.environ", env_overrides):
        base_url = get_base_url()
    assert base_url == f"{endpoint_without_slash}/"
def test_camel_to_snake():
    """Check camel_to_snake against a table of input -> expected output."""
    expected_by_input = {
        "CAMELCase": "camel_case",
        "camelCase": "camel_case",
        "CamelCase": "camel_case",
        # Already lowercase with underscores: expected to come back unchanged.
        "c__amel": "c__amel",
    }
    for word, expected in expected_by_input.items():
        assert camel_to_snake(word) == expected
@mock.patch("civis.cli.__main__.add_extra_commands")
@mock.patch("civis.cli.__main__.retrieve_spec_dict")
def test_generate_cli_petstore(mock_retrieve_spec_dict, mock_add_extra_commands):
    """Generate the CLI from the OpenAPI petstore example spec."""
    # The spec file is from https://raw.githubusercontent.com/OAI/OpenAPI-Specification/4b1c1167b99844fd3ca19dc0055bbdb0c5eff094/examples/v2.0/json/petstore.json # noqa: E501
    spec_path = os.path.join(THIS_DIR, "petstore.json")
    with open(spec_path) as spec_file:
        mock_retrieve_spec_dict.return_value = json.load(spec_file)

    cli = generate_cli()

    # One resource group ("pets") with its three operations.
    assert set(cli.commands.keys()) == {"pets"}
    pets_commands = cli.commands["pets"].commands
    assert set(pets_commands.keys()) == {"list", "post", "get"}

    list_param_names = {param.name for param in pets_commands["list"].params}
    assert list_param_names == {"limit", "json_output"}
53 | list_runs_cmd = cli.commands["scripts"].commands["list-containers-runs"] 54 | expected_names = {"id", "limit", "page_num", "order", "order_dir", "json_output"} 55 | assert {_.name for _ in list_runs_cmd.params} == expected_names 56 | assert list_runs_cmd.params[0].name == "id" 57 | assert list_runs_cmd.params[0].required 58 | assert list_runs_cmd.params[1].name == "limit" 59 | assert not list_runs_cmd.params[1].required 60 | 61 | # Check that the extra files upload command was added 62 | expected_names = {"path", "name", "expires_at", "description"} 63 | files_upload_params = cli.commands["files"].commands["upload"].params 64 | assert {_.name for _ in files_upload_params} == expected_names 65 | 66 | # Check that the POST queries command, which uses an object in the body, 67 | # was parsed properly. 68 | pq_params = cli.commands["queries"].commands["post"].params 69 | expected_names = { 70 | "column_delimiter", 71 | "compression", 72 | "credential", 73 | "database", 74 | "filename_prefix", 75 | "include_header", 76 | "interactive", 77 | "preview_rows", 78 | "sql", 79 | "unquoted", 80 | "json_output", 81 | "hidden", 82 | } 83 | assert {_.name for _ in pq_params} == expected_names 84 | for p in pq_params: 85 | if p.name == "filename_prefix": 86 | assert not p.required 87 | if p.name == "database": 88 | assert p.required 89 | 90 | 91 | @mock.patch("civis.cli.__main__.open_session", autospec=True) 92 | def test_blank_output(mock_session): 93 | """ 94 | Test that endpoints that return blank results don't cause exceptions. 95 | """ 96 | # The response object's json method will raise a ValueError when the output 97 | # is blank. 
@mock.patch("civis.cli.__main__.open_session", autospec=True)
@mock.patch("civis.cli.__main__.Request", autospec=True)
def test_parameter_case(mock_request, mock_session):
    """
    Test that parameter names are sent in camelCase rather than snake_case.
    """
    api_response = {"key": "value"}
    session_context = mock_session.return_value.__enter__.return_value
    session_context.send.return_value.json.return_value = api_response
    session_context.send.return_value.status_code = 200

    # To avoid needing CIVIS_API_KEY set in the environment.
    op = {
        "parameters": [
            {"name": "firstParameter", "in": "query"},
            {"name": "secondParameter", "in": "query"},
        ]
    }
    with pytest.raises(SystemExit):
        invoke(
            "WIBBLE", "/wobble/wubble", op, first_parameter="a", second_parameter="b"
        )

    # Compare (`==`) rather than assign (`=`): an assignment into the recorded
    # kwargs would always succeed and so would verify nothing.
    assert mock_session.call_args[1]["user_agent"] == "civis-cli"

    # Keyword names given in snake_case must reach the request in camelCase.
    mock_request.assert_called_with(
        url="https://api.civisanalytics.com/wobble/wubble",
        json={},
        params={"firstParameter": "a", "secondParameter": "b"},
        method="WIBBLE",
    )
@mock.patch("civis.resources._resources.get_api_spec", return_value=API_SPEC)
def test_feature_flags_memoized(mock_spec):
    """Reading `feature_flags` twice must call `users.list_me` only once."""
    client = APIClient()
    client.users = FakeUsersEndpoint()
    real_list_me = client.users.list_me
    # Wrap the real method so it still returns data while calls get counted.
    with mock.patch.object(client.users, "list_me", wraps=real_list_me) as spy:
        for _ in range(2):
            client.feature_flags
        assert spy.call_count == 1
def test_deprecate_kwarg():
    """A deprecated parameter passed by keyword triggers one FutureWarning."""
    deprecate = _deprecation.deprecate_param("v2.0.0", "param2")
    decorated_func = deprecate(adder)

    with pytest.warns(FutureWarning) as record:
        output = decorated_func(1, param2=3, param3=5)

    assert output == 9, "The function should still give the expected output."
    assert len(record) == 1, "Only one warning should be raised."

    # Each fragment that must appear in the warning text, with the reason why.
    expected_fragments = (
        ("v2.0.0", "The warning should mention the removal version."),
        ("param2", "The warning should mention the deprecated parameter."),
        (__name__ + ".adder", "The warning should mention the function name."),
    )
    warning_text = record[0].message.args[0]
    for fragment, reason in expected_fragments:
        assert fragment in warning_text, reason
def test_deprecate_pos_arg():
    """A deprecated parameter passed positionally triggers one FutureWarning."""
    deprecate = _deprecation.deprecate_param("v2.0.0", "param2")
    decorated_func = deprecate(adder)

    with pytest.warns(FutureWarning) as record:
        output = decorated_func(1, 3, 5)

    assert output == 9, "The function should still give the expected output."
    assert len(record) == 1, "Only one warning should be raised."

    # Each fragment that must appear in the warning text, with the reason why.
    expected_fragments = (
        ("v2.0.0", "The warning should mention the removal version."),
        ("param2", "The warning should mention the deprecated parameter."),
        (__name__ + ".adder", "The warning should mention the function name."),
    )
    warning_text = record[0].message.args[0]
    for fragment, reason in expected_fragments:
        assert fragment in warning_text, reason
def _get_test_logger(*args, **kwargs):
    """Return a uniquely named `civis_logger` that propagates to its parent.

    `args` and `kwargs` are forwarded to `civis_logger`; the `name` keyword is
    always set here, to a fresh UUID string.
    """
    # Need to use a logger of a different name in each test function,
    # or else we'd hit this issue:
    # https://github.com/pytest-dev/pytest/issues/5577
    # `*args` is written ahead of `name=` because positional arguments are bound
    # first no matter where the star-unpacking appears in the call.
    logger = civis_logger(*args, name=str(uuid4()), **kwargs)
    # Set `propagate` back to `True`,
    # or else all the logging/caplog tests would fail.
    logger.propagate = True
    return logger
def test_civis_logger_fmt_from_formatter(caplog, capsys):
    """`fmt` may be a `logging.Formatter` instance; its format is then applied."""
    formatter = logging.Formatter("%(levelname)s:%(message)s")
    log = _get_test_logger(fmt=formatter)
    caplog.set_level(log.level)

    emissions = (
        (log.debug, "debug level"),
        (log.info, "this is info level"),
        (log.warning, "warning!"),
        (log.error, "error!!"),
    )
    for emit, text in emissions:
        emit(text)

    # The debug message is expected to be absent from the captured records.
    assert [(rec.levelname, rec.message) for rec in caplog.records] == [
        ("INFO", "this is info level"),
        ("WARNING", "warning!"),
        ("ERROR", "error!!"),
    ]

    # Info is expected on stdout; warning and error on stderr.
    stdout_text, stderr_text = capsys.readouterr()
    assert stdout_text == "INFO:this is info level\n"
    assert stderr_text == "WARNING:warning!\nERROR:error!!\n"
import test_model 17 | 18 | 19 | def test_list_models_bad_job_type(): 20 | with pytest.raises(ValueError): 21 | list_models(job_type="fake") 22 | 23 | 24 | def test_list_models(): 25 | resp = [Response({"id": 2834, "name": "RFC model"})] 26 | m_client = create_client_mock() 27 | m_client.aliases.list.return_value = ( 28 | test_model.TEST_TEMPLATE_ID_ALIAS_OBJECTS 29 | ) # noqa 30 | m_client.scripts.list_custom.return_value = resp 31 | out = list_models(job_type="train", client=m_client) 32 | assert out == resp 33 | 34 | out = list_models(job_type="predict", client=m_client) 35 | assert out == resp 36 | 37 | out = list_models(job_type=None, client=m_client) 38 | assert out == resp 39 | 40 | 41 | def _create_share_model_client_mock(run_ids): 42 | m_client = create_client_mock() 43 | m_client.scripts.put_containers_shares_users.return_value = "usershare" 44 | m_client.scripts.put_containers_shares_groups.return_value = "groupshare" 45 | m_client.scripts.delete_containers_shares_users.return_value = "userdel" 46 | m_client.scripts.delete_containers_shares_groups.return_value = "groupdel" 47 | m_client.scripts.list_containers_runs.return_value = [ 48 | Response({"id": _id}) for _id in run_ids 49 | ] 50 | m_client.scripts.list_containers_runs_outputs.return_value = [ 51 | Response({"object_id": 117, "object_type": "File", "name": "fname"}), 52 | Response({"object_id": 31, "object_type": "Project"}), 53 | Response({"object_id": 37, "object_type": "JSONValue"}), 54 | ] 55 | return m_client 56 | 57 | 58 | def test_share_model_users(): 59 | m_client = _create_share_model_client_mock([11]) 60 | 61 | resp = helper._share_model( 62 | 3, [7, 8], "write", "users", client=m_client, send_shared_email=True 63 | ) 64 | assert resp == "usershare" 65 | m_client.scripts.put_containers_shares_users.assert_called_once_with( 66 | 3, [7, 8], "write", send_shared_email=True 67 | ) 68 | m_client.files.put_shares_users.assert_called_once_with( 69 | 117, [7, 8], "write", send_shared_email=False 
70 | ) 71 | m_client.projects.put_shares_users.assert_called_once_with( 72 | 31, [7, 8], "write", send_shared_email=False 73 | ) 74 | m_client.json_values.put_shares_users.assert_called_once_with( 75 | 37, [7, 8], "write", send_shared_email=False 76 | ) 77 | 78 | 79 | def test_share_model_groups(): 80 | m_client = _create_share_model_client_mock([11]) 81 | 82 | resp = helper._share_model( 83 | 3, [7, 8], "write", "groups", client=m_client, send_shared_email=True 84 | ) 85 | assert resp == "groupshare" 86 | m_client.scripts.put_containers_shares_groups.assert_called_once_with( 87 | 3, [7, 8], "write", send_shared_email=True 88 | ) 89 | m_client.files.put_shares_groups.assert_called_once_with( 90 | 117, [7, 8], "write", send_shared_email=False 91 | ) 92 | m_client.projects.put_shares_groups.assert_called_once_with( 93 | 31, [7, 8], "write", send_shared_email=False 94 | ) 95 | m_client.json_values.put_shares_groups.assert_called_once_with( 96 | 37, [7, 8], "write", send_shared_email=False 97 | ) 98 | 99 | 100 | def test_share_model_tworuns(): 101 | # Check that we grant permission on run outputs for each run 102 | m_client = _create_share_model_client_mock([11, 13]) 103 | 104 | helper._share_model(3, [7, 8], "write", "users", client=m_client) 105 | 106 | m_client.scripts.put_containers_shares_users.assert_called_once_with( 107 | 3, [7, 8], "write" 108 | ) 109 | 110 | assert m_client.files.put_shares_users.call_count == 2 111 | assert m_client.projects.put_shares_users.call_count == 2 112 | assert m_client.json_values.put_shares_users.call_count == 2 113 | 114 | 115 | def test_share_model_project_permissions(): 116 | # Grant "write" permission on the internal project when 117 | # overall "read" permission is requested. 
118 | m_client = _create_share_model_client_mock([11]) 119 | 120 | helper._share_model(3, [7, 8], "read", "groups", client=m_client) 121 | 122 | m_client.projects.put_shares_groups.assert_called_once_with( 123 | 31, [7, 8], "write", send_shared_email=False 124 | ) 125 | 126 | 127 | def test_unshare_model_users(): 128 | m_cl = _create_share_model_client_mock([11]) 129 | 130 | resp = helper._unshare_model(3, 7, "users", client=m_cl) 131 | assert resp == "userdel" 132 | m_cl.scripts.delete_containers_shares_users.assert_called_once_with(3, 7) 133 | m_cl.files.delete_shares_users.assert_called_once_with(117, 7) 134 | m_cl.projects.delete_shares_users.assert_called_once_with(31, 7) 135 | m_cl.json_values.delete_shares_users.assert_called_once_with(37, 7) 136 | 137 | 138 | def test_unshare_model_groups(): 139 | m_cl = _create_share_model_client_mock([11]) 140 | 141 | resp = helper._unshare_model(3, 7, "groups", client=m_cl) 142 | assert resp == "groupdel" 143 | m_cl.scripts.delete_containers_shares_groups.assert_called_once_with(3, 7) 144 | m_cl.files.delete_shares_groups.assert_called_once_with(117, 7) 145 | m_cl.projects.delete_shares_groups.assert_called_once_with(31, 7) 146 | m_cl.json_values.delete_shares_groups.assert_called_once_with(37, 7) 147 | 148 | 149 | def test_unshare_model_tworuns(): 150 | # Check that we revoke permission on run outputs for each run 151 | m_cl = _create_share_model_client_mock([11, 13]) 152 | 153 | helper._unshare_model(3, 7, "users", client=m_cl) 154 | 155 | m_cl.scripts.delete_containers_shares_users.assert_called_once_with(3, 7) 156 | 157 | assert m_cl.files.delete_shares_users.call_count == 2 158 | assert m_cl.projects.delete_shares_users.call_count == 2 159 | assert m_cl.json_values.delete_shares_users.call_count == 2 160 | 161 | 162 | @mock.patch("civis.ml._helper._share_model", autospec=True) 163 | def test_put_models_shares_groups(mock_share): 164 | mock_share.return_value = "retval" 165 | out = put_models_shares_groups(1, [7, 8], 
"read") 166 | 167 | assert out == "retval" 168 | mock_share.assert_called_once_with( 169 | 1, [7, 8], "read", entity_type="groups", client=None 170 | ) 171 | 172 | 173 | @mock.patch("civis.ml._helper._share_model", autospec=True) 174 | def test_put_models_shares_users(mock_share): 175 | mock_share.return_value = "retval" 176 | out = put_models_shares_users(1, [7, 8], "read") 177 | 178 | assert out == "retval" 179 | mock_share.assert_called_once_with( 180 | 1, [7, 8], "read", entity_type="users", client=None 181 | ) 182 | 183 | 184 | @mock.patch("civis.ml._helper._unshare_model", autospec=True) 185 | def test_delete_models_shares_groups(m_unshare): 186 | m_unshare.return_value = "retval" 187 | out = delete_models_shares_groups(1, 7) 188 | 189 | assert out == "retval" 190 | m_unshare.assert_called_once_with(1, 7, entity_type="groups", client=None) 191 | 192 | 193 | @mock.patch("civis.ml._helper._unshare_model", autospec=True) 194 | def test_delete_models_shares_users(m_unshare): 195 | m_unshare.return_value = "retval" 196 | out = delete_models_shares_users(1, 7) 197 | 198 | assert out == "retval" 199 | m_unshare.assert_called_once_with(1, 7, entity_type="users", client=None) 200 | -------------------------------------------------------------------------------- /tests/test_mocks.py: -------------------------------------------------------------------------------- 1 | """Tests for the test tooling""" 2 | 3 | import pytest 4 | 5 | from civis.tests import mocks 6 | 7 | 8 | def test_client_mock_attributeerror(): 9 | mock_client = mocks.create_client_mock() 10 | with pytest.raises(AttributeError): 11 | mock_client.not_an_endpoint() 12 | 13 | 14 | def test_client_mock_bad_parameter(): 15 | mock_client = mocks.create_client_mock() 16 | mock_client.tables.list(database_id=1) # Valid parameter 17 | with pytest.raises(TypeError): 18 | mock_client.tables.list(db_id=1) # Invalid parameter 19 | -------------------------------------------------------------------------------- 
/tests/test_polling.py: -------------------------------------------------------------------------------- 1 | """Test the `civis.polling` module""" 2 | 3 | import time 4 | from concurrent import futures 5 | import unittest 6 | from unittest import mock 7 | 8 | from civis.response import Response 9 | from civis.polling import PollableResult, _ResultPollingThread 10 | 11 | import pytest 12 | 13 | 14 | class State: 15 | def __init__(self, state): 16 | self.state = state 17 | 18 | 19 | def create_pollable_result(state, exception=None, result=None): 20 | f = PollableResult(State, (state,), polling_interval=0.001) 21 | f._exception = exception 22 | f._result = result 23 | return f 24 | 25 | 26 | CANCELLED_RESULT = create_pollable_result(state="cancelled") 27 | FINISHED_RESULT = create_pollable_result(state="success") 28 | QUEUED_RESULT = create_pollable_result(state="queued") 29 | 30 | 31 | class TestPolling(unittest.TestCase): 32 | def test_as_completed(self): 33 | my_futures = [QUEUED_RESULT, CANCELLED_RESULT, FINISHED_RESULT] 34 | fs = futures.as_completed(my_futures) 35 | f1 = next(fs) 36 | f2 = next(fs) 37 | finished_futures = set([f1, f2]) 38 | 39 | self.assertEqual(finished_futures, set([FINISHED_RESULT, CANCELLED_RESULT])) 40 | 41 | def test_wait(self): 42 | done, not_done = futures.wait( 43 | [QUEUED_RESULT, FINISHED_RESULT], return_when=futures.FIRST_COMPLETED 44 | ) 45 | self.assertEqual(set([FINISHED_RESULT]), done) 46 | self.assertEqual(set([QUEUED_RESULT]), not_done) 47 | 48 | def test_error_passthrough(self): 49 | pollable = PollableResult( 50 | mock.Mock(side_effect=[ZeroDivisionError()]), (), polling_interval=0.1 51 | ) 52 | pytest.raises(ZeroDivisionError, pollable.result) 53 | 54 | def test_error_setting(self): 55 | pollable = PollableResult( 56 | mock.Mock(side_effect=[ZeroDivisionError()]), (), polling_interval=0.1 57 | ) 58 | assert isinstance(pollable.exception(), ZeroDivisionError) 59 | 60 | def test_timeout(self): 61 | pollable = PollableResult( 
62 | mock.Mock(return_value=Response({"state": "running"})), 63 | poller_args=(), 64 | polling_interval=0.1, 65 | ) 66 | pytest.raises(futures.TimeoutError, pollable.result, timeout=0.05) 67 | 68 | def test_poll_on_creation(self): 69 | poller = mock.Mock(return_value=Response({"state": "running"})) 70 | pollable = PollableResult( 71 | poller, (), polling_interval=0.01, poll_on_creation=False 72 | ) 73 | pollable.done() # Check status once to start the polling thread 74 | assert poller.call_count == 0 75 | time.sleep(0.05) 76 | assert poller.call_count > 0 77 | 78 | def test_poller_returns_none(self): 79 | check_result = mock.Mock( 80 | side_effect=[None, None, Response({"state": "success"})] 81 | ) 82 | pollable_result = mock.Mock() 83 | pollable_result._check_result = check_result 84 | pollable_result._next_polling_interval = 0.01 85 | polling_thread = _ResultPollingThread(pollable_result) 86 | polling_thread.run() 87 | assert check_result.call_count == 3 88 | 89 | def test_reset_polling_thread(self): 90 | pollable = PollableResult( 91 | mock.Mock(return_value=Response({"state": "running"})), 92 | poller_args=(), 93 | polling_interval=0.1, 94 | ) 95 | initial_polling_thread = pollable._polling_thread 96 | assert pollable.polling_interval == 0.1 97 | assert pollable._next_polling_interval == 0.1 98 | pollable._reset_polling_thread(0.2) 99 | # Check that the polling interval was updated 100 | assert pollable.polling_interval == 0.2 101 | assert pollable._next_polling_interval == 0.2 102 | # Check that the _polling_thread is a new thread 103 | assert pollable._polling_thread != initial_polling_thread 104 | # Check that the old thread was stopped 105 | assert not initial_polling_thread.is_alive() 106 | 107 | def test_geometric_polling(self): 108 | # To test polling, we make the poller function spit out a timestamp every time 109 | # it is called. Then we check if these timestamps are what we'd expect. 
110 | poller_timestamps = [] 111 | 112 | def append_new_timestamp(*args, **kwargs): 113 | nonlocal poller_timestamps 114 | poller_timestamps.append(time.time()) 115 | if len(poller_timestamps) < 5: 116 | return Response({"state": "running"}) 117 | else: 118 | return Response({"state": "succeeded"}) 119 | 120 | poller = mock.Mock() 121 | poller.side_effect = append_new_timestamp 122 | 123 | pollable = PollableResult(poller, (), poll_on_creation=False) 124 | start_time = time.time() 125 | pollable.result() 126 | 127 | assert len(poller_timestamps) == 5 128 | expected_intervals = [1, 1.2, 1.44, 1.728, 2.0736] 129 | actual_intervals = [] 130 | for i, timestamp in enumerate(poller_timestamps): 131 | actual_intervals.append( 132 | timestamp - (poller_timestamps[i - 1] if i else start_time) 133 | ) 134 | assert actual_intervals == pytest.approx(expected_intervals, abs=0.02) 135 | 136 | 137 | if __name__ == "__main__": 138 | unittest.main() 139 | -------------------------------------------------------------------------------- /tests/test_run_joblib_func.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | 3 | 4 | def test_civis_joblib_worker_command_available(): 5 | command = "civis_joblib_worker" 6 | assert shutil.which(command), f"The `{command}` command is not available." 
7 | -------------------------------------------------------------------------------- /tests/test_service_client.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | import json 3 | from unittest import mock 4 | 5 | import requests 6 | 7 | from civis.base import CivisAPIError 8 | from civis.service_client import ( 9 | ServiceClient, 10 | ServiceEndpoint, 11 | _get_service, 12 | _parse_service_path, 13 | parse_service_api_spec, 14 | to_camelcase, 15 | ) 16 | import pytest 17 | 18 | MOCK_SERVICE_ID = 0 19 | 20 | MOCK_URL = "www.survey-url.com" 21 | 22 | 23 | @pytest.fixture 24 | def mock_swagger(): 25 | return { 26 | "info": {"title": "Test API Client", "version": "1.0"}, 27 | "paths": { 28 | "/some-resources": { 29 | "get": { 30 | "description": "", 31 | "responses": { 32 | "200": { 33 | "description": "Returns a list", 34 | } 35 | }, 36 | "summary": "List Resources", 37 | "tags": ["tag"], 38 | } 39 | }, 40 | "/some-resources/{id}": { 41 | "get": { 42 | "description": "", 43 | "responses": { 44 | "200": { 45 | "description": "Returns a Resource", 46 | } 47 | }, 48 | "summary": "Get Resources", 49 | "tags": ["tag"], 50 | }, 51 | "patch": { 52 | "description": "", 53 | "parameters": [ 54 | { 55 | "description": "The id of the Resource", 56 | "in": "path", 57 | "name": "id", 58 | "required": True, 59 | "type": "integer", 60 | }, 61 | { 62 | "description": "The fields and values to edit", 63 | "in": "body", 64 | "name": "body", 65 | "required": True, 66 | "schema": { 67 | "properties": { 68 | "field": { 69 | "description": "a property value", 70 | "type": "string", 71 | } 72 | } 73 | }, 74 | }, 75 | ], 76 | "responses": { 77 | "200": { 78 | "description": "Edits Resource", 79 | } 80 | }, 81 | "summary": "Patch Resources", 82 | "tags": ["tag"], 83 | }, 84 | }, 85 | }, 86 | "swagger": "2.0", 87 | } 88 | 89 | 90 | @pytest.fixture 91 | def mock_operations(mock_swagger): 92 | ops_json = 
mock_swagger["paths"]["/some-resources"] 93 | mock_ops_str = str(json.dumps(ops_json)) 94 | mock_operations = json.JSONDecoder(object_pairs_hook=OrderedDict).decode( 95 | mock_ops_str 96 | ) # noqa: E501 97 | return mock_operations 98 | 99 | 100 | @mock.patch("civis.service_client.ServiceClient.generate_classes_maybe_cached") 101 | @mock.patch("civis.service_client.ServiceClient.get_base_url") 102 | def test_service_client(url_mock, classes_mock): 103 | url_mock.return_value = MOCK_URL 104 | classes_mock.return_value = {} 105 | 106 | sc = ServiceClient(MOCK_SERVICE_ID) 107 | 108 | spec_endpoint = "/endpoints" 109 | 110 | assert sc._api_key is None 111 | assert sc._service_id == MOCK_SERVICE_ID 112 | assert sc._base_url == MOCK_URL 113 | assert sc._root_path is None 114 | assert sc._swagger_path == spec_endpoint 115 | 116 | # Custom root path 117 | sc = ServiceClient(MOCK_SERVICE_ID, root_path="/api") 118 | assert sc._root_path == "/api" 119 | 120 | # Custom Swagger path 121 | sc = ServiceClient(MOCK_SERVICE_ID, swagger_path="/spec") 122 | assert sc._swagger_path == "/spec" 123 | 124 | # Passed in API Key 125 | sc = ServiceClient(MOCK_SERVICE_ID, api_key="this_is_an_API_key") 126 | assert sc._api_key == "this_is_an_API_key" 127 | 128 | 129 | def test_service_endpoint(): 130 | service_client_mock = mock.Mock() 131 | se = ServiceEndpoint(service_client_mock) 132 | 133 | assert se._return_type == "civis" 134 | assert se._client == service_client_mock 135 | 136 | 137 | def test_parse_service_path(mock_operations): 138 | mock_path = "/some-resource/sub-resource/{id}" 139 | base_path, methods = _parse_service_path(mock_path, mock_operations) 140 | 141 | assert base_path == "some_resource" 142 | assert "get_sub_resource" in methods[0] 143 | 144 | mock_path = "/some-resource/{id}" 145 | base_path, methods = _parse_service_path(mock_path, mock_operations) 146 | 147 | assert base_path == "some_resource" 148 | assert "get" in methods[0] 149 | 150 | 151 | def 
test_parse_path__with_root(mock_operations): 152 | root_path = "/some-resource" 153 | 154 | mock_path = "/some-resource/sub-resource/{id}" 155 | base_path, methods = _parse_service_path( 156 | mock_path, mock_operations, root_path=root_path 157 | ) 158 | 159 | assert base_path == "sub_resource" 160 | assert "get" in methods[0] 161 | 162 | 163 | def test_parse_service_api_spec(mock_swagger): 164 | classes = parse_service_api_spec(mock_swagger) 165 | assert "some_resources" in classes 166 | 167 | 168 | @mock.patch("civis.service_client.requests.Session.get") 169 | @mock.patch("civis.service_client.auth_service_session") 170 | @mock.patch("civis.service_client.ServiceClient.generate_classes_maybe_cached") 171 | @mock.patch("civis.service_client.ServiceClient.get_base_url") 172 | def test_get_api_spec( 173 | url_mock, classes_mock, auth_session_mock, mock_response, mock_swagger 174 | ): 175 | mock_response.return_value = mock.Mock(ok=True) 176 | mock_response.return_value.json.return_value = mock_swagger 177 | 178 | url_mock.return_value = MOCK_URL 179 | classes_mock.return_value = {} 180 | 181 | sc = ServiceClient(MOCK_SERVICE_ID) 182 | 183 | spec = sc.get_api_spec() 184 | assert spec == mock_swagger 185 | 186 | 187 | @mock.patch("civis.service_client.parse_service_api_spec") 188 | @mock.patch("civis.service_client.ServiceClient.get_api_spec") 189 | @mock.patch("civis.service_client.ServiceClient.get_base_url") 190 | def test_generate_classes(url_mock, api_spec_mock, parse_mock, mock_swagger): 191 | api_spec_mock.return_value = {} 192 | 193 | def mock_class_function(client, return_type): 194 | return "return" 195 | 196 | parse_mock.return_value = {"class": mock_class_function} 197 | url_mock.return_value = MOCK_URL 198 | 199 | sc = ServiceClient(MOCK_SERVICE_ID, root_path="/foo") 200 | 201 | classes = sc.generate_classes() 202 | parse_mock.assert_called_once_with(api_spec_mock.return_value, root_path="/foo") 203 | 204 | assert "class" in classes 205 | 206 | 207 | 
@mock.patch("civis.service_client.parse_service_api_spec") 208 | @mock.patch("civis.service_client.ServiceClient.get_api_spec") 209 | @mock.patch("civis.service_client.ServiceClient.get_base_url") 210 | def test_generate_classes_maybe_cached( 211 | url_mock, api_spec_mock, parse_mock, mock_swagger 212 | ): 213 | api_spec_mock.return_value = {} 214 | 215 | def mock_class_function(client, return_type): 216 | return "return" 217 | 218 | parse_mock.return_value = {"class": mock_class_function} 219 | url_mock.return_value = MOCK_URL 220 | 221 | sc = ServiceClient(MOCK_SERVICE_ID, root_path="/foo") 222 | 223 | mock_spec_str = str(json.dumps(mock_swagger)) 224 | mock_spec = json.JSONDecoder(object_pairs_hook=OrderedDict).decode( 225 | mock_spec_str 226 | ) # noqa: E501 227 | classes = sc.generate_classes_maybe_cached(mock_spec) 228 | 229 | parse_mock.assert_has_calls( 230 | [ 231 | # the call from generate_classes_maybe_cached in ServiceClient.__init__ 232 | mock.call({}, root_path="/foo"), 233 | # the call from generate_classes_maybe_cached in this test 234 | mock.call(mock_spec, root_path="/foo"), 235 | ] 236 | ) 237 | 238 | assert "class" in classes 239 | 240 | 241 | @mock.patch("civis.service_client.ServiceClient.generate_classes_maybe_cached") 242 | @mock.patch("civis.service_client._get_service") 243 | def test_get_base_url(get_service_mock, classes_mock): 244 | get_service_mock.return_value = {"current_url": MOCK_URL} 245 | classes_mock.return_value = {} 246 | 247 | sc = ServiceClient(MOCK_SERVICE_ID) 248 | 249 | assert sc._base_url == MOCK_URL 250 | get_service_mock.assert_called_once_with(sc) 251 | 252 | 253 | @mock.patch("civis.service_client.ServiceClient.generate_classes_maybe_cached") 254 | @mock.patch("civis.service_client.APIClient") 255 | def test_get_service(mock_client, classes_mock): 256 | classes_mock.return_value = {} 257 | sc = ServiceClient(MOCK_SERVICE_ID) 258 | expected_service = {"current_url": MOCK_URL} 259 | 
mock_client.return_value.services.get.return_value = expected_service 260 | service = _get_service(sc) 261 | assert service == expected_service 262 | 263 | 264 | @mock.patch("civis.service_client.ServiceClient.generate_classes_maybe_cached") 265 | @mock.patch("civis.service_client.APIClient") 266 | def test_get_service__not_found(mock_client, classes_mock): 267 | classes_mock.return_value = {} 268 | sc = ServiceClient(MOCK_SERVICE_ID) 269 | err_resp = requests.Response() 270 | err_resp._content = json.dumps( 271 | { 272 | "status_code": 404, 273 | "error": "not_found", 274 | "errorDescription": "The requested resource could not be found.", 275 | "content": True, 276 | } 277 | ).encode() 278 | 279 | mock_client.return_value.services.get.side_effect = CivisAPIError(err_resp) 280 | 281 | with pytest.raises(CivisAPIError) as excinfo: 282 | _get_service(sc) 283 | 284 | expected_error = "The requested resource could not be found." 285 | assert str(excinfo.value) == expected_error 286 | 287 | 288 | def test_build_path(): 289 | service_client_mock = mock.Mock(_base_url="www.service_url.com", _root_path=None) 290 | se = ServiceEndpoint(service_client_mock) 291 | path = se._build_path("/resources") 292 | 293 | assert path == "www.service_url.com/resources" 294 | 295 | 296 | def test_build_path__with_root(): 297 | service_client_mock = mock.Mock(_base_url="www.service_url.com", _root_path="/api") 298 | se = ServiceEndpoint(service_client_mock) 299 | path = se._build_path("/resources") 300 | 301 | assert path == "www.service_url.com/api/resources" 302 | 303 | 304 | @mock.patch("civis.service_client.requests.Session.request") 305 | @mock.patch("civis.service_client.auth_service_session") 306 | def test_make_request(auth_mock, request_mock): 307 | service_client_mock = mock.Mock(_base_url="www.service_url.com") 308 | se = ServiceEndpoint(service_client_mock) 309 | 310 | expected_value = [ 311 | {"id": 1, "url": "www.survey_url.com/1"}, 312 | {"id": 2, "url": 
"www.survey_url.com/2"}, 313 | ] 314 | 315 | request_mock.return_value = mock.Mock(ok=True) 316 | request_mock.return_value.json = expected_value 317 | 318 | response = se._make_request("get", "resources/resources") 319 | 320 | assert response.json == expected_value 321 | 322 | 323 | def test_tocamlecase(): 324 | test_cases = [ 325 | ("snake_case", "SnakeCase"), 326 | ("Snake_Case", "SnakeCase"), 327 | ("snakecase", "Snakecase"), 328 | ] 329 | for in_word, out_word in test_cases: 330 | assert to_camelcase(in_word) == out_word 331 | -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | import copy 2 | from datetime import datetime 3 | from math import floor 4 | from unittest import mock 5 | 6 | import tenacity 7 | from requests import Request 8 | from requests import ConnectionError 9 | 10 | from civis._utils import retry_request, DEFAULT_RETRYING 11 | from civis._utils import _RETRY_VERBS, _RETRY_CODES, _POST_RETRY_CODES 12 | 13 | 14 | def _get_retrying(retries: int): 15 | retrying = copy.copy(DEFAULT_RETRYING) 16 | stop = tenacity.stop_after_delay(600) | tenacity.stop_after_attempt(retries) 17 | retrying.stop = stop 18 | return retrying 19 | 20 | 21 | def test_no_retry_on_success(): 22 | expected_call_count = 0 23 | api_response = {"key": "value"} 24 | mock_session = mock.MagicMock() 25 | session_context = mock_session.return_value.__enter__.return_value 26 | session_context.send.return_value.json.return_value = api_response 27 | 28 | for verb in _RETRY_VERBS: 29 | expected_call_count += 1 30 | session_context.send.return_value.status_code = 200 31 | 32 | request_info = dict( 33 | params={"secondParameter": "b", "firstParameter": "a"}, 34 | json={}, 35 | url="https://api.civisanalytics.com/wobble/wubble", 36 | method=verb, 37 | ) 38 | request = Request(**request_info) 39 | pre_request = session_context.prepare_request(request) 40 | 
retry_request(verb, pre_request, session_context, _get_retrying(3)) 41 | 42 | assert session_context.send.call_count == expected_call_count 43 | 44 | 45 | def test_no_retry_on_get_no_retry_failure(): 46 | expected_call_count = 0 47 | max_calls = 3 48 | api_response = {"key": "value"} 49 | mock_session = mock.MagicMock() 50 | session_context = mock_session.return_value.__enter__.return_value 51 | session_context.send.return_value.json.return_value = api_response 52 | 53 | for verb in _RETRY_VERBS: 54 | expected_call_count += 1 55 | session_context.send.return_value.status_code = 403 56 | 57 | request_info = dict( 58 | params={"secondParameter": "b", "firstParameter": "a"}, 59 | json={}, 60 | url="https://api.civisanalytics.com/wobble/wubble", 61 | method=verb, 62 | ) 63 | request = Request(**request_info) 64 | pre_request = session_context.prepare_request(request) 65 | retry_request(verb, pre_request, session_context, _get_retrying(max_calls)) 66 | 67 | assert session_context.send.call_count == expected_call_count 68 | 69 | 70 | @mock.patch("civis.futures.time.sleep", side_effect=lambda x: None) 71 | def test_retry_on_retry_eligible_failures(m_sleep): 72 | expected_call_count = 0 73 | max_calls = 3 74 | api_response = {"key": "value"} 75 | mock_session = mock.MagicMock() 76 | session_context = mock_session.return_value.__enter__.return_value 77 | session_context.send.return_value.json.return_value = api_response 78 | for verb in _RETRY_VERBS: 79 | for code in _RETRY_CODES: 80 | expected_call_count += max_calls 81 | session_context.send.return_value.status_code = code 82 | 83 | request_info = dict( 84 | params={"secondParameter": "b", "firstParameter": "a"}, 85 | json={}, 86 | url="https://api.civisanalytics.com/wobble/wubble", 87 | method=verb, 88 | ) 89 | 90 | request = Request(**request_info) 91 | pre_request = session_context.prepare_request(request) 92 | retry_request(verb, pre_request, session_context, _get_retrying(max_calls)) 93 | 94 | assert 
session_context.send.call_count == expected_call_count 95 | 96 | 97 | @mock.patch("civis.futures.time.sleep", side_effect=lambda x: None) 98 | def test_retry_on_retry_eligible_failures_lowercase_verbs(m_sleep): 99 | expected_call_count = 0 100 | max_calls = 3 101 | api_response = {"key": "value"} 102 | mock_session = mock.MagicMock() 103 | session_context = mock_session.return_value.__enter__.return_value 104 | session_context.send.return_value.json.return_value = api_response 105 | for verb in _RETRY_VERBS: 106 | for code in _RETRY_CODES: 107 | expected_call_count += max_calls 108 | session_context.send.return_value.status_code = code 109 | 110 | request_info = dict( 111 | params={"secondParameter": "b", "firstParameter": "a"}, 112 | json={}, 113 | url="https://api.civisanalytics.com/wobble/wubble", 114 | method=verb.lower(), 115 | ) 116 | 117 | request = Request(**request_info) 118 | pre_request = session_context.prepare_request(request) 119 | retry_request(verb, pre_request, session_context, _get_retrying(max_calls)) 120 | 121 | assert session_context.send.call_count == expected_call_count 122 | 123 | 124 | def test_no_retry_on_post_success(): 125 | expected_call_count = 1 126 | max_calls = 3 127 | api_response = {"key": "value"} 128 | mock_session = mock.MagicMock() 129 | session_context = mock_session.return_value.__enter__.return_value 130 | session_context.send.return_value.json.return_value = api_response 131 | 132 | session_context.send.return_value.status_code = 200 133 | 134 | request_info = dict( 135 | params={"secondParameter": "b", "firstParameter": "a"}, 136 | json={}, 137 | url="https://api.civisanalytics.com/wobble/wubble", 138 | method="POST", 139 | ) 140 | request = Request(**request_info) 141 | pre_request = session_context.prepare_request(request) 142 | retry_request("post", pre_request, session_context, _get_retrying(max_calls)) 143 | 144 | assert session_context.send.call_count == expected_call_count 145 | 146 | 147 | 
@mock.patch("civis.futures.time.sleep", side_effect=lambda x: None)
def test_retry_on_retry_eligible_post_failures(m_sleep):
    """A POST failing with a code from ``_POST_RETRY_CODES`` uses every attempt."""
    attempts_allowed = 3
    session = mock.MagicMock().return_value.__enter__.return_value
    session.send.return_value.json.return_value = {"key": "value"}

    for round_number, status in enumerate(_POST_RETRY_CODES, start=1):
        session.send.return_value.status_code = status
        prepared = session.prepare_request(
            Request(
                params={"secondParameter": "b", "firstParameter": "a"},
                json={},
                url="https://api.civisanalytics.com/wobble/wubble",
                method="POST",
            )
        )
        retry_request("post", prepared, session, _get_retrying(attempts_allowed))

        # The mock's call counter is cumulative over all rounds so far.
        assert session.send.call_count == round_number * attempts_allowed


def test_no_retry_on_connection_error():
    """A ``ConnectionError`` raised by ``send`` costs one attempt per verb."""
    session = mock.MagicMock().return_value.__enter__.return_value
    session.send.return_value.json.return_value = {"key": "value"}

    for sends_so_far, verb in enumerate(_RETRY_VERBS, start=1):
        prepared = session.prepare_request(
            Request(
                params={"secondParameter": "b", "firstParameter": "a"},
                json={},
                url="https://api.civisanalytics.com/wobble/wubble",
                method=verb,
            )
        )

        # Every send attempt now raises instead of returning a response.
        session.send.side_effect = ConnectionError()
        try:
            retry_request(verb, prepared, session, _get_retrying(3))
        except ConnectionError:
            pass

        assert session.send.call_count == sends_so_far


def test_retry_respect_retry_after_headers():
    """A 429 with a ``Retry-After`` header is retried after the advertised delay.

    The elapsed time is measured with the real clock, so the whole-second
    duration of each request is compared against ``retry_after`` multiplied
    by the number of waits between attempts.
    """
    attempts_allowed = 2
    retry_after = 1
    session = mock.MagicMock().return_value.__enter__.return_value
    response = session.send.return_value
    response.json.return_value = {"key": "value"}
    response.status_code = 429
    response.headers = {"Retry-After": str(retry_after)}

    upper_verbs = ["HEAD", "TRACE", "GET", "PUT", "OPTIONS", "DELETE", "POST"]
    all_verbs = upper_verbs + [verb.lower() for verb in upper_verbs]

    for round_number, verb in enumerate(all_verbs, start=1):
        prepared = session.prepare_request(
            Request(
                params={"secondParameter": "b", "firstParameter": "a"},
                json={},
                url="https://api.civisanalytics.com/wobble/wubble",
                method=verb,
            )
        )

        started = datetime.now().timestamp()
        retry_request(verb, prepared, session, _get_retrying(attempts_allowed))
        elapsed = datetime.now().timestamp() - started

        assert session.send.call_count == round_number * attempts_allowed
        assert floor(elapsed) == retry_after * (attempts_allowed - 1)
# Minimal workflow definition that the validator is expected to accept.
# The tests below break it one piece at a time, so its indentation matters.
_VALID_WORKFLOW_YAML = """
version: "2.0"
workflow:
  tasks:
    task_1:
      action: civis.scripts.container
      input:
        required_resources:
          cpu: 1024
          memory: 1024
          disk_space: 10
        docker_image_name: civisanalytics/datascience-python
        docker_command: echo 'hello world'
"""


def test_valid_workflow_yaml():
    """The reference workflow YAML validates without raising."""
    validate_workflow_yaml(_VALID_WORKFLOW_YAML)


@pytest.mark.parametrize(
    "replacee, replacer, error_message_contains",
    [
        ('version: "2.0"', "", "'version' is a required property"),
        ("tasks:", "foobar:", "'tasks' is a required property"),
        (
            "civis.scripts.container",
            "civis.script.container",
            "'civis.script.container' is not one of",
        ),
        ("      action:", "      foo: bar\n      action:", "'foo' was unexpected"),
        (
            "      input:",
            "      on-success:\n        - task_1\n      input:",
            "A task cannot transition to itself",
        ),
        (
            "      input:",
            "      on-success:\n        - undefined_task\n      input:",
            "undefined task",
        ),
        ("hello world", "hëlló wòrld", "cannot contain non-ASCII characters"),
    ],
)
def test_invalid_workflow_yaml(replacee, replacer, error_message_contains):
    """Break a valid workflow yaml, which should raise a WorkflowValidationError."""
    # Guard against a stale test case silently leaving the YAML unchanged.
    if replacee not in _VALID_WORKFLOW_YAML:
        raise ValueError(f"{replacee!r} not in the workflow yaml to be tested")
    invalid_wf_yaml = _VALID_WORKFLOW_YAML.replace(replacee, replacer)
    with pytest.raises(WorkflowValidationError, match=error_message_contains):
        validate_workflow_yaml(invalid_wf_yaml)


def test_workflows_public_repo():
    """All example workflow YAML files from the workflows-public repo should pass.

    The repository archive is downloaded from GitHub into a temporary
    directory, unpacked, and every top-level ``.yml`` file is validated.
    """
    wf_repo_zip_url = (
        "https://github.com/civisanalytics/workflows-public/archive/refs/heads/main.zip"
    )

    with tempfile.TemporaryDirectory() as temp_dir:
        wf_repo_zip_path = os.path.join(temp_dir, "workflows-public.zip")
        wf_repo_unzip_dir = os.path.join(temp_dir, "workflows-public")

        # Download the workflows-public repo as a zip file.
        # The timeout keeps an unresponsive server from hanging the test run.
        with requests.get(wf_repo_zip_url, stream=True, timeout=60) as r:
            # Fail here on an HTTP error rather than later with a confusing
            # "not a zip file" error from an HTML error page.
            r.raise_for_status()
            with open(wf_repo_zip_path, "wb") as f:
                for chunk in r.iter_content(chunk_size=1024):
                    f.write(chunk)
        assert os.path.isfile(wf_repo_zip_path), "no file found"

        # Unzip repo content.
        with zipfile.ZipFile(wf_repo_zip_path) as zfile:
            zfile.extractall(wf_repo_unzip_dir)

        wf_repo_dir = os.path.join(wf_repo_unzip_dir, "workflows-public-main")
        repo_entries = os.listdir(wf_repo_dir)
        yaml_filenames = [f for f in repo_entries if f.endswith(".yml")]
        assert yaml_filenames, f"no yaml files: {repo_entries}"

        # Validate each workflow yaml in the repo.
        for filename in yaml_filenames:
            wf_yaml_path = os.path.join(wf_repo_dir, filename)

            try:
                # Explicit encoding so the result does not depend on the
                # platform's default locale.
                with open(wf_yaml_path, encoding="utf-8") as f:
                    validate_workflow_yaml(f.read())
            except WorkflowValidationError:
                # Name the offending file, then re-raise with the original
                # traceback intact.
                print("Failed workflow yaml:", filename)
                raise
def main():
    """Compare the current API spec with a freshly downloaded one.

    The current spec path defaults to ``API_SPEC_PATH``; a path given as the
    first command-line argument overrides it.

    Raises
    ------
    RuntimeError
        If the comparison reports anything added, removed, or changed.
    """
    # A command-line path is only meant for testing this script.
    current_spec_path = sys.argv[1] if len(sys.argv) > 1 else API_SPEC_PATH
    print("Current API spec path:", current_spec_path)

    with tempfile.TemporaryDirectory() as scratch_dir:
        latest_spec_path = os.path.join(scratch_dir, "civis_api_spec.json")
        download_latest_api_spec(latest_spec_path)
        added, removed, changed = compare_api_specs(
            current_spec_path, latest_spec_path
        )

    has_differences = any(
        any(diffs.values()) for diffs in (added, removed, changed)
    )
    if not has_differences:
        print("The Civis API spec hasn't been updated.")
        return

    raise RuntimeError(
        "The Civis API spec has been updated. "
        "Please run tools/update_civis_api_spec.py.\n----------------\n"
        f"Added:\n{pprint.pformat(added)}\n----------------\n"
        f"Removed:\n{pprint.pformat(removed)}\n----------------\n"
        f"Changed:\n{pprint.pformat(changed)}"
    )


if __name__ == "__main__":
    main()
def _check_redshift_reads(client, database, logger):
    """Check that read_civis and read_civis_sql return the same iris table."""
    # The table used here has an explicit index column to sort by in case the
    # rows come back in a different order.
    logger.info("Testing reading from redshift...")
    sql = "SELECT * FROM datascience.iris"
    df1 = civis.io.read_civis_sql(
        sql=sql, database=database, use_pandas=True, client=client
    ).set_index("index")
    df2 = civis.io.read_civis(
        table="datascience.iris", database=database, use_pandas=True, client=client
    ).set_index("index")
    assert df1.shape == (150, 5)
    # check_like=True since the order in which rows are retrieved may vary.
    pd.testing.assert_frame_equal(df1, df2, check_like=True)


def _check_redshift_upload(client, database, logger):
    """Upload iris data to a scratch table, read it back, and return the frame."""
    logger.info("Testing uploading to redshift...")
    # The timestamp suffix keeps separate runs from sharing a table name.
    table = f"scratch.smoke_test_{int(time.time())}"
    iris = load_iris()
    df_iris1 = (
        pd.DataFrame(iris.data)
        .rename(columns={0: "c0", 1: "c1", 2: "c2", 3: "c3"})
        .join(pd.DataFrame(iris.target).rename(columns={0: "label"}))
        .reset_index()
    )
    try:
        civis.io.dataframe_to_civis(df_iris1, database, table, client=client).result()
        df_iris2 = civis.io.read_civis(
            table=table, database=database, use_pandas=True, client=client
        )
        pd.testing.assert_frame_equal(
            df_iris1.sort_values(by="index").set_index("index"),
            df_iris2.sort_values(by="index").set_index("index"),
        )
    finally:
        # Always request removal of the scratch table, even on failure.
        civis.io.query_civis(
            f"DROP TABLE IF EXISTS {table}", database=database, client=client
        )
    return df_iris1


def _check_file_roundtrip(client, df, logger):
    """Upload *df* as a CSV file and check the downloaded bytes are identical."""
    logger.info("Testing File uploading and downloading...")
    csv_bytes1 = df.to_csv(index=False).encode("utf-8")
    upload_buf = io.BytesIO(csv_bytes1)
    file_id = civis.io.file_to_civis(
        upload_buf, name="civis-python test file", client=client
    )
    # Download into a fresh buffer: reusing the upload buffer would leave the
    # uploaded bytes in place and hide an empty or truncated download.
    download_buf = io.BytesIO()
    civis.io.civis_to_file(file_id, download_buf, client=client)
    csv_bytes2 = download_buf.getvalue()
    assert csv_bytes1 == csv_bytes2, "File upload/download did not match."


def _check_civisml(client, database, logger):
    """Train a CivisML model on the iris table for each version under test."""
    logger.info("Testing Civis-ML...")
    for civisml_version in (None, "v2.2"):  # None = latest production version
        logger.info("CivisML version: %r", civisml_version)
        mp = civis.ml.ModelPipeline(
            model_name="[civis-python smoke test; do not count this as CivisML usage]",
            model="sparse_logistic",
            dependent_variable="type",
            primary_key="index",
            client=client,
            civisml_version=civisml_version,
        )
        result = mp.train(
            table_name="datascience.iris", database_name=database
        ).result()
        assert result["state"] == "succeeded"


def main():
    """Run the end-to-end smoke tests against the live Civis API.

    The checks run in a fixed order: Redshift reads, a Redshift upload
    round-trip, a file upload/download round-trip, and CivisML training.
    Any failed assertion aborts the run.
    """
    logging.basicConfig(format="", level=logging.INFO)

    logger = logging.getLogger("civis")

    t0 = time.time()

    database = "redshift-general"
    client = civis.APIClient()

    _check_redshift_reads(client, database, logger)
    # The uploaded frame is reused as the payload for the file round-trip.
    df_iris = _check_redshift_upload(client, database, logger)
    _check_file_roundtrip(client, df_iris, logger)
    _check_civisml(client, database, logger)

    logger.info("%.1f seconds elapsed in total.", time.time() - t0)


if __name__ == "__main__":
    main()