├── .github └── workflows │ └── test-and-deploy.yml ├── .gitignore ├── DEPLOY.md ├── LICENSE ├── MANIFEST.in ├── README.md ├── docs ├── architecture_aws.png ├── architecture_prediction_network.png ├── aws_setup_guide.md ├── azure_setup_guide.md └── gcp_setup_guide.md ├── numerai ├── __init__.py ├── cli │ ├── __init__.py │ ├── constants.py │ ├── destroy_all.py │ ├── doctor.py │ ├── misc.py │ ├── node │ │ ├── __init__.py │ │ ├── config.py │ │ ├── deploy.py │ │ ├── destroy.py │ │ └── test.py │ ├── setup.py │ ├── uninstall.py │ ├── upgrade.py │ └── util │ │ ├── __init__.py │ │ ├── debug.py │ │ ├── docker.py │ │ ├── files.py │ │ ├── keys.py │ │ └── terraform.py ├── examples │ ├── crypto-python3 │ │ ├── Dockerfile │ │ ├── predict.py │ │ └── requirements.txt │ ├── signals-python3 │ │ ├── Dockerfile │ │ ├── predict.py │ │ ├── requirements.txt │ │ └── train.py │ ├── tournament-python3 │ │ ├── Dockerfile │ │ ├── predict.py │ │ ├── requirements.txt │ │ └── train.py │ └── tournament-rlang │ │ ├── Dockerfile │ │ ├── install_packages.R │ │ └── main.R └── terraform │ ├── aws │ ├── -inputs.tf │ ├── -main.tf │ ├── -outputs.tf │ └── aws │ │ ├── -inputs.tf │ │ ├── -locals.tf │ │ ├── -outputs.tf │ │ ├── cluster.tf │ │ ├── main.zip │ │ ├── network.tf │ │ └── webhook.tf │ ├── azure │ ├── azure │ │ ├── cluster.tf │ │ ├── input.tf │ │ ├── locals.tf │ │ ├── outputs.tf │ │ └── webhook.tf │ ├── azure_cron_trigger.zip │ ├── azure_trigger.zip │ ├── input.tf │ ├── main.tf │ ├── outputs.tf │ └── registry.tf │ └── gcp │ ├── -inputs.tf │ ├── -main.tf │ ├── -outputs.tf │ ├── cloud-function.zip │ ├── gcp │ ├── -inputs.tf │ ├── -locals.tf │ ├── -outputs.tf │ ├── cluster.tf │ ├── webhook.tf │ └── workflow-source.yaml │ ├── registry.tf │ └── services.tf ├── scripts ├── setup-mac.sh ├── setup-ubu.sh └── setup-win10.ps1 └── setup.py /.github/workflows/test-and-deploy.yml: -------------------------------------------------------------------------------- 1 | name: Test & Deploy Numerai CLI 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | 7 | concurrency: deploy-numerai-cli-${{ github.ref }} 8 | 9 | jobs: 10 | build-python36: 11 | name: Build Python 3.6 12 | runs-on: ubuntu-20.04 13 | steps: 14 | - uses: actions/checkout@v2 15 | - uses: actions/setup-python@v4 16 | with: 17 | python-version: "3.6.15" 18 | - run: pip install . 19 | - run: numerai copy-example 20 | - run: test -e tournament-python3/predict.py 21 | 22 | build-python37: 23 | name: Build Python 3.7 24 | runs-on: ubuntu-20.04 25 | steps: 26 | - uses: actions/checkout@v2 27 | - uses: actions/setup-python@v4 28 | with: 29 | python-version: "3.7.9" 30 | - run: pip install . 31 | - run: numerai copy-example 32 | - run: test -e tournament-python3/predict.py 33 | 34 | build-docker: 35 | name: Build Docker 36 | runs-on: ubuntu-20.04 37 | steps: 38 | - uses: actions/checkout@v2 39 | - run: | 40 | cd numerai/examples/tournament-python3 \ 41 | && docker build -t example-python \ 42 | --build-arg NUMERAI_PUBLIC_ID=test \ 43 | --build-arg NUMERAI_SECRET_KEY=test . 
44 | 45 | pypi-release: 46 | name: PyPI Release 47 | runs-on: ubuntu-20.04 48 | if: github.ref == 'refs/heads/master' 49 | needs: [build-python36, build-python37, build-docker] 50 | steps: 51 | - uses: actions/checkout@v2 52 | - uses: actions/setup-python@v4 53 | with: 54 | python-version: "3.7.9" 55 | - run: pip install twine wheel 56 | - run: python setup.py sdist bdist_wheel 57 | - run: | 58 | python -m twine upload \ 59 | -u __token__ \ 60 | -p ${{ secrets.PYPI_API_KEY }} \ 61 | --skip-existing dist/* 62 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | # User-specific 107 | .numerai* 108 | */node_modules 109 | .terraform* 110 | terraform.tfstate* 111 | *.tfplan 112 | .vscode 113 | .prettierrc.json 114 | 115 | .idea/ 116 | .DS_STORE 117 | -------------------------------------------------------------------------------- /DEPLOY.md: -------------------------------------------------------------------------------- 1 | # Deployment Instructions 2 | Create a .pypirc file in your home directory like so: 3 | ``` 4 | ▶ cat ~/.pypirc 5 | [distutils] 6 | index-servers = 7 | pypi 8 | 9 | [pypi] 10 | repository: https://upload.pypi.org/legacy/ 11 | username: REPLACE_ME 12 | password: *** 13 | ``` 14 | 15 | Install twine: 16 | ``` 17 | pip3 install twine 18 | ``` 19 | 20 | Build the dist packages: 21 | ``` 22 | rm -rf dist 23 | python3 setup.py sdist bdist_wheel 24 | ``` 25 | 26 | Deploy: 27 | ``` 28 | twine upload dist/* 29 | ``` 30 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Numerai 4 | 5 | Permission is hereby granted, free of 
charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
recursive-include numerai *

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# numerai-cli

[![PyPI](https://img.shields.io/pypi/v/numerai-cli.svg?color=brightgreen)](https://pypi.org/project/numerai-cli/)

Welcome to the Numerai CLI for the [Numerai Tournament](https://docs.numer.ai/tournament/learn).
Numerai CLI (aka Compute Heavy) is a tool to help you set up cloud automation for your Numerai predictions.
This README is designed to have EVERYTHING you need to set up and maintain a Numerai Compute Heavy node.

This CLI runs on your local computer to configure a Numerai Prediction Node in the cloud.
This solution is architected to cost less than $5/mo on average, but actual costs may vary.
It has been tested on MacOS/OSX, Windows 8/10, and Ubuntu 18/20,
but should theoretically work anywhere that Docker and Python 3 are available.

## Contents

- [Getting Started](#getting-started)
- [List of Commands](#list-of-commands)
- [Upgrading](#upgrading)
  - [Upgrading from 0.1/0.2 to 0.3.0](#upgrading-from-0102-to-030)
  - [Beyond](#beyond)
- [Uninstalling](#uninstalling)
- [Troubleshooting and Feedback](#troubleshooting-and-feedback)
  - [Python](#python)
  - [Docker](#docker)
    - [MacOS and Windows 10](#macos-and-windows-10)
    - [Linux](#linux)
    - [Older PCs: Docker Toolbox](#older-pcs-docker-toolbox)
  - [Azure](#azure)
  - [Common Errors](#common-errors)
- [Billing Alerts](#billing-alerts)
- [Prediction Node Architecture](#prediction-node-architecture)
  - [Python Example](#python-example)
  - [RLang Example](#rlang-example)
  - [The Dockerfile](#the-dockerfile)
  - [Cloud Components](#cloud-components)
- [Special Thanks](#special-thanks)

## Getting Started

To use this tool you need:

- Numerai API keys
- A paid cloud provider account
- Python
- Docker

1. Sign up for a Numerai Account, get your Numerai API Keys, and your first Model:

   1. Sign up at https://numer.ai and log in to your new account
   2. Go to https://numer.ai/account > "Your API keys" section > click "Add"
   3. Name your key and check all boxes under "Scope this key will have"
   4. Enter your password and click "Confirm"
   5. Copy your public key and secret key somewhere safe

2. Choose your cloud provider and follow the corresponding guide:

   - [AWS Setup Guide](./docs/aws_setup_guide.md)
   - [Azure Setup Guide](./docs/azure_setup_guide.md)
   - [GCP Setup Guide](./docs/gcp_setup_guide.md)

3. Install Docker and Python for your Operating System (if you encounter errors or your
   OS is not supported, please see [Troubleshooting and Feedback](#troubleshooting-and-feedback)):

   - Mac Terminal (cmd + space, type `terminal`, select `terminal.app`):

     ```shell
     curl https://raw.githubusercontent.com/numerai/numerai-cli/master/scripts/setup-mac.sh | bash
     ```

   - Ubuntu 18/20 Terminal (ctrl + alt + t):

     ```shell
     sudo apt update && sudo apt install -y libcurl4 curl && sudo curl https://raw.githubusercontent.com/numerai/numerai-cli/master/scripts/setup-ubu.sh | sudo bash
     ```

   - Windows 10 Command Prompt (windows key, type `cmd`, select Command Prompt):

     ```powershell
     powershell -command "$Script = Invoke-WebRequest 'https://raw.githubusercontent.com/numerai/numerai-cli/master/scripts/setup-win10.ps1'; $ScriptBlock = [ScriptBlock]::Create($Script.Content); Invoke-Command -ScriptBlock $ScriptBlock"
     ```

4. After the setup script confirms Python and Docker, install `numerai-cli` via:

   ```shell
   pip3 install --upgrade numerai-cli --user
   ```

   NOTES:

   - This command will also work to update to new versions of the package in the future.
   - If you are using a python venv, drop the --user option.
     If you don't know what that is, disregard this note.

5. Run these commands on your personal computer (not a temporary instance)
   to get an example node running in minutes:

   For AWS users run:

   ```shell
   numerai setup --provider aws
   ```

   and enter your user's keys when prompted.

   For Azure users run:

   ```shell
   numerai setup --provider azure
   ```

   and enter your application's keys when prompted.

   For GCP users run:

   ```shell
   numerai setup --provider gcp
   ```

   and enter the complete path to your downloaded key when prompted.

   Then configure, deploy, and test your first node:

   ```shell
   numerai node config --example tournament-python3
   numerai node deploy
   numerai node test
   ```

If you want to use larger instances to generate your predictions, first run `numerai list-constants`
to list the vCPU/mem presets available, then you can configure a node to use one of the presets via:

```shell
numerai node config -s mem-lg
```

Your compute node is now set up and ready to run! When you make changes to your code or re-train your model,
simply deploy and test your node again:

```shell
numerai node deploy
numerai node test
```

NOTES:

- These commands have stored configuration files in `$USER_HOME/.numerai/`. DO NOT LOSE THESE FILES,
  or else you will have to manually delete every cloud resource by hand.
- The example node trains a model in the cloud, which is bad. You should train locally, pickle the
  trained model, deploy your node, then unpickle your model to do the live predictions
  (see the first sketch below these notes).
- The default example does _not_ make stake changes; please refer to the [numerapi docs](https://numerapi.readthedocs.io/en/latest/api/numerapi.html#module-numerapi.numerapi)
  for the methods you must call to do this (see the second sketch below these notes).
- You can view resources and logs in the AWS Console (region us-east-1) for your
  [ECS Cluster](https://console.aws.amazon.com/ecs/home?region=us-east-1#/clusters/numerai-submission-ecs-cluster/tasks)
  and [other resources](https://console.aws.amazon.com/cloudwatch/home?region=us-east-1#logsV2:log-groups)
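To make the train-locally note concrete, here is a minimal, hypothetical sketch of the pattern. The file names, the scikit-learn stand-in model, and the toy data are illustrative assumptions, not the packaged example code:

```python
# Minimal sketch of the local-train / cloud-predict pattern, assuming a
# scikit-learn style model; swap in your real training and live data.
import pickle

from sklearn.dummy import DummyRegressor  # stand-in for your real model

# --- locally, e.g. in train.py ---
model = DummyRegressor(strategy="mean")
model.fit([[0.25], [0.50], [0.75]], [0.0, 0.5, 1.0])  # your training data here
with open("model.pkl", "wb") as fh:
    pickle.dump(model, fh)  # keep model.pkl next to your Dockerfile so it gets copied

# --- in the cloud, e.g. in predict.py ---
with open("model.pkl", "rb") as fh:
    model = pickle.load(fh)
predictions = model.predict([[0.4], [0.6]])  # your live features here
```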
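And for the stake note, a hedged sketch of what a stake adjustment might look like via numerapi. The method name and arguments here are assumptions based on recent numerapi versions; verify them against the numerapi docs linked above before moving real NMR:

```python
# Hypothetical stake adjustment via numerapi -- double-check stake_change and
# its arguments against the numerapi docs before relying on this.
import os

from numerapi import NumerAPI

napi = NumerAPI(os.environ["NUMERAI_PUBLIC_ID"], os.environ["NUMERAI_SECRET_KEY"])
napi.stake_change(nmr=1, action="increase", model_id="your-model-id")  # placeholder id
```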
### List of Commands

Use the `--help` option on any command or sub-command to get a full description of it:

```shell
numerai
numerai --help
numerai [command] --help
numerai [command] [sub-command] --help
```

Each command or sub-command takes its own options. For example, if you want to copy the
numerai signals example and configure a node for a signals model with large memory
requirements, you'd use something like this (replacing [MODEL NAME] with the relevant
signals model name):

```shell
numerai node -m [MODEL NAME] -t 11 config -s mem-lg -e signals-python3
```

Here, the `node` command takes a model name with `-m` and an argument `-t 11` to specify the tournament number
(numerai is tournament 8, signals is tournament 11, crypto is tournament 12).
The `config` sub-command also takes a `-s` option to specify the size of the node to configure.

### Upgrading

Upgrading numerai-cli will always require you to update the package itself using pip:

```shell
pip install --upgrade numerai-cli --user
numerai upgrade
```

#### Upgrading from 0.1/0.2 to 0.3.0

CLI 0.3 uses a new configuration format that is incompatible with versions 0.1 and 0.2,
but a command to migrate your configuration is provided for you. Run this in the directory
you ran `numerai setup` from the previous version:

```shell
numerai upgrade
```

#### 0.3 to 0.4

CLI 0.4 introduces a new provider option (Microsoft Azure) and moves the default aws
terraform into a subdirectory. You'll need to run `upgrade`:

```shell
numerai upgrade
```

If you want to use azure, follow the [setup guide for azure](./docs/azure_setup_guide.md)
then run:

```shell
numerai setup --provider azure
numerai node config --provider azure
```

#### 0.4 to 1.0

CLI 1.0 is a big change for AWS; you'll need to:

- update the permissions for the role you created in the [AWS Setup Guide](./docs/aws_setup_guide.md)
- update the terraform with `numerai setup`
- replace your aws nodes by running:
  - `numerai node -m [MODEL NAME] destroy --preserve-node-config`
  - `numerai node -m [MODEL NAME] config`
  - `numerai node -m [MODEL NAME] deploy`

#### Beyond

Some updates will make changes to configuration files used by Numerai CLI. These will
require you to re-run some commands to upgrade your nodes to the newest versions:

- `numerai setup` will copy over changes to files in the `$HOME/.numerai` directory
- `numerai node config` will apply those changes to a node

### Uninstalling

```shell
numerai uninstall
```

## Troubleshooting and Feedback

Please review this entire section and check GitHub issues before asking questions.
If you've exhausted this document and still cannot find a solution or answer, please join us on
[Discord](https://discord.gg/numerai)
and include the following information with your issue/message:

- The commands you ran that caused the error (even previous commands)
- Version information from running:
  - `pip3 show numerai-cli`
  - `python -V`
  - `docker -v`
- System information from running:
  - Mac: `system_profiler SPSoftwareDataType && system_profiler SPHardwareDataType`
  - Linux: `lsb_release -a && uname -a`
  - Windows: `powershell -command "Get-ComputerInfo"`

### Python

If the environment setup script fails to install Python3 for you, report the error to Numerai
and then install it manually with one of the following options:

- [Download Python3 directly](https://www.python.org/downloads/)
- Install it from [your system's package manager](https://en.wikipedia.org/wiki/List_of_software_package_management_systems)
- Use [conda](https://docs.conda.io/projects/conda/en/latest/user-guide/install/) to install and manage python for you

### Docker

If the environment setup script fails to install Docker for you, report the error to Numerai
then read the following to get a working installation on your machine. For PCs, you may need to [activate virtualization in your BIOS](https://superuser.com/questions/1382472/how-do-i-find-and-enable-the-virtualization-setting-on-windows-10) before installing.

#### MacOS and Windows 10

Just install [Docker Desktop](https://www.docker.com/products/docker-desktop) to get it running.
You should also increase the RAM allocated to the VM:

1. Open the Docker Desktop GUI
2. Click the gear in the top right corner
3. Select Resources > Advanced in the left sidebar
4. Use the slider to allocate more memory (leave a few gigs for your OS and background applications, otherwise your computer might crash)

#### Linux

Check here for instructions for your distribution:
https://docs.docker.com/engine/install/

#### Older PCs: Docker Toolbox

If your machine is older and/or doesn't have Hyper-V enabled, then you will have to follow these steps to install docker toolbox on your machine:

1. [Install Oracle VirtualBox](https://www.virtualbox.org/wiki/Downloads) for your Operating System
2. Restart your computer
3. [Install Docker Toolbox](https://github.com/docker/toolbox/releases)
4. Restart your computer
5. After it's installed, open the "Docker QuickStart Terminal" and run the following to increase its RAM:

```shell
docker-machine rm default
docker-machine create -d virtualbox --virtualbox-cpu-count=2 --virtualbox-memory=4096 --virtualbox-disk-size=50000 default
docker-machine restart default
```

NOTE: your code must live somewhere under your User directory (i.e. C:\Users\USER_NAME\ANY_FOLDER). This is a restriction of Docker Toolbox, which does not share paths correctly otherwise.

### Azure

If you just made your Azure account, there's a chance your account provisioning could take some time, potentially up to 24 hours.

When configuring your node for the first time, the numerai-cli may hang as it tries to provision infrastructure in your account. If running `numerai node config --provider azure` shows no log output for more than 5 minutes, your account is likely stuck in this provisioning state. While we investigate this issue, the best course of action is to wait until the following day and run the command again, as there is no way to skip this Azure provisioning step.

### Common Errors

```shell
Error:
subprocess.CalledProcessError: Command 'docker run --rm -it -v /home/jason/tmp/.numerai:/opt/plan -w /opt/plan hashicorp/terraform:light init' returned non-zero exit status 127.


Reason:
Docker is not installed.

Solutions:
If you're certain that docker is installed, make sure that your user can execute docker, i.e. try to run `docker ps`.
If that's the issue, then depending on your system, you can do the following:

- Windows/OSX
  - Make sure the Docker Desktop is running and finished booting up.
    It can take a few minutes to be completely ready.
    When clicking on the docker tray icon, it should say "Docker Desktop is Running".
  - If you're using Docker Toolbox on Windows, then make sure you've opened the "Docker Quickstart Terminal".

- Linux
  - Run `sudo usermod -aG docker $USER`
  - Then reboot or logout/login for this to take effect.
```

```shell
Error:
docker: Error response from daemon: Drive has not been shared

Solutions:
- You need to [share your drive](https://docs.docker.com/docker-for-windows/#shared-drives).
```

```shell
Error:
numerai: command not found

Solutions:
- osx/linux: Try running `~/.local/bin/numerai`
- Windows: `%LOCALAPPDATA%\Programs\Python\Python37-32\Scripts\numerai.exe`
- Alternatively, exit then re-open your terminal/command prompt.
```

```shell
Error:
error calling sts:GetCallerIdentity: InvalidClientTokenId: The security token included in the request is invalid.
...
Command 'docker run -e "AWS_ACCESS_KEY_ID=..." -e "AWS_SECRET_ACCESS_KEY=..." --rm -it -v /home/jason/tmp/.numerai:/opt/plan -w /opt/plan hashicorp/terraform:light apply -auto-approve' returned non-zero exit status 1.

Solutions:
- Run `numerai setup` to re-write your API Keys
```

```shell
Error:
ERROR numerapi.base_api: Error received from your webhook server: {"tasks":[],"failures":[{"reason":"The requested CPU configuration is above your limit"}]}

Solution:
1. Go to the [Quota Dashboard for EC2](https://console.aws.amazon.com/servicequotas/home/services/ec2/quotas)
2. Search for "On-Demand", this will list all instance types and their limits for your account.
3. Click the bubble next to "Running On-Demand Standard (A, C, D, H, I, M, R, T, Z) instances"
4. Click "Request quota increase" in the top right
5. Input a higher value than the currently applied quota and finish the request
```

- If you still get this error after AWS increases your quota, try again 1-2 more times
- You may have to complete a quota request for other types of instances too, depending on how resource-intensive your setup is

## Billing Alerts

There's no automated way to set up billing alerts, so you'll need to
[configure one manually](https://www.cloudberrylab.com/resources/blog/how-to-configure-billing-alerts-for-your-aws-account/).
We estimate costs to be less than $5 per month unless your compute takes more than 12 hours a week,
but increasing the RAM/CPU will increase your costs.

## Prediction Node Architecture

A Prediction Node represents a cloud-based model that Numerai can trigger for predictions; it is designed to be reliable, resource efficient, and easy to configure and debug. Prediction Nodes use a few important components like a `Dockerfile`, a `Trigger`, a `Container`, and a `Compute Cluster`, all of which can be created using one of the following examples.

### Python Example

```shell
numerai-python3
├── Dockerfile
├── .dockerignore
├── predict.py
├── requirements.txt
└── train.py
```

- `Dockerfile`: Used during `numerai node deploy` to build a Docker image that's used to run your code in the cloud. It copies all files in its directory, installs Python requirements from requirements.txt, and runs `python predict.py` by default.

- `.dockerignore`: This file uses glob-style patterns (like .gitignore) to match files that should not be included in the Docker image.

- `train.py`: This is an extra entry point specifically for training; it's used when running `numerai node test --local --command "python train.py"`

- `requirements.txt`: Defines python packages required to run the code.
- `predict.py`: Gets run by default locally and in the cloud when running `numerai node test` without the `--command|-c` option.

### RLang Example

```shell
numerai-rlang
├── Dockerfile
├── .dockerignore
├── install_packages.R
└── main.R
```

- `Dockerfile`: Used during `numerai node deploy` to build a Docker image that's used to run your code in the cloud. It copies all files in its directory, installs RLang requirements from install_packages.R, and runs main.R by default.

- `.dockerignore`: This file uses glob-style patterns (like .gitignore) to match files that should not be included in the Docker image.

- `install_packages.R`: Installs dependencies necessary for running the example.
- `main.R`: Run by default locally and in the cloud when running `numerai node test` without the `--command|-c` option.

### The Dockerfile

This is the most important component of deploying a Prediction Node. It is a program (much like a bash script) that packages up your code as an `image`; this image contains everything your code needs to run in the cloud. The most typical case of a Dockerfile is demonstrated in [Numerai Examples](./numerai/examples/). If you're not sure how to use a Dockerfile, first copy an example with `numerai copy-example`, then read the documentation in the Dockerfile to learn the basics.
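As a rough illustration of what the example Dockerfiles do, here is a hypothetical minimal version. The base image, paths, and build args are assumptions for illustration; the packaged examples are the authoritative reference:

```dockerfile
# Illustrative sketch only -- copy a real example with `numerai copy-example`.
FROM python:3.9-slim

# API keys can be passed in as build args (this repo's CI does the same)
ARG NUMERAI_PUBLIC_ID
ARG NUMERAI_SECRET_KEY
ENV NUMERAI_PUBLIC_ID=$NUMERAI_PUBLIC_ID
ENV NUMERAI_SECRET_KEY=$NUMERAI_SECRET_KEY

WORKDIR /opt/app
COPY requirements.txt .
RUN pip install -r requirements.txt

# copy everything else in this directory into the image
COPY . .
CMD ["python", "predict.py"]
```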
These files are very flexible: the default Dockerfile will just copy everything in whatever directory it is in, but this can be customized if you'd like to share code between models. For example, if you have a python project set up like so:

```shell
numerai_models
├── common
├──── __init__.py
├──── data.py
├──── setup.py
├── model_1
├──── Dockerfile
├──── .dockerignore
├──── predict.py
├──── requirements.txt
└──── train.py
```

Where `common` is an installable python package you want to use in multiple models, you can add this line to model_1/Dockerfile: `RUN pip install ../common/`. Finally, run `numerai node deploy` from the `numerai_models` directory to install the package in the image, making it available to your model code.
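For instance, a hypothetical model_1/Dockerfile for this layout might look like the sketch below. The base image and paths are assumptions, and it assumes the build context is `numerai_models/` so that `common/` is visible to COPY:

```dockerfile
# Hypothetical Dockerfile for model_1 in the shared-code layout above.
FROM python:3.9-slim
WORKDIR /opt/app

# install the shared package first so this layer caches well
COPY common/ /opt/common/
RUN pip install /opt/common/

COPY model_1/requirements.txt .
RUN pip install -r requirements.txt
COPY model_1/ .
CMD ["python", "predict.py"]
```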
If you want to learn more about how to customize this file, [check out the Dockerfile reference](https://docs.docker.com/engine/reference/builder/).

### Cloud Components

The CLI uses [Terraform](https://www.terraform.io/) to provision cloud resources. Each component and the related cloud resource(s) are listed below. The links will take you to the AWS console where you can monitor any of these resources for a given node; just visit the link and select the resource with the same name as the node you want to monitor (further directions are given for each resource below).

- `Trigger`: A small function that schedules a "task" on your `Compute Cluster`. This "task" handles pulling the image that was created by the `Dockerfile` and running it as a `Container` on your `Compute Cluster`. This is handled by two resources:

  - **[API Gateway](https://console.aws.amazon.com/apigateway/main/apis)**:
    Hosts the webhook (HTTP endpoint) that Numerai calls to trigger your nodes.
    After clicking the link and selecting the resource, use the left sidebar to access metrics and logging.
  - **[Lambda](https://console.aws.amazon.com/lambda/home#/functions)**:
    Schedules your compute job when you call your Webhook URL.
    After clicking the link and selecting the resource, use the "Monitor" tab below the "Function Overview" section.

- `Container`: The thing that actually contains and runs your code on a computer provisioned by the `Compute Cluster`. The `--size` (or `-s`) flag on `numerai node config` sets the CPU and Memory limits for a `Container`. This is stored in one place:

  - **[ECR (Elastic Container Repository)](https://console.aws.amazon.com/ecr/repositories)**:
    Used for storing docker images. This is the location to which `numerai node deploy` will push your image.
    There is not much monitoring here, but you can view your images and when they were uploaded.

- `Compute Cluster`: A handler that accepts scheduled "tasks" and spins up and down computers to run `Containers`. This is handled by ECS:
  - **[ECS (Elastic Container Service)](https://console.aws.amazon.com/ecs/home#/clusters)**:
    This is where your containers will actually run and where you'll want to look if your containers don't seem to be scheduled/running.
    After clicking the link, you'll be able to scroll and monitor the top-level metrics of each cluster.
    After selecting a specific cluster, you can use the various tabs to view different components of the cluster (tasks are the runnable jobs
    that the Lambda schedules, instances are the computers the tasks run on, and metrics will show cluster-wide information)
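If you ever want to poke a node's `Trigger` by hand rather than through `numerai node test`, the webhook URL for each node is stored in `~/.numerai/nodes.json`. A hypothetical manual call (assuming `jq` is installed and `my-node` is one of your node names) might look like:

```shell
# Hypothetical manual webhook call; `numerai node test` is the supported path.
curl -X POST "$(jq -r '.["my-node"].webhook_url' ~/.numerai/nodes.json)"
```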
## Special Thanks

- Thanks to [uuazed](https://github.com/uuazed) for their work on [numerapi](https://github.com/uuazed/numerapi)
- Thanks to [hellno](https://github.com/hellno) for starting the Signals [ticker map](https://github.com/hellno/numerai-signals-tickermap)
- Thanks to tit_BTCQASH ([numerai profile](https://numer.ai/tit_btcqash) and [twitter profile](https://twitter.com/tit_BTCQASH)) for debugging the environment setup process on Windows 8
- Thanks to [eses-wk](https://github.com/eses-wk) for implementing Azure support

--------------------------------------------------------------------------------
/docs/architecture_aws.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/numerai/numerai-cli/e4e16559ac609801acd4598eee417efccff8d3c8/docs/architecture_aws.png

--------------------------------------------------------------------------------
/docs/architecture_prediction_network.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/numerai/numerai-cli/e4e16559ac609801acd4598eee417efccff8d3c8/docs/architecture_prediction_network.png

--------------------------------------------------------------------------------
/docs/aws_setup_guide.md:
--------------------------------------------------------------------------------
# AWS Setup Guide

Numerai CLI will create several resources in your AWS account to deploy your model. Follow the steps below to configure
your AWS account for use with the Numerai CLI.

If upgrading from numerai-cli 0.x, please review [Upgrading from numerai-cli v0.x](#upgrading-from-numerai-cli-v0x).

1. Create an [Amazon Web Services (AWS) Account](https://portal.aws.amazon.com/billing/signup)
2. Make sure you are signed in to the [AWS Console](https://console.aws.amazon.com)
3. Set up [AWS Billing](https://console.aws.amazon.com/billing/home?#/paymentmethods)
4. Create a [new IAM Policy](https://console.aws.amazon.com/iam/home?region=us-east-1#/policies$new?step=edit)
5. Select the "JSON" tab and overwrite the existing values with the following policy document:

```json
{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Sid": "VisualEditor0",
      "Effect": "Allow",
      "Action": [
        "logs:*",
        "s3:List*",
        "ecs:*",
        "lambda:*",
        "ecr:*",
        "ec2:*",
        "iam:*",
        "events:*",
        "batch:*"
      ],
      "Resource": "*"
    }
  ]
}
```

NOTE: For experienced cloud users, it may seem unsafe to have `:*` next to resources. You may experiment with constricting these permissions at your own risk, but future versions of the CLI may not work if you do this.

6. Click "Next" at the bottom until you reach "Review Policy"
7. Name your policy (e.g. "compute-setup-policy") and remember this name, then hit "Create Policy"
8. Create a [new IAM User](https://us-east-1.console.aws.amazon.com/iamv2/home?region=us-east-1#/users/create)
9. Give your user a name (like "numerai-compute") and click "Next"
10. Click "Attach policies directly" and search for the policy you created in the previous steps. Click the checkbox next to your policy and click "Next" and then "Create User"
11. Click on the name of your user and then click the "Security credentials" tab. Scroll down to "Access keys", click "Create access key", click "Command Line Interface", check the confirmation box, and click "Next" and "Create access key".
12. Record the "Access key ID" and "Secret access key" from the final step.

[Return to main guide](../README.md#getting-started)


## Upgrading from numerai-cli v0.x

Additional permissions are required to upgrade from numerai-cli v0.x to numerai-cli v1.x. In numerai-cli v0.x, AWS models used API Gateway, which is no longer used in v1.x.
Numerai CLI will need permissions to remove API Gateway resources from your account in order to successfully update. Replace the policy above or the policy on your current IAM User with the below policy during your upgrade:

```json
{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Sid": "VisualEditor0",
      "Effect": "Allow",
      "Action": [
        "apigateway:*",
        "logs:*",
        "s3:List*",
        "ecs:*",
        "lambda:*",
        "ecr:*",
        "ec2:*",
        "iam:*",
        "events:*",
        "batch:*"
      ],
      "Resource": "*"
    }
  ]
}
```

After your upgrade is complete, you can remove "apigateway:\*" from your policy if desired.

--------------------------------------------------------------------------------
/docs/azure_setup_guide.md:
--------------------------------------------------------------------------------
# Azure Setup Guide

Numerai CLI will create several resources in your Azure account to deploy and run your models.
Follow the steps below to give the Numerai CLI access to your Azure account.

1. Create an [Azure Account](https://signup.azure.com)
2. Make sure you are signed in to the [Azure Portal](https://portal.azure.com/)
3. Create an [Azure Subscription](https://portal.azure.com/#view/Microsoft_Azure_Billing/SubscriptionsBlade). Save your `Subscription ID` to use in the Numerai CLI later.
4. In `Azure Active Directory`, navigate to [App Registrations](https://portal.azure.com/#view/Microsoft_AAD_IAM/ActiveDirectoryMenuBlade/~/RegisteredApps)
5. Create a `New Registration` from the `App Registrations` blade. Give your application a name and leave the other options as default.
6. Creating a new registration will take you to your app's details page. If it doesn't, navigate back to your [App Registrations](https://portal.azure.com/#view/Microsoft_AAD_IAM/ActiveDirectoryMenuBlade/~/RegisteredApps) and select your application from the `All Applications` list. From the `Overview` page, save your application's `Application (client) ID` and `Directory (tenant) ID` for setting up Numerai CLI later.
7. Next we will create a new client secret. Navigate to `Certificates & secrets` from the menu on the left. Select `Client secrets` and click `Create new client secret`.
8. Give your secret any name. Once it is created, copy the `value` of your secret to your clipboard and save this value for setting up Numerai CLI later.
9. We need to give your app access to your subscription. Navigate back to [Subscriptions](https://portal.azure.com/#view/Microsoft_Azure_Billing/SubscriptionsBlade) and select the subscription you created earlier.
10.
Select `Access control (IAM)` from the menu on the left, click `+ Add`, and from the drop down menu select `Add role assignment`. Under `Role`, select `Privileged administrator roles`, and then `Owner`. 16 | 11. Next we will assign Owner permissions to your app. Select `Members` and then click `+ Select members`. In the search box, type the name of the app you created earlier and hit the return key. When your app appears, select your app and then click the `Select` button at the bottom of the screen. Finally, click `Review and assign`. 17 | 18 | [Return to main guide](../README.md#getting-started) 19 | -------------------------------------------------------------------------------- /docs/gcp_setup_guide.md: -------------------------------------------------------------------------------- 1 | # GCP Setup Guide 2 | 3 | Numerai CLI will create several resources in your GCP account to deploy your model. Follow the steps below to configure 4 | your GCP account for use with the Numerai CLI. 5 | 6 | 1. Create a [Google Cloud Account](https://cloud.google.com/gcp?hl=en) 7 | 2. Make sure you are signed in to the [Google Cloud Console](https://console.cloud.google.com) 8 | 3. Set up a [GCP Billing Account](https://console.cloud.google.com/billing/create) 9 | 4. If you don't already have a Google Cloud project you want to use, [create a new project](https://console.cloud.google.com/projectcreate) 10 | 5. Wait for the project to finish creating then ensure it's selected in the dropdown at the top of the console. 11 | 6. Ensure your billing account is assigned to your new project by checking [billing projects](https://console.cloud.google.com/billing/projects). 12 | 7. Next, [Create a service account](https://console.cloud.google.com/iam-admin/serviceaccounts/create) 13 | 8. Give your service account an ID and optionally a name and description, then click "Create and continue" to grant your service account access to this project. 14 | 9. Select "Basic" and then "Owner" to give your service account complete access to this project. Then select "Done". 15 | 10. You should now see your new service account in your list of service accounts. Click on your service account and then click the tab named "Keys" 16 | 11. Select "Add Key" and then click "Create a new key" in the drop down. A JSON file should be downloaded to your computer that contains this key content. 17 | 12. Record the complete path to the key you just downloaded for use in setting up the numerai-cli for GCP. (i.e. `/Users/john/Downloads/my-project-123456.json`) 18 | 19 | [Return to main guide](../README.md#getting-started) 20 | -------------------------------------------------------------------------------- /numerai/__init__.py: -------------------------------------------------------------------------------- 1 | from numerai.cli import main 2 | 3 | if __name__ == "__main__": 4 | main() 5 | -------------------------------------------------------------------------------- /numerai/cli/__init__.py: -------------------------------------------------------------------------------- 1 | import click 2 | from colorama import init 3 | 4 | from numerai.cli import ( 5 | constants, 6 | destroy_all, 7 | doctor, 8 | node, 9 | setup, 10 | uninstall, 11 | upgrade, 12 | misc, 13 | ) 14 | from numerai.cli.util import debug, docker, files, keys 15 | 16 | 17 | @click.group() 18 | def numerai(): 19 | """ 20 | This tool helps you setup Numer.ai Prediction Nodes and deploy your models to them. 
21 | """ 22 | pass 23 | 24 | 25 | def main(): 26 | if debug.is_win8(): 27 | init(wrap=False) 28 | else: 29 | init(autoreset=True) 30 | 31 | numerai.add_command(setup.setup) 32 | numerai.add_command(node.node) 33 | numerai.add_command(doctor.doctor) 34 | numerai.add_command(uninstall.uninstall) 35 | numerai.add_command(upgrade.upgrade) 36 | numerai.add_command(misc.copy_example) 37 | numerai.add_command(misc.list_constants) 38 | numerai.add_command(misc.add_volume_aws) 39 | numerai.add_command(destroy_all.destroy_all) 40 | numerai() 41 | -------------------------------------------------------------------------------- /numerai/cli/constants.py: -------------------------------------------------------------------------------- 1 | """Constants for Numerai CLI""" 2 | 3 | import os 4 | import json 5 | from pathlib import Path 6 | 7 | PACKAGE_PATH = os.path.dirname(__file__) 8 | CONFIG_PATH = os.path.join(str(Path.home()), ".numerai") 9 | KEYS_PATH = os.path.join(CONFIG_PATH, ".keys") 10 | GCP_KEYS_PATH = os.path.join(CONFIG_PATH, ".gcp_keys") 11 | NODES_PATH = os.path.join(CONFIG_PATH, "nodes.json") 12 | TERRAFORM_PATH = os.path.join(PACKAGE_PATH, "..", "terraform") 13 | EXAMPLE_PATH = os.path.join(PACKAGE_PATH, "..", "examples") 14 | 15 | EXAMPLES = os.listdir(EXAMPLE_PATH) 16 | 17 | PROVIDER_AWS = "aws" 18 | PROVIDER_AZURE = "azure" 19 | PROVIDER_GCP = "gcp" 20 | PROVIDERS = [PROVIDER_AWS, PROVIDER_AZURE, PROVIDER_GCP] 21 | 22 | LOG_TYPE_WEBHOOK = "webhook" 23 | LOG_TYPE_CLUSTER = "cluster" 24 | LOG_TYPES = [LOG_TYPE_WEBHOOK, LOG_TYPE_CLUSTER] 25 | 26 | SIZE_PRESETS = { 27 | # balanced cpu/mem 28 | "gen-xs": (512, 2048), 29 | "gen-sm": (1024, 4096), 30 | "gen-md": (2048, 8192), 31 | "gen-lg": (4096, 16384), 32 | "gen-xl": (8192, 30720), 33 | "gen-2xl": (16384, 61440), 34 | # cpu heavy 35 | "cpu-xs": (512, 1024), 36 | "cpu-sm": (1024, 2048), 37 | "cpu-md": (2048, 4096), 38 | "cpu-lg": (4096, 8192), 39 | "cpu-xl": (8192, 16384), 40 | "cpu-2xl": (16384, 30720), 41 | # mem heavy 42 | "mem-xs": (512, 4096), 43 | "mem-sm": (1024, 8192), 44 | "mem-md": (2048, 16384), 45 | "mem-lg": (4096, 30720), 46 | "mem-xl": (8192, 61440), 47 | "mem-2xl": (16384, 81920), 48 | "mem-3xl": (16384, 102400), 49 | "mem-4xl": (16384, 122880), 50 | } 51 | 52 | DEFAULT_EXAMPLE = "tournament-python3" 53 | DEFAULT_SIZE = "mem-md" 54 | DEFAULT_SIZE_GCP = "cpu-md" 55 | DEFAULT_PROVIDER = PROVIDER_AWS 56 | DEFAULT_PATH = os.getcwd() 57 | DEFAULT_TIMEOUT_MINUTES = 60 58 | DEFAULT_SETTINGS = { 59 | "provider": DEFAULT_PROVIDER, 60 | "cpu": SIZE_PRESETS[DEFAULT_SIZE][0], 61 | "memory": SIZE_PRESETS[DEFAULT_SIZE][1], 62 | "path": DEFAULT_PATH, 63 | "timeout_minutes": DEFAULT_TIMEOUT_MINUTES, 64 | } 65 | 66 | CONSTANTS_STR = f"""Default values (not your configured node values): 67 | ---Paths--- 68 | PACKAGE_PATH: {PACKAGE_PATH} 69 | CONFIG_PATH: {CONFIG_PATH} 70 | KEYS_PATH: {KEYS_PATH} 71 | NODES_PATH: {NODES_PATH} 72 | TERRAFORM_PATH: {TERRAFORM_PATH} 73 | EXAMPLE_PATH: {EXAMPLE_PATH} 74 | 75 | ---Cloud Interaction--- 76 | PROVIDERS: {PROVIDERS} 77 | LOG_TYPES: {LOG_TYPES} 78 | 79 | ---Prediction Nodes--- 80 | DEFAULT_EXAMPLE: {DEFAULT_EXAMPLE} 81 | DEFAULT_SIZE: {DEFAULT_SIZE} 82 | DEFAULT_PROVIDER: {DEFAULT_PROVIDER} 83 | DEFAULT_PATH: {DEFAULT_PATH} 84 | DEFAULT_SETTINGS: {json.dumps(DEFAULT_SETTINGS, indent=2)} 85 | """ 86 | -------------------------------------------------------------------------------- /numerai/cli/destroy_all.py: -------------------------------------------------------------------------------- 1 | """Destroy 
command for Numerai CLI"""

import click
from numerapi import base_api

from numerai.cli.constants import *
from numerai.cli.util.docker import terraform
from numerai.cli.util.files import load_or_init_nodes, store_config, copy_file
from numerai.cli.util.keys import get_provider_keys, get_numerai_keys


@click.command("destroy-all", help="Destroy all nodes")
@click.option("--verbose", "-v", is_flag=True)
@click.option("--preserve-node-config", "-p", is_flag=True)
@click.pass_context
def destroy_all(ctx, verbose, preserve_node_config):
    """
    Uses Terraform to destroy all Numerai Compute clusters for both Tournament and Signals.
    This will delete everything, including:
    - lambda url
    - docker container and associated task
    - all logs
    This command is idempotent and safe to run multiple times.
    """
    if not os.path.exists(CONFIG_PATH):
        click.secho(".numerai directory not setup, run `numerai setup`...", fg="red")
        return

    # click.confirm returns a bool; click.prompt would return the raw input
    # string, which is truthy even for "n"
    if not click.confirm("THIS WILL DELETE ALL YOUR NODES, ARE YOU SURE?"):
        exit(0)

    nodes_config = load_or_init_nodes()

    if len(nodes_config) == 0:
        click.secho("No nodes to destroy", fg="green")
        return

    try:
        provider_keys = {
            nodes_config[node]["provider"]: get_provider_keys(node)
            for node in nodes_config
        }
    except (KeyError, FileNotFoundError):
        click.secho(
            "make sure you run `numerai setup` and `numerai node config` first...",
            fg="red",
        )
        return

    click.secho("backing up nodes.json and deleting current config...")
    copy_file(NODES_PATH, f"{NODES_PATH}.backup", force=True, verbose=True)
    store_config(NODES_PATH, {})

    try:
        click.secho("destroying nodes...")
        for provider, keys in provider_keys.items():
            click.secho(f"deleting cloud resources for {provider}...")
            terraform(
                "destroy -auto-approve",
                verbose,
                provider,
                env_vars=keys,
                inputs={"node_config_file": "nodes.json"},
            )

    except Exception as e:
        click.secho(str(e), fg="red")
        click.secho("restoring nodes.json...", fg="green")
        store_config(NODES_PATH, nodes_config)
        return

    napi = base_api.Api(*get_numerai_keys())
    for node, node_config in nodes_config.items():
        if "model_id" in node_config and "webhook_url" in node_config:
            model_id = node_config["model_id"]
            webhook_url = node_config["webhook_url"]
            click.echo(f"deregistering webhook {webhook_url} for model {model_id}...")
            napi.set_submission_webhook(model_id, None)

    click.secho("Prediction Nodes destroyed successfully", fg="green")

    if preserve_node_config:
        click.secho("restoring nodes.json...", fg="green")
        store_config(NODES_PATH, nodes_config)

--------------------------------------------------------------------------------
/numerai/cli/doctor.py:
--------------------------------------------------------------------------------
"""Check and repair issues with environment"""

import json
import subprocess
import sys
from urllib import request

import click

from numerai.cli.util.files import load_or_init_nodes
from numerai.cli.util.debug import is_win8, is_win10
from numerai.cli.util.keys import (
    check_aws_validity,
    check_numerai_validity,
    check_azure_validity,
    get_numerai_keys,
    get_aws_keys,
    get_azure_keys,
)


def _version_tuple(version):
    """Parse a dotted release string into an int tuple so versions compare numerically."""
    return tuple(int(part) for part in version.split("."))


@click.command()
def doctor():
    """
    Checks and repairs your environment in case of errors.
    Attempts to provide information to debug your local machine.
    """
    # Check environment pre-reqs
    click.secho("Running the environment setup script for your OS...")
    env_setup_cmd = None
    env_setup_status = -1
    env_setup_err = ""
    if sys.platform == "linux" or sys.platform == "linux2":
        env_setup_cmd = (
            "sudo apt update && sudo apt install -y libcurl4 curl && "
            "sudo curl https://raw.githubusercontent.com/numerai/numerai-cli/master/scripts/setup-ubu.sh "
            "| sudo bash"
        )

    elif sys.platform == "darwin":
        env_setup_cmd = "curl https://raw.githubusercontent.com/numerai/numerai-cli/master/scripts/setup-mac.sh | bash"

    elif is_win10():
        env_setup_cmd = (
            'powershell -command "$Script = Invoke-WebRequest '
            "'https://raw.githubusercontent.com/numerai/numerai-cli/master/scripts/setup-win10.ps1'; "
            '$ScriptBlock = [ScriptBlock]::Create($Script.Content); Invoke-Command -ScriptBlock $ScriptBlock"'
        )

    elif is_win8():
        # TODO: check if more is needed?
        env_setup_cmd = "docker info"

    else:
        env_setup_status = 1
        env_setup_err = (
            f"Unrecognized Operating System {sys.platform}, "
            f"cannot run environment setup script, skipping..."
        )

    if env_setup_cmd is not None:
        res = subprocess.run(
            env_setup_cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
        )
        env_setup_status = res.returncode
        # decode stderr bytes so any error output prints cleanly
        env_setup_err = res.stderr.decode(errors="replace")

    # Check official (non-dev) version
    click.secho("Checking your numerai-cli version...")
    res = str(
        subprocess.run(
            "pip3 show numerai-cli",
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            shell=True,
        )
    )
    curr_ver = [s for s in res.split("\\n") if "Version:" in s][0].split(": ")[1]
    url = "https://pypi.org/pypi/numerai-cli/json"
    # sort releases numerically (a plain string sort would put "0.9" after "0.10")
    versions = list(
        reversed(
            sorted(
                filter(
                    lambda key: "dev" not in key,
                    json.load(request.urlopen(url))["releases"].keys(),
                ),
                key=_version_tuple,
            )
        )
    )

    # Check keys
    click.secho("Checking your API keys...")
    nodes_config = load_or_init_nodes()
    used_providers = [nodes_config[n]["provider"] for n in nodes_config]

    invalid_providers = []
    try:
        check_numerai_validity(*get_numerai_keys())
    except Exception:
        invalid_providers.append("numerai")
    if "aws" in used_providers:
        try:
            check_aws_validity(*get_aws_keys())
        except Exception:
            invalid_providers.append("aws")
    if "azure" in used_providers:
        try:
            check_azure_validity(*get_azure_keys())
        except Exception:
            invalid_providers.append("azure")

    if env_setup_status != 0:
        click.secho("Environment setup incomplete:", fg="red")
        click.secho(env_setup_err, fg="red")
        click.secho(
            "Ensure your OS is supported and read the Troubleshooting wiki: "
            "https://github.com/numerai/numerai-cli/wiki/Troubleshooting",
            fg="red",
        )
    else:
        click.secho("Environment setup with Docker and Python", fg="green")

    if _version_tuple(curr_ver) < _version_tuple(versions[0]):
        click.secho(
            "numerai-cli needs an upgrade "
            "(run `pip3 install -U numerai-cli` to fix)",
            fg="red",
        )
    else:
        click.secho("numerai-cli is up to date", fg="green")

    if len(invalid_providers):
        click.secho(
            f"Invalid provider keys: {invalid_providers} "
            "(run `numerai setup` to fix)",
            fg="red",
        )

    else:
        click.secho("API Keys working", fg="green")

    click.secho(
        "\nIf you need help troubleshooting or want to report a bug please read the"
        "\nTroubleshooting and Feedback section of the readme:"
        "\nhttps://github.com/numerai/numerai-cli#troubleshooting-and-feedback",
        fg="yellow",
    )

--------------------------------------------------------------------------------
/numerai/cli/misc.py:
--------------------------------------------------------------------------------
from numerai.cli.constants import *
from numerai.cli.util import files
from numerai.cli.util.terraform import apply_terraform

import click


@click.command()
@click.option(
    "--example",
    "-e",
    type=click.Choice(EXAMPLES),
    default=DEFAULT_EXAMPLE,
    help=f"Specify the example to copy, defaults to {DEFAULT_EXAMPLE}. "
    f"Options are {EXAMPLES}.",
)
@click.option(
    "--dest",
    "-d",
    type=str,
    help="Destination folder to which example code is written. "
    "Defaults to the name of the example.",
)
@click.option("--verbose", "-v", is_flag=True)
def copy_example(example, dest, verbose):
    """
    Copies all example files into the current directory.

    WARNING: this will overwrite the following files if they exist:

    - Python: Dockerfile, model.py, train.py, predict.py, and requirements.txt

    - RLang: Dockerfile, install_packages.R, main.R
    """
    files.copy_example(example, dest, verbose)


@click.command()
def list_constants():
    """
    Display default and constant values used by the CLI.

    Does NOT show currently configured node values.
    """
    click.secho(CONSTANTS_STR, fg="green")
    click.secho("SIZE_PRESETS:", fg="green")
    for size, preset in SIZE_PRESETS.items():
        suffix = "(default)" if size == DEFAULT_SIZE else ""
        suffix = "(default - gcp)" if size == DEFAULT_SIZE_GCP else suffix
        click.secho(
            f"  {size} -> cpus: {preset[0] / 1024}, "
            f"mem: {preset[1] / 1024} GB {suffix}",
            fg=(
                "green"
                if size == DEFAULT_SIZE or size == DEFAULT_SIZE_GCP
                else "yellow"
            ),
        )
    click.secho(
        "Due to GCP Cloud Run size constraints, 'mem' sizes are not allowed when using GCP."
    )
    click.secho(
        "For AWS, use one of these sizes, or specify your own CPU and Memory in cores and GB using --cpu and --memory options.\n"
        "See https://learn.microsoft.com/en-us/azure/container-apps/containers#configuration for Azure,\n"
        "or https://cloud.google.com/run/docs/configuring/services/memory-limits for GCP\n"
        "to learn more about allowed size presets for those providers."
    )


@click.command()
@click.option(
    "--size",
    "-s",
    type=int,
    required=True,
    help="Specify the volume size in GB you'd like your AWS nodes to share.",
)
@click.option("--verbose", "-v", is_flag=True)
def add_volume_aws(size, verbose):
    """
    Set the volume size for AWS nodes. This volume is shared by all nodes.
82 | """ 83 | click.secho("Setting volume size for AWS nodes...", fg="yellow") 84 | # get nodes config object 85 | nodes_config = files.load_or_init_nodes() 86 | print(nodes_config) 87 | # set volume size for all nodes to same size 88 | for node in nodes_config: 89 | nodes_config[node]["volume"] = size 90 | files.store_config(NODES_PATH, nodes_config) 91 | files.copy_file( 92 | NODES_PATH, 93 | f"{CONFIG_PATH}/{PROVIDER_AWS}/", 94 | force=True, 95 | verbose=verbose, 96 | ) 97 | click.secho(f"Applying terraform to add {size} GB volume...", fg="yellow") 98 | apply_terraform(nodes_config, [PROVIDER_AWS], PROVIDER_AWS, verbose=verbose) 99 | click.secho("Volume size updated successfully!", fg="green") 100 | -------------------------------------------------------------------------------- /numerai/cli/node/__init__.py: -------------------------------------------------------------------------------- 1 | """Init for node""" 2 | 3 | import json 4 | import logging 5 | import click 6 | 7 | from numerapi import base_api 8 | 9 | from numerai.cli.constants import * 10 | from numerai.cli.node.config import config 11 | from numerai.cli.node.deploy import deploy 12 | from numerai.cli.node.destroy import destroy 13 | from numerai.cli.node.test import test, status 14 | from numerai.cli.util.keys import get_numerai_keys 15 | 16 | # Setting azure's logging level "ERROR" to avoid spamming the terminal 17 | 18 | 19 | def tournaments_dict(): 20 | napi = base_api.Api() 21 | tournaments = napi.raw_query('query { tournaments { name tournament } }') 22 | return {t["tournament"]: t["name"] for t in tournaments["data"]["tournaments"]} 23 | 24 | 25 | def get_models(tournament): 26 | napi = base_api.Api(*get_numerai_keys()) 27 | models = napi.get_models(tournament) 28 | name_prefix = tournaments_dict()[tournament] 29 | model_dict = {} 30 | for model_name, model_id in models.items(): 31 | model_dict[model_name] = { 32 | "id": model_id, 33 | "name": f"{name_prefix}-{model_name}", 34 | "tournament": tournament, 35 | } 36 | return model_dict 37 | 38 | 39 | @click.group() 40 | @click.option("--verbose", "-v", is_flag=True) 41 | @click.option( 42 | "--model-name", 43 | "-m", 44 | type=str, 45 | prompt=True, 46 | help="The name of one of your models to configure the Prediction Node for." 47 | " It defaults to the first model returned from your account.", 48 | ) 49 | @click.option( 50 | "--tournament", 51 | "-t", 52 | default=8, 53 | help="Target a specific tournament number." 54 | " Defaults to Numerai Tournament/Classic." 55 | f" Available tournaments: {json.dumps(tournaments_dict(), indent=2)}", 56 | ) 57 | @click.pass_context 58 | def node(ctx, verbose, model_name, tournament): 59 | """ 60 | Commands to manage and test Prediction Nodes. 
61 | """ 62 | if not os.path.exists(CONFIG_PATH): 63 | click.secho( 64 | "cannot find .numerai config directory, " "run `numerai setup`", fg="red" 65 | ) 66 | exit(1) 67 | 68 | logger = logging.getLogger("azure") 69 | if verbose: 70 | logger.setLevel(logging.INFO) 71 | else: 72 | logger.setLevel(logging.ERROR) 73 | 74 | models = get_models(tournament) 75 | 76 | try: 77 | ctx.ensure_object(dict) 78 | ctx.obj["model"] = models[model_name] 79 | 80 | except KeyError: 81 | click.secho( 82 | f'Model with name "{model_name}" ' 83 | f"found in list of models:\n{json.dumps(models, indent=2)}" 84 | f'\n(use the "-s" flag for signals models)', 85 | fg="red", 86 | ) 87 | exit(1) 88 | 89 | 90 | node.add_command(config) 91 | node.add_command(deploy) 92 | node.add_command(destroy) 93 | node.add_command(test) 94 | node.add_command(status) 95 | -------------------------------------------------------------------------------- /numerai/cli/node/config.py: -------------------------------------------------------------------------------- 1 | """Config command for Numerai CLI""" 2 | 3 | import os 4 | 5 | from numerapi import base_api 6 | from numerai.cli.constants import ( 7 | DEFAULT_PROVIDER, 8 | DEFAULT_SIZE_GCP, 9 | PROVIDER_AWS, 10 | PROVIDER_AZURE, 11 | PROVIDERS, 12 | DEFAULT_SIZE, 13 | EXAMPLES, 14 | DEFAULT_SETTINGS, 15 | DEFAULT_PATH, 16 | SIZE_PRESETS, 17 | NODES_PATH, 18 | CONFIG_PATH, 19 | PROVIDER_GCP, 20 | ) 21 | from numerai.cli.util import docker 22 | from numerai.cli.util.files import ( 23 | load_or_init_nodes, 24 | store_config, 25 | copy_example, 26 | copy_file, 27 | ) 28 | from numerai.cli.util.keys import get_provider_keys, get_numerai_keys 29 | from numerai.cli.util.terraform import ( 30 | apply_terraform, 31 | create_azure_registry, 32 | create_gcp_registry, 33 | ) 34 | 35 | import click 36 | 37 | 38 | @click.command() 39 | @click.option("--verbose", "-v", is_flag=True) 40 | @click.option( 41 | "--provider", 42 | "-P", 43 | type=str, 44 | help=f"Select a cloud provider. One of {PROVIDERS}. " 45 | f"Defaults to {DEFAULT_PROVIDER}.", 46 | ) 47 | @click.option( 48 | "--size", 49 | "-s", 50 | type=str, 51 | help=f"CPU credits (cores * 1024) and Memory (in MiB) used in the deployed container. " 52 | f"Defaults to {DEFAULT_SIZE} (run `numerai list-constants` to see options).", 53 | ) 54 | @click.option( 55 | "--cpu", 56 | type=str, 57 | help=f"For AWS only, CPUs to allocate to your node" 58 | f"Defaults to 2 (run `numerai list-constants` to see options).", 59 | ) 60 | @click.option( 61 | "--memory", 62 | type=str, 63 | help=f"For AWS only, memory in GB to allocate to your node" 64 | f"Defaults to 16 (run `numerai list-constants` to see options).", 65 | ) 66 | @click.option( 67 | "--path", 68 | "-p", 69 | type=str, 70 | help=f"Target a file path. Defaults to current directory ({DEFAULT_PATH}).", 71 | ) 72 | @click.option( 73 | "--example", 74 | "-e", 75 | type=click.Choice(EXAMPLES), 76 | help=f"Specify an example to use for this node. Options are {EXAMPLES}.", 77 | ) 78 | @click.option( 79 | "--cron", 80 | "-c", 81 | type=str, 82 | help="A cron expression to trigger this node on a schedule " 83 | '(e.g. "30 18 ? * 7 *" to execute at 18:30 UTC every Saturday. ' 84 | '"0 30 13 * * SUN,TUE,WED,THU,FRI" to execute at 13:30 UTC every Sunday, Tuesday, Wednesday, Thursday and Friday). ' 85 | "This prevents your webhook from auto-registering. 
" 86 | "Check the AWS docs for more info about cron expressions: " 87 | "https://docs.aws.amazon.com/AmazonCloudWatch/latest/events/ScheduledEvents.html" 88 | "Check the Azure docs for more info about cron expressions: " 89 | "https://learn.microsoft.com/en-us/azure/azure-functions/functions-bindings-timer?tabs=python-v2%2Cin-process&pivots=programming-language-python#ncrontab-expressions", 90 | ) 91 | @click.option( 92 | "--timeout-minutes", 93 | type=str, 94 | help="Maximum time to allow this node to run when triggered. Defaults to 60 minutes. Valid for GCP only.", 95 | ) 96 | @click.option( 97 | "--register-webhook", 98 | "-r", 99 | is_flag=True, 100 | help="Forces your webhook to register with Numerai. " 101 | "Use in conjunction with options that prevent webhook auto-registering.", 102 | ) 103 | @click.pass_context 104 | def config( 105 | ctx, 106 | verbose, 107 | provider, 108 | size, 109 | cpu, 110 | memory, 111 | path, 112 | example, 113 | cron, 114 | timeout_minutes, 115 | register_webhook, 116 | ): 117 | """ 118 | Uses Terraform to create a full Numerai Compute cluster in your desired provider. 119 | Prompts for your cloud provider and Numerai API keys on first run, caches them in $HOME/.numerai. 120 | 121 | At the end of running, this will output a config file 'nodes.json'. 122 | """ 123 | ctx.ensure_object(dict) 124 | model = ctx.obj["model"] 125 | node = model["name"] 126 | model_id = model["id"] 127 | 128 | click.secho(f'Input provider "{provider}"...') 129 | click.secho(f'Input size "{size}"...') 130 | click.secho(f'Input node name "{node}"...') 131 | 132 | if example is not None: 133 | path = copy_example(example, path, verbose) 134 | 135 | # get nodes config object and set defaults for this node 136 | click.secho(f'configuring node "{node}"...') 137 | nodes_config = load_or_init_nodes() 138 | nodes_config.setdefault(node, {}) 139 | 140 | using_defaults = False 141 | if nodes_config[node] is None or nodes_config[node] == {}: 142 | using_defaults = True 143 | 144 | # Find any providers that will be affected by this config update 145 | affected_providers = [provider] 146 | 147 | if nodes_config[node] is not None and "provider" in nodes_config[node]: 148 | affected_providers.append(nodes_config[node]["provider"]) 149 | elif provider is None: 150 | affected_providers.append(DEFAULT_SETTINGS["provider"]) 151 | affected_providers = set(filter(None, affected_providers)) 152 | 153 | nodes_config[node].update( 154 | { 155 | key: default 156 | for key, default in DEFAULT_SETTINGS.items() 157 | if key not in nodes_config[node] 158 | } 159 | ) 160 | # update node as needed 161 | node_conf = nodes_config[node] 162 | 163 | if timeout_minutes: 164 | node_conf["timeout_minutes"] = timeout_minutes 165 | 166 | if provider: 167 | node_conf["provider"] = provider 168 | else: 169 | provider = node_conf["provider"] 170 | 171 | if timeout_minutes and provider == PROVIDER_AZURE: 172 | click.secho( 173 | "Timeout settings are unavailable for Azure and this input will be ignored.", 174 | fg="yellow", 175 | ) 176 | elif timeout_minutes: 177 | node_conf["timeout_minutes"] = timeout_minutes 178 | 179 | if provider == PROVIDER_GCP and size is not None and "mem-" in size: 180 | click.secho( 181 | "Invalid size: mem sizes are invalid for GCP due to sizing constraints with Google Cloud Run.", 182 | fg="red", 183 | ) 184 | click.secho( 185 | "Visit https://cloud.google.com/run/docs/configuring/services/memory-limits to learn more.", 186 | fg="red", 187 | ) 188 | exit(1) 189 | 190 | if size and (cpu or 
memory): 191 | click.secho( 192 | "Cannot provide size together with CPU or Memory. Either use size, or provide both CPU and Memory.", 193 | fg="red", 194 | ) 195 | exit(1) 196 | if (cpu or memory) and node_conf["provider"] != PROVIDER_AWS: 197 | click.secho( 198 | "Specifying CPU and Memory is only valid for AWS nodes (run `numerai list-constants` to see options).", 199 | fg="red", 200 | ) 201 | exit(1) 202 | elif (cpu or memory) and ( 203 | not (cpu or node_conf["cpu"]) or not (memory or node_conf["memory"]) 204 | ): 205 | click.secho( 206 | "One of CPU and Memory is missing either from your options or from your node configuration. " 207 | "Provide both CPU and Memory to configure node size, or use size. " 208 | "(run `numerai list-constants` to see options).", 209 | fg="red", 210 | ) 211 | exit(1) 212 | elif cpu or memory: 213 | if cpu: 214 | node_conf["cpu"] = int(cpu) * 1024 215 | if memory: 216 | node_conf["memory"] = int(memory) * 1024 217 | elif size: 218 | node_conf["cpu"] = SIZE_PRESETS[size][0] 219 | node_conf["memory"] = SIZE_PRESETS[size][1] 220 | elif node_conf["provider"] == PROVIDER_GCP and using_defaults: 221 | node_conf["cpu"] = SIZE_PRESETS[DEFAULT_SIZE_GCP][0] 222 | node_conf["memory"] = SIZE_PRESETS[DEFAULT_SIZE_GCP][1] 223 | 224 | if path: 225 | node_conf["path"] = os.path.abspath(path) 226 | if model_id: 227 | node_conf["model_id"] = model_id 228 | if cron: 229 | node_conf["cron"] = cron 230 | nodes_config[node] = node_conf 231 | 232 | click.secho(f'Current node config: "{node_conf}"...') 233 | 234 | # double check there is a dockerfile in the path we are about to configure 235 | docker.check_for_dockerfile(nodes_config[node]["path"]) 236 | store_config(NODES_PATH, nodes_config) 237 | 238 | # Added after tf directory restructure: copy nodes.json to providers' tf directory 239 | for affected_provider in affected_providers: 240 | copy_file( 241 | NODES_PATH, 242 | f"{CONFIG_PATH}/{affected_provider}/", 243 | force=True, 244 | verbose=verbose, 245 | ) 246 | 247 | # terraform apply: create cloud resources 248 | provider_keys = get_provider_keys(node) 249 | click.secho("Running terraform to provision cloud infrastructure...") 250 | 251 | # Azure only: Need to create a master Azure Container Registry and push a dummy placeholder image, before deploying the rest of the resources 252 | if provider == "azure": 253 | provider_registry_conf = create_azure_registry( 254 | provider, provider_keys, verbose=verbose 255 | ) 256 | node_conf.update(provider_registry_conf) 257 | node_conf["docker_repo"] = f'{node_conf["acr_login_server"]}/{node}' 258 | docker.login(node_conf, verbose) 259 | try: 260 | docker.manifest_inspect(node_conf["docker_repo"], verbose) 261 | except Exception as e: 262 | if verbose: click.echo(e) 263 | docker.pull("hello-world:linux", verbose) 264 | docker.tag("hello-world:linux", node_conf["docker_repo"], verbose) 265 | docker.push(node_conf["docker_repo"], verbose) 266 | nodes_config[node] = node_conf 267 | elif provider == "gcp": 268 | provider_registry_conf = create_gcp_registry(provider, verbose=verbose) 269 | node_conf.update(provider_registry_conf) 270 | registry_parts = node_conf["registry_id"].split("/") 271 | node_conf["artifact_registry_login_url"] = ( 272 | f"https://{registry_parts[3]}-docker.pkg.dev/" 273 | ) 274 | node_conf["docker_repo"] = ( 275 | f"{registry_parts[3]}-docker.pkg.dev/{registry_parts[1]}/numerai-container-registry/{node}:latest" 276 | ) 277 | docker.login(node_conf, verbose) 278 | try: 279 | docker.manifest_inspect(node_conf["docker_repo"], verbose) 280 | except 
Exception as e: 281 | docker.pull("hello-world:linux", verbose) 282 | docker.tag("hello-world:linux", node_conf["docker_repo"], verbose) 283 | docker.push(node_conf["docker_repo"], verbose) 284 | nodes_config[node] = node_conf 285 | 286 | store_config(NODES_PATH, nodes_config) 287 | 288 | apply_terraform(nodes_config, affected_providers, provider, verbose) 289 | 290 | webhook_url = nodes_config[node]["webhook_url"] 291 | napi = base_api.Api(*get_numerai_keys()) 292 | if not cron or register_webhook: 293 | click.echo(f"registering webhook {webhook_url} for model {model_id}...") 294 | napi.set_submission_webhook(model_id, webhook_url) 295 | 296 | else: 297 | click.echo(f"removing registered webhook for model {model_id}...") 298 | napi.set_submission_webhook(model_id, None) 299 | 300 | click.secho( 301 | "Prediction Node configured successfully. " "Next: deploy and test your node", 302 | fg="green", 303 | ) 304 | -------------------------------------------------------------------------------- /numerai/cli/node/deploy.py: -------------------------------------------------------------------------------- 1 | """Deploy command for Numerai CLI""" 2 | import click 3 | from numerai.cli.util import files, docker 4 | 5 | 6 | @click.command() 7 | @click.option("--verbose", "-v", is_flag=True) 8 | @click.pass_context 9 | def deploy(ctx, verbose): 10 | """Builds and pushes your docker image to the AWS ECR / Azure ACR repo""" 11 | ctx.ensure_object(dict) 12 | model = ctx.obj["model"] 13 | node = model["name"] 14 | node_config = files.load_or_init_nodes(node) 15 | 16 | docker.check_for_dockerfile(node_config["path"]) 17 | 18 | click.echo("building container image (this may take several minutes)...") 19 | docker.build(node_config, node, verbose) 20 | 21 | click.echo("logging into container registry...") 22 | docker.login(node_config, verbose) 23 | 24 | click.echo("pushing image to registry (this may take several minutes)...") 25 | docker.push(node_config["docker_repo"], verbose) 26 | 27 | click.echo("cleaning up local images...") 28 | docker.cleanup(node_config) 29 | 30 | click.secho("Prediction Node deployed. Next: test your node.", fg="green") 31 | -------------------------------------------------------------------------------- /numerai/cli/node/destroy.py: -------------------------------------------------------------------------------- 1 | """Destroy command for Numerai CLI""" 2 | 3 | import click 4 | from numerapi import base_api 5 | 6 | from numerai.cli.constants import * 7 | from numerai.cli.util.docker import terraform 8 | from numerai.cli.util.files import load_or_init_nodes, store_config, copy_file 9 | from numerai.cli.util.keys import get_provider_keys, get_numerai_keys 10 | 11 | 12 | @click.command() 13 | @click.option("--preserve-node-config", "-p", is_flag=True) 14 | @click.option("--verbose", "-v", is_flag=True) 15 | @click.pass_context 16 | def destroy(ctx, preserve_node_config, verbose): 17 | """ 18 | Uses Terraform to destroy a Numerai Compute cluster. 19 | This will delete everything, including: 20 | - lambda url 21 | - docker container and associated task 22 | - all logs 23 | This command is idempotent and safe to run multiple times. 
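A minimal usage sketch ("my_model" is a hypothetical model name): `numerai node -m my_model destroy`. Pass -p/--preserve-node-config to tear down the cloud resources while keeping the node's entry in nodes.json.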
24 | """ 25 | 26 | ctx.ensure_object(dict) 27 | model = ctx.obj["model"] 28 | node = model["name"] 29 | if not os.path.exists(CONFIG_PATH): 30 | click.secho(".numerai directory not setup, run `numerai setup`...", fg="red") 31 | return 32 | 33 | try: 34 | nodes_config = load_or_init_nodes() 35 | node_config = nodes_config[node] 36 | provider_keys = get_provider_keys(node) 37 | provider = node_config["provider"] 38 | except (KeyError, FileNotFoundError) as e: 39 | click.secho( 40 | f"make sure you run `numerai setup` and " 41 | f"`numerai node -n {node} config` first...", 42 | fg="red", 43 | ) 44 | return 45 | 46 | if not preserve_node_config: 47 | click.secho("backing up nodes.json...") 48 | copy_file(NODES_PATH, f"{NODES_PATH}.backup", force=True, verbose=True) 49 | 50 | try: 51 | click.secho( 52 | f"deleting node configuration" 53 | + (" (temporarily)" if preserve_node_config else "") 54 | + "..." 55 | ) 56 | del nodes_config[node] 57 | store_config(NODES_PATH, nodes_config) 58 | 59 | click.secho("deleting cloud resources for node...") 60 | terraform( 61 | "apply -auto-approve", 62 | verbose, 63 | provider, 64 | env_vars=provider_keys, 65 | inputs={"node_config_file": "nodes.json"}, 66 | ) 67 | 68 | except Exception as e: 69 | click.secho(e.__str__(), fg="red") 70 | nodes_config[node] = node_config 71 | store_config(NODES_PATH, nodes_config) 72 | return 73 | 74 | if "model_id" in node_config and "webhook_url" in node_config: 75 | napi = base_api.Api(*get_numerai_keys()) 76 | model_id = node_config["model_id"] 77 | webhook_url = node_config["webhook_url"] 78 | click.echo(f"deregistering webhook {webhook_url} for model {model_id}...") 79 | napi.set_submission_webhook(model_id, None) 80 | 81 | click.secho("Prediction Node destroyed successfully", fg="green") 82 | 83 | if preserve_node_config: 84 | click.secho("re-adding node config to nodes.json...", fg="green") 85 | nodes_config[node] = node_config 86 | store_config(NODES_PATH, nodes_config) 87 | -------------------------------------------------------------------------------- /numerai/cli/node/test.py: -------------------------------------------------------------------------------- 1 | import time 2 | import json 3 | from datetime import datetime, timedelta 4 | 5 | import boto3 6 | import botocore 7 | import click 8 | import requests 9 | from numerapi import base_api 10 | 11 | from numerai.cli.constants import * 12 | from numerai.cli.util import docker 13 | from numerai.cli.util.debug import exception_with_msg 14 | from numerai.cli.util.files import load_or_init_nodes 15 | from numerai.cli.util.keys import ( 16 | get_aws_keys, 17 | get_numerai_keys, 18 | get_azure_keys, 19 | get_gcp_keys, 20 | ) 21 | 22 | from azure.identity import ClientSecretCredential 23 | from azure.mgmt.storage import StorageManagementClient 24 | from azure.core.credentials import AzureNamedKeyCredential 25 | from azure.data.tables import TableServiceClient, TableClient 26 | from datetime import datetime, timedelta, timezone 27 | import pandas as pd 28 | import json 29 | import google.cloud.run_v2 as run_v2 30 | import google.cloud.logging_v2 as logging_v2 31 | 32 | 33 | @click.command() 34 | @click.option( 35 | "--local", 36 | "-l", 37 | type=str, 38 | is_flag=True, 39 | help="Test the container locally, uses value specified with --command. ", 40 | ) 41 | @click.option( 42 | "--command", 43 | "-c", 44 | type=str, 45 | default="", 46 | help="Used to override the terminal command during local testing. 
" 47 | "Defaults to the command specified in the Dockerfile.", 48 | ) 49 | @click.option("--verbose", "-v", is_flag=True) 50 | @click.pass_context 51 | def test(ctx, local, command, verbose): 52 | """ 53 | Full end-to-end cloud or local test for a Prediction Node. 54 | 55 | This checks that: 56 | 1. Numerai can reach the Trigger 57 | 2. The Trigger schedules a Container to run 58 | 3. The Container starts up on the Compute Cluster 59 | 4. The Container uploads a submission with the Trigger ID assigned to it 60 | """ 61 | ctx.ensure_object(dict) 62 | model = ctx.obj["model"] 63 | node = model["name"] 64 | tournament = model["tournament"] 65 | node_config = load_or_init_nodes(node) 66 | provider = node_config["provider"] 67 | 68 | if local: 69 | click.secho("starting local test; building container...") 70 | docker.build(node_config, node, verbose) 71 | 72 | click.secho("running container...") 73 | docker.run(node_config, verbose, command=command) 74 | 75 | api = base_api.Api(*get_numerai_keys()) 76 | trigger_id = None 77 | try: 78 | if "cron" in node_config: 79 | click.secho("Attempting to manually trigger Cron node...") 80 | res = requests.post(node_config["webhook_url"], json.dumps({})) 81 | res.raise_for_status() 82 | 83 | else: 84 | click.secho("Checking if Numerai can Trigger your model...") 85 | res = api.raw_query( 86 | """mutation ( $modelId: String! ) { 87 | triggerModelWebhook( modelId: $modelId ) 88 | }""", 89 | variables={ 90 | "modelId": node_config["model_id"], 91 | }, 92 | authorization=True, 93 | ) 94 | if provider in PROVIDERS: 95 | trigger_id = res["data"]["triggerModelWebhook"] 96 | else: 97 | click.secho(f"Unsupported provider: '{provider}'", fg="red") 98 | exit(1) 99 | click.secho(f"Trigger ID assigned for this test: {trigger_id}", fg="green") 100 | 101 | if verbose: 102 | click.echo(f"response:\n{res}") 103 | click.secho("Webhook reachable...", fg="green") 104 | 105 | except ValueError as e: 106 | click.secho("there was a problem calling your webhook...", fg="red") 107 | if "Internal Server Error" in str(e): 108 | click.secho("attempting to dump webhook logs", fg="red") 109 | monitor(node, node_config, True, 20, LOG_TYPE_WEBHOOK, False) 110 | return 111 | 112 | click.secho("checking task status...") 113 | 114 | monitor( 115 | node, 116 | node_config, 117 | verbose, 118 | 15, 119 | LOG_TYPE_CLUSTER, 120 | follow_tail=True, 121 | trigger_id=trigger_id, 122 | ) 123 | if node_config["provider"] == "azure": 124 | time.sleep(5) 125 | 126 | if node_config["provider"] == "azure": 127 | click.secho( 128 | "[Azure node] Test complete, your model should submits automatically! " 129 | "You may check your submission here: https://numer.ai/models", 130 | fg="green", 131 | ) 132 | return 133 | 134 | click.secho("checking for submission...") 135 | res = api.raw_query( 136 | """query ( $modelId: String! 
) { 137 | submissions( modelId: $modelId ){ 138 | round{ number, tournament }, 139 | triggerId 140 | insertedAt 141 | } 142 | }""", 143 | variables={"modelId": node_config["model_id"]}, 144 | authorization=True, 145 | ) 146 | curr_round = api.get_current_round(tournament) 147 | latest_subs = sorted( 148 | filter( 149 | lambda sub: sub["round"]["number"] == curr_round, res["data"]["submissions"] 150 | ), 151 | key=lambda sub: sub["insertedAt"], 152 | reverse=True, 153 | ) 154 | if len(latest_subs) == 0: 155 | click.secho("No submission found for current round, test failed", fg="red") 156 | return 157 | 158 | latest_sub = latest_subs[0] 159 | 160 | if "cron" in node_config: 161 | latest_date = datetime.strptime(latest_sub["insertedAt"], "%Y-%m-%dT%H:%M:%SZ") 162 | if latest_date < datetime.utcnow() - timedelta(minutes=5): 163 | click.secho( 164 | "No submission appeared in the last 5 minutes, be sure that your node" 165 | " is submitting correctly, check the numerai-cli wiki for more" 166 | " information on how to monitor parts of your node.", 167 | fg="red", 168 | ) 169 | 170 | if trigger_id != latest_sub["triggerId"]: 171 | click.secho( 172 | "Your node did not submit the Trigger ID assigned during this test, " 173 | "please ensure your node uses numerapi >= 0.2.4 (ignore if using rlang or Azure as provider)", 174 | fg="red", 175 | ) 176 | return 177 | 178 | click.secho("Submission uploaded correctly", fg="green") 179 | click.secho("Test complete, your model now submits automatically!", fg="green") 180 | 181 | 182 | def monitor(node, config, verbose, num_lines, log_type, follow_tail, trigger_id=None): 183 | if log_type not in LOG_TYPES: 184 | raise exception_with_msg( 185 | f"Unknown log type '{log_type}', " f"must be one of {LOG_TYPES}" 186 | ) 187 | 188 | if config["provider"] == PROVIDER_AWS: 189 | monitor_aws(node, config, num_lines, log_type, follow_tail, verbose, trigger_id) 190 | elif config["provider"] == PROVIDER_AZURE: 191 | monitor_azure(node, config, verbose) 192 | elif config["provider"] == PROVIDER_GCP: 193 | monitor_gcp(node, config, verbose, log_type, trigger_id) 194 | else: 195 | click.secho(f"Unsupported provider: '{config['provider']}'", fg="red") 196 | return 197 | 198 | 199 | def monitor_aws(node, config, num_lines, log_type, follow_tail, verbose, trigger_id): 200 | aws_public, aws_secret = get_aws_keys() 201 | logs_client = boto3.client( 202 | "logs", 203 | region_name="us-east-1", 204 | aws_access_key_id=aws_public, 205 | aws_secret_access_key=aws_secret, 206 | ) 207 | ecs_client = boto3.client( 208 | "ecs", 209 | region_name="us-east-1", 210 | aws_access_key_id=aws_public, 211 | aws_secret_access_key=aws_secret, 212 | ) 213 | 214 | monitor_start_time = datetime.now(timezone.utc) - timedelta(minutes=1) 215 | next_token = None 216 | log_lines = 0 217 | monitoring_done = False 218 | time_lapse = datetime.now(timezone.utc) - monitor_start_time 219 | task = None 220 | 221 | if verbose and log_type == LOG_TYPE_WEBHOOK: 222 | print_aws_webhook_logs(logs_client, config["webhook_log_group"], num_lines) 223 | 224 | while time_lapse <= timedelta(minutes=15) and monitoring_done == False: 225 | task, monitoring_done, message, color = get_recent_task_status_aws( 226 | config["cluster_arn"], ecs_client, node, trigger_id 227 | ) 228 | if task is None: 229 | click.secho(message, fg=color) 230 | else: 231 | if verbose and log_type == LOG_TYPE_CLUSTER: 232 | next_token, new_log_lines = print_aws_logs( 233 | logs_client, 234 | config["cluster_log_group"], 235 | 
f'ecs/default/{task["taskArn"].split("/")[-1]}', 236 | next_token=next_token, 237 | fail_on_not_found=False, 238 | ) 239 | log_lines += new_log_lines 240 | if log_lines == 0: 241 | next_token = None 242 | else: 243 | click.secho(message, fg=color) 244 | 245 | if not monitoring_done: 246 | time.sleep(5 if verbose else 15) 247 | time_lapse = datetime.now(timezone.utc) - monitor_start_time 248 | 249 | if ( 250 | monitoring_done 251 | and time_lapse <= timedelta(minutes=15) 252 | and verbose 253 | and log_lines == 0 254 | and task is not None 255 | ): 256 | click.secho( 257 | "Node executed successfully, but there are no logs yet.\n" 258 | "You can safely exit at this time, or the CLI will try to collect logs for the next 120 seconds.", 259 | fg="yellow", 260 | ) 261 | log_monitor_start_time = datetime.now(timezone.utc) 262 | log_time_lapse = datetime.now(timezone.utc) - log_monitor_start_time 263 | while log_time_lapse <= timedelta(minutes=2) and log_lines == 0: 264 | time.sleep(5) 265 | log_time_lapse = datetime.now(timezone.utc) - log_monitor_start_time 266 | next_token, new_log_lines = print_aws_logs( 267 | logs_client, 268 | config["cluster_log_group"], 269 | f'ecs/default/{task["taskArn"].split("/")[-1]}', 270 | next_token=next_token, 271 | fail_on_not_found=False, 272 | ) 273 | log_lines += new_log_lines 274 | if log_lines == 0: 275 | next_token = None 276 | 277 | elif time_lapse >= timedelta(minutes=15) and not monitoring_done: 278 | click.secho( 279 | f"\nTimeout after 15 minutes, please run the `numerai node status` " 280 | f"command for this model or visit the log console:\n" 281 | f"https://console.aws.amazon.com/cloudwatch/home?" 282 | f"region=us-east-1#logsV2:log-groups/log-group/$252Ffargate$252Fservice$252F{node}", 283 | fg="red", 284 | ) 285 | 286 | 287 | def get_recent_task_status_aws(cluster_arn, ecs_client, node, trigger_id): 288 | tasks = ecs_client.list_tasks(cluster=cluster_arn, family=node) 289 | 290 | pending_codes = ["PROVISIONING", "PENDING", "ACTIVATING"] 291 | running_codes = ["RUNNING", "DEACTIVATING", "STOPPING"] 292 | stopped_codes = ["DEPROVISIONING", "STOPPED", "DELETED"] 293 | 294 | # try to find stopped tasks 295 | if len(tasks["taskArns"]) == 0: 296 | tasks = ecs_client.list_tasks( 297 | cluster=cluster_arn, desiredStatus="STOPPED", family=node 298 | ) 299 | 300 | if len(tasks["taskArns"]) == 0: 301 | message = ( 302 | "No recent tasks found!" 303 | if trigger_id is None 304 | else "No tasks yet, still waiting..." 305 | ) 306 | color = "red" if trigger_id is None else "yellow" 307 | return None, trigger_id is None, message, color 308 | 309 | tasks = ecs_client.describe_tasks(cluster=cluster_arn, tasks=tasks["taskArns"]) 310 | 311 | matched_task = None 312 | 313 | if trigger_id is not None: 314 | for task in tasks["tasks"]: 315 | matching_override = list( 316 | filter( 317 | lambda e: e["value"] == trigger_id, 318 | task["overrides"]["containerOverrides"][0]["environment"], 319 | ) 320 | ) 321 | if len(matching_override) == 1: 322 | matched_task = task 323 | break 324 | else: 325 | matched_task = tasks["tasks"][-1] 326 | 327 | if matched_task is None: 328 | return matched_task, False, "Waiting for job to start...", "yellow" 329 | elif ( 330 | matched_task["lastStatus"] in stopped_codes 331 | and "reason" in matched_task["containers"][0] 332 | ): 333 | return ( 334 | matched_task, 335 | True, 336 | f'Job failed! 
Container exited with code {matched_task["containers"][0]["exitCode"]}\r', 337 | "red", 338 | ) 339 | elif matched_task["lastStatus"] in stopped_codes: 340 | return matched_task, True, "Job execution finished!\r", "green" 341 | elif matched_task["lastStatus"] in pending_codes: 342 | return matched_task, False, "Waiting for job to start...", "yellow" 343 | elif matched_task["lastStatus"] in running_codes: 344 | return matched_task, False, "Waiting for job to complete...", "yellow" 345 | return matched_task, False, "Waiting for job to start...", "yellow" 346 | 347 | 348 | def print_aws_webhook_logs( 349 | logs_client, family, limit, next_token=None, raise_on_error=True 350 | ): 351 | streams = logs_client.describe_log_streams( 352 | logGroupName=family, orderBy="LastEventTime", descending=True 353 | ) 354 | 355 | if len(streams["logStreams"]) == 0: 356 | if not raise_on_error: 357 | return False 358 | raise exception_with_msg( 359 | "No logs found. Make sure the webhook has triggered by checking " 360 | "`numerai node status` and make sure a task is in the RUNNING state " 361 | "(this can take a few minutes). Also, make sure your webhook has " 362 | "triggered at least once by running `numerai node test`" 363 | ) 364 | 365 | name = streams["logStreams"][0]["logStreamName"] 366 | print_aws_logs(logs_client, family, name, limit, next_token) 367 | return True 368 | 369 | 370 | def print_aws_logs( 371 | logs_client, family, name, limit=None, next_token=None, fail_on_not_found=True 372 | ): 373 | kwargs = {} # boto is weird, and doesn't allow `None` for parameters 374 | if next_token is not None: 375 | kwargs["nextToken"] = next_token 376 | if limit is not None: 377 | kwargs["limit"] = limit 378 | try: 379 | events = logs_client.get_log_events( 380 | logGroupName=family, logStreamName=name, **kwargs 381 | ) 382 | except botocore.exceptions.ClientError as error: 383 | if error.response["Error"]["Code"] == "ResourceNotFoundException": 384 | if fail_on_not_found: 385 | raise error 386 | else: 387 | return None, 0 388 | else: 389 | raise error 390 | 391 | if len(events["events"]) == limit: 392 | click.echo("...more log lines available: use -n option to get more...") 393 | for event in events["events"]: 394 | click.echo( 395 | f"[{name}] {str(datetime.fromtimestamp(event['timestamp'] / 1000))}: {event['message']}" 396 | ) 397 | 398 | return events["nextForwardToken"], len(events["events"]) 399 | 400 | 401 | def monitor_azure(node, config, verbose): 402 | """ 403 | Monitor the logs of a node on Azure to see if the submission is completed 404 | """ 405 | 406 | # Go get the log for all webhook calls started in the last 1 minutes 407 | monitor_start_time = datetime.now(timezone.utc) - timedelta(minutes=1) 408 | 409 | azure_subs_id, azure_client, azure_tenant, azure_secret = get_azure_keys() 410 | credentials = ClientSecretCredential( 411 | client_id=azure_client, tenant_id=azure_tenant, client_secret=azure_secret 412 | ) 413 | # Get Azure Storage account key, using resource group name and trigger function's storage account name 414 | 415 | resource_group_name = config["resource_group_name"] 416 | storage_account_name = config["webhook_storage_account_name"] 417 | storage_client = StorageManagementClient( 418 | credential=credentials, subscription_id=azure_subs_id 419 | ) 420 | storage_keys = storage_client.storage_accounts.list_keys( 421 | resource_group_name=resource_group_name, account_name=storage_account_name 422 | ) 423 | if len([keys for keys in storage_keys.keys]) == 0: 424 | click.secho( 425 | 
f"Webhook's storage account key not found, check storage account name: {storage_account_name}", 426 | fg="red", 427 | ) 428 | exit(1) 429 | 430 | # Now we have the storage account's access keys 431 | storage_key = [keys for keys in storage_keys.keys][0] 432 | access_key = storage_key.value 433 | endpoint_suffix = "core.windows.net" 434 | account_name = storage_account_name 435 | endpoint = f"{account_name}.table.{endpoint_suffix}" 436 | connection_string = f"DefaultEndpointsProtocol=https;AccountName={account_name};AccountKey={access_key};EndpointSuffix={endpoint_suffix}" 437 | 438 | # Get the table that store the run history for the webhook from the Azure storage account 439 | table_credential = AzureNamedKeyCredential(storage_account_name, access_key) 440 | table_service_client = TableServiceClient( 441 | endpoint=endpoint, credential=table_credential 442 | ) 443 | table_name = [ 444 | table.name 445 | for table in table_service_client.list_tables() 446 | if "History" in table.name 447 | ][0] 448 | 449 | # Query the webhook's History table and get records (entities) 450 | table_client = TableClient.from_connection_string(connection_string, table_name) 451 | 452 | # Continue to query the table until the webhook's run is done (log printed) or 15 minutes have passed 453 | time_lapse = datetime.now(timezone.utc) - monitor_start_time 454 | monitoring_done = False 455 | shown_log_row_key = list() 456 | while time_lapse <= timedelta(minutes=15) and monitoring_done == False: 457 | if len(shown_log_row_key) == 0 and verbose: 458 | click.secho(f"No log events yet, still waiting...\r", fg="yellow") 459 | else: 460 | click.secho( 461 | f"Waiting for submission run to finish...\r", 462 | fg="yellow", 463 | ) 464 | monitoring_done, shown_log_row_key = azure_refresh_and_print_log( 465 | table_client, monitor_start_time, shown_log_row_key 466 | ) 467 | time.sleep(15) 468 | # Update time lapse 469 | time_lapse = datetime.now(timezone.utc) - monitor_start_time 470 | if time_lapse >= timedelta(minutes=15): 471 | click.secho( 472 | f"Monitor timeout after 15 minutes, container run status cannot be determined. 
Recommended to check ran status directly on Azure Portal", 473 | fg="red", 474 | ) 475 | exit(1) 476 | 477 | 478 | def azure_refresh_and_print_log( 479 | table_client, monitor_start_time, shown_log_row_key=list() 480 | ): 481 | """ 482 | Refresh the log table and print the new log entries 483 | """ 484 | monitoring_done = False 485 | log_list = [entity for entity in table_client.list_entities()] 486 | if len(log_list) == 0: 487 | return monitoring_done, shown_log_row_key 488 | log_df = pd.DataFrame(log_list) 489 | relevant_cols = [ 490 | "RowKey", 491 | "EventType", 492 | "_Timestamp", 493 | "Name", 494 | "OrchestrationInstance", 495 | "Result", 496 | "OrchestrationStatus", 497 | ] 498 | available_cols = [col for col in relevant_cols if col in log_df.columns] 499 | log_df = log_df[available_cols].dropna(subset=["EventType"]) 500 | log_df = log_df[log_df["_Timestamp"] > monitor_start_time] 501 | execution_log = log_df[log_df["EventType"].str.contains("Execution")] 502 | 503 | # Remove logs that have been shown before 504 | log_df = log_df[log_df["RowKey"].isin(shown_log_row_key) == False].reset_index() 505 | 506 | if len(log_df) > 0: 507 | for i in range(len(log_df)): 508 | shown_log_row_key.append(log_df.loc[i, "RowKey"]) 509 | 510 | if log_df.loc[i, "EventType"] == "ExecutionStarted": 511 | az_func_name1 = log_df.loc[i, "Name"] 512 | click.secho( 513 | f"Azure Trigger Function: '{az_func_name1}' started", fg="green" 514 | ) 515 | # execute_st=log_df.loc[i,'_Timestamp'] 516 | elif log_df.loc[i, "EventType"] == "ExecutionCompleted": 517 | az_func_name1 = execution_log.loc[ 518 | execution_log["EventType"] == "ExecutionStarted", "Name" 519 | ].values[0] 520 | execute_st = execution_log.loc[ 521 | execution_log["EventType"] == "ExecutionStarted", "_Timestamp" 522 | ].values[-1] 523 | execute_et = execution_log.loc[ 524 | execution_log["EventType"] == "ExecutionCompleted", "_Timestamp" 525 | ].values[-1] 526 | time_taken = execute_et - execute_st 527 | click.secho( 528 | f"Azure Trigger Function: '{az_func_name1}' ended", fg="green" 529 | ) 530 | click.secho( 531 | f"'{az_func_name1}' time taken: {time_taken.astype('timedelta64[s]').astype('float')/60:.2f} mins" 532 | ) 533 | click.secho(f"'{az_func_name1}' result: {log_df.loc[i,'Result']}") 534 | monitoring_done = True 535 | 536 | return monitoring_done, shown_log_row_key 537 | 538 | 539 | def monitor_gcp(node, config, verbose, log_type, trigger_id): 540 | ## Write a function that uses the gcloud runv2 executions SDK to get executions that are currently running 541 | monitor_start_time = datetime.now(timezone.utc) - timedelta(minutes=1) 542 | 543 | gcp_key_path = get_gcp_keys() 544 | os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = gcp_key_path 545 | client = run_v2.ExecutionsClient() 546 | 547 | # Setup logging if necessary 548 | previous_insert_id = "0" 549 | logging_client = None 550 | if verbose: 551 | logging_client = logging_v2.Client() 552 | 553 | time_lapse = datetime.now(timezone.utc) - monitor_start_time 554 | monitoring_done = False 555 | 556 | if verbose and log_type == LOG_TYPE_WEBHOOK: 557 | print_gcp_webhook_logs(logging_client, config["job_id"]) 558 | 559 | while time_lapse <= timedelta(minutes=15) and monitoring_done == False: 560 | executions = get_gcp_job_executions(client, config["job_id"], trigger_id) 561 | if len(executions) == 0: 562 | click.secho(f"No job executions yet, still waiting...\r", fg="yellow") 563 | else: 564 | monitoring_done, message, color = check_gcp_execution_status(executions[0]) 565 | if verbose and 
log_type == LOG_TYPE_CLUSTER: 566 | previous_insert_id = print_gcp_execution_logs( 567 | logging_client, config["job_id"], executions[0], previous_insert_id 568 | ) 569 | elif not monitoring_done: 570 | click.secho(message, fg=color) 571 | 572 | if monitoring_done: 573 | click.secho(message, fg=color) 574 | break 575 | 576 | time.sleep(5 if verbose else 15) 577 | time_lapse = datetime.now(timezone.utc) - monitor_start_time 578 | 579 | if time_lapse >= timedelta(minutes=15) and not monitoring_done: 580 | click.secho( 581 | f"Monitoring timed out after 15 minutes without determining the status of your container. Check the status of your container in the Google Cloud console.", 582 | fg="red", 583 | ) 584 | exit(1) 585 | 586 | 587 | def get_gcp_job_executions(client, job_id, trigger_id): 588 | # Initialize request argument(s) 589 | request = run_v2.ListExecutionsRequest( 590 | parent=job_id, 591 | ) 592 | 593 | page_result = client.list_executions(request=request) 594 | executions = [] 595 | # Handle the response 596 | for response in page_result: 597 | env = response.template.containers[0].env 598 | for env_var in env: 599 | if env_var.name == "TRIGGER_ID" and env_var.value == trigger_id: 600 | executions.append(response) 601 | elif trigger_id is None: 602 | executions.append(response) 603 | 604 | if trigger_id is None and executions: 605 | executions = [executions[0]] 606 | return executions 607 | 608 | 609 | def check_gcp_execution_status(execution): 610 | condition_based_results = { 611 | run_v2.types.Condition.State.CONDITION_SUCCEEDED: [ 612 | True, 613 | "Job execution succeeded!\r", 614 | "green", 615 | ], 616 | run_v2.types.Condition.State.CONDITION_RECONCILING: [ 617 | False, 618 | "Waiting for job to complete...\r", 619 | "yellow", 620 | ], 621 | run_v2.types.Condition.State.CONDITION_PENDING: [ 622 | False, 623 | "Waiting for job to complete...\r", 624 | "yellow", 625 | ], 626 | run_v2.types.Condition.State.CONDITION_FAILED: [True, "Job failed!\r", "red"], 627 | } 628 | 629 | completed_condition = list( 630 | filter(lambda c: c.type_ == "Completed", execution.conditions) 631 | ) 632 | if len(completed_condition) == 1: 633 | if completed_condition[0].state in condition_based_results: 634 | return condition_based_results[completed_condition[0].state] 635 | else: 636 | return ( 637 | True, 638 | f"Unknown job status! Exiting test.\nJob status: {completed_condition[0].state}\r", 639 | "red", 640 | ) 641 | else: 642 | return ( 643 | False, 644 | "No job status found. 
Waiting for job status to resolve...\r", 644 | "yellow", 645 | ) 646 | 647 | 648 | def print_gcp_execution_logs(logging_client, job_id, execution, previous_insert_id): 649 | execution_name = execution.name.split("/")[-1] 650 | 651 | log_filter = " ".join( 652 | [ 653 | 'resource.type = "cloud_run_job"', 654 | f'resource.labels.job_name = "{job_id.split("/")[-1]}"', 655 | f'labels."run.googleapis.com/execution_name" = "{execution_name}"', 656 | 'labels."run.googleapis.com/task_index" = "0"', 657 | f'insertId > "{previous_insert_id}"', 658 | ] 659 | ) 660 | page_response = logging_client.list_entries(filter_=log_filter) 661 | insert_id = previous_insert_id 662 | for log in page_response: 663 | click.secho(f"{log.timestamp}: {log.payload}") 664 | insert_id = log.insert_id 665 | 666 | if insert_id == "0": 667 | click.secho(f"Waiting for logs to begin...\r", fg="yellow") 668 | 669 | return insert_id 670 | 671 | 672 | def print_gcp_webhook_logs(logging_client, job_id): 673 | monitor_start_time = datetime.now(timezone.utc) - timedelta(minutes=30) 674 | click.secho("Looking for most recent webhook execution...\r", fg="yellow") 675 | 676 | log_filter = " ".join( 677 | [ 678 | 'resource.type = "cloud_function"', 679 | f'resource.labels.function_name = "{job_id.split("/")[-1]}"', 680 | f'timestamp>="{monitor_start_time.isoformat()}"', 681 | ] 682 | ) 683 | 684 | page_response = logging_client.list_entries(filter_=log_filter) 685 | 686 | execution_id = "" 687 | log_entries = [] 688 | for result in page_response: 689 | log_entries.append( 690 | { 691 | "payload": result.payload, 692 | "execution_id": result.labels["execution_id"], 693 | "timestamp": result.timestamp, 694 | } 695 | ) 696 | execution_id = result.labels["execution_id"] 697 | 698 | for log in log_entries: 699 | if log["execution_id"] == execution_id: 700 | click.secho(f"{log['timestamp']}: {log['payload']}") 701 | 702 | if len(log_entries) == 0: 703 | click.secho("No webhook logs in the past 30 minutes.\r", fg="yellow") 704 | click.secho( 705 | "Try executing your webhook again or run numerai node deploy to make sure your webhook URL is up to date\r", 706 | fg="yellow", 707 | ) 708 | 709 | 710 | @click.command() 711 | @click.option("--verbose", "-v", is_flag=True) 712 | @click.option( 713 | "--num-lines", "-n", type=int, default=20, help="the number of log lines to return" 714 | ) 715 | @click.option( 716 | "--log-type", 717 | "-l", 718 | type=click.Choice(LOG_TYPES), 719 | default=LOG_TYPE_CLUSTER, 720 | help=f"The log type to lookup. One of {LOG_TYPES}. Default is {LOG_TYPE_CLUSTER}.", 721 | ) 722 | @click.option( 723 | "--follow-tail", 724 | "-f", 725 | is_flag=True, 726 | help="tail the logs of a running task (AWS only)", 727 | ) 728 | @click.pass_context 729 | def status(ctx, verbose, num_lines, log_type, follow_tail): 730 | """ 731 | Get the logs from the latest task for this node 732 | 733 | Logs are not created until a task is in the RUNNING state, 734 | so the logs returned by this command might be out of date. 
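A minimal usage sketch ("my_model" is a hypothetical model name): `numerai node -m my_model status -n 50` fetches the 50 most recent log lines; use -l to choose between the webhook and cluster logs.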
735 | """ 736 | ctx.ensure_object(dict) 737 | model = ctx.obj["model"] 738 | node = model["name"] 739 | monitor(node, load_or_init_nodes(node), verbose, num_lines, log_type, follow_tail) 740 | -------------------------------------------------------------------------------- /numerai/cli/setup.py: -------------------------------------------------------------------------------- 1 | """Setup command for Numerai CLI""" 2 | 3 | import click 4 | import logging 5 | 6 | from numerai.cli.constants import * 7 | from numerai.cli.util.docker import terraform 8 | from numerai.cli.util.files import maybe_create, copy_files 9 | from numerai.cli.util.keys import config_numerai_keys, config_provider_keys 10 | 11 | 12 | @click.command() 13 | @click.option( 14 | "--provider", 15 | "-p", 16 | type=str, 17 | prompt=True, 18 | help=f"Initialize with this providers API keys.", 19 | ) 20 | @click.option("--skip-key-setup", "-s", is_flag=True) 21 | @click.option("--verbose", "-v", is_flag=True) 22 | def setup(provider, skip_key_setup, verbose): 23 | """ 24 | Initializes cli and provider API keys. 25 | """ 26 | 27 | logger = logging.getLogger("azure") 28 | if verbose: 29 | logger.setLevel(logging.INFO) 30 | else: 31 | logger.setLevel(logging.ERROR) 32 | 33 | if os.path.isdir(CONFIG_PATH) and not os.path.isdir( 34 | os.path.join(CONFIG_PATH, "azure") 35 | ): 36 | click.secho( 37 | "Looks like you have an old configuration of numerai-cli (<=0.3)." 38 | "run `numerai upgrade` first." 39 | ) 40 | return 41 | 42 | # setup numerai keys 43 | if not skip_key_setup: 44 | click.secho( 45 | "Initializing numerai keys " "(press enter to keep value in brackets)...", 46 | fg="yellow", 47 | ) 48 | maybe_create(KEYS_PATH, protected=True) 49 | config_numerai_keys() 50 | 51 | # setup provider keys 52 | if not skip_key_setup: 53 | click.secho( 54 | f"\nInitializing {provider} keys " 55 | f"(press enter to keep value in brackets)...", 56 | fg="yellow", 57 | ) 58 | config_provider_keys(provider) 59 | 60 | # copy tf files 61 | click.secho("copying terraform files...") 62 | copy_files(TERRAFORM_PATH, CONFIG_PATH, force=True, verbose=True) 63 | 64 | # terraform init, added provider to init at the specified provider's tf directory 65 | click.secho("initializing terraform to provision cloud infrastructure...") 66 | terraform("init -upgrade ", verbose, provider) 67 | 68 | click.secho("Numerai API Keys setup and working", fg="green") 69 | click.secho(f"{provider} API Keys setup and working", fg="green") 70 | click.secho(f"Terraform files copied to {CONFIG_PATH}", fg="green") 71 | click.echo("Successfully initialized numerai-cli") 72 | -------------------------------------------------------------------------------- /numerai/cli/uninstall.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import shutil 3 | 4 | import click 5 | from numerapi import base_api 6 | 7 | from numerai.cli.constants import * 8 | from numerai.cli.util.keys import ( 9 | load_or_init_keys, 10 | load_or_init_nodes, 11 | get_numerai_keys, 12 | ) 13 | from numerai.cli.util.docker import terraform 14 | from numerai.cli.util.debug import root_cause 15 | 16 | 17 | @click.command() 18 | def uninstall(): 19 | """ 20 | Removes cloud resources, local config, and python package. 
21 | """ 22 | click.secho( 23 | """DANGER WILL ROBINSON, This will: 24 | - Destroy all nodes in the cloud 25 | - Remove all docker images on your computer 26 | - Delete the .numerai configuration directory on your computer 27 | - Uninstall the numerai-cli python package 28 | - Leave Python and Docker installed on your computer 29 | """, 30 | fg="red", 31 | ) 32 | if not click.confirm("Are you absolutely sure you want to uninstall?"): 33 | return 34 | 35 | if os.path.exists(CONFIG_PATH): 36 | if len(os.listdir(CONFIG_PATH)) == 0: 37 | os.rmdir(CONFIG_PATH) 38 | 39 | else: 40 | napi = base_api.Api(*get_numerai_keys()) 41 | 42 | node_config = load_or_init_nodes() 43 | click.secho("deregistering all webhooks...") 44 | for node, config in node_config.items(): 45 | napi.set_submission_webhook(config["model_id"], None) 46 | 47 | click.secho("destroying cloud resources...") 48 | all_keys = load_or_init_keys() 49 | provider_keys = {} 50 | for provider in PROVIDERS: 51 | if provider in all_keys.keys(): 52 | provider_keys.update(all_keys[provider]) 53 | terraform( 54 | "destroy -auto-approve", 55 | provider=provider, 56 | verbose=True, 57 | env_vars=provider_keys, 58 | inputs={"node_config_file": "nodes.json"}, 59 | ) 60 | 61 | click.secho("cleaning up docker images...") 62 | subprocess.run("docker system prune -f -a --volumes", shell=True) 63 | try: 64 | shutil.rmtree(CONFIG_PATH) 65 | except PermissionError as e: 66 | root_cause("", str(e)) 67 | 68 | click.secho("uninstalling python package...") 69 | res = subprocess.run( 70 | "pip3 uninstall numerai-cli -y", 71 | stdout=subprocess.PIPE, 72 | stderr=subprocess.PIPE, 73 | shell=True, 74 | ) 75 | 76 | if res.returncode != 0: 77 | if b"PermissionError" in res.stderr: 78 | click.secho( 79 | "uninstall failed due to permissions, " 80 | 'run "pip3 uninstall numerai-cli -y" manually ' 81 | "to ensure the package was uninstalled", 82 | fg="red", 83 | ) 84 | else: 85 | click.secho(f"Unexpected error occurred:\n{res.stderr}", fg="red") 86 | 87 | click.secho("All those moments will be lost in time, like tears in rain.", fg="red") 88 | -------------------------------------------------------------------------------- /numerai/cli/upgrade.py: -------------------------------------------------------------------------------- 1 | import json 2 | import shutil 3 | 4 | import click 5 | 6 | from numerai.cli.constants import * 7 | from numerai.cli.util.docker import terraform 8 | from numerai.cli.util.files import copy_files, store_config, copy_file, move_files 9 | from numerai.cli.util.keys import ( 10 | load_or_init_keys, 11 | load_or_init_nodes, 12 | config_numerai_keys, 13 | config_provider_keys, 14 | ) 15 | 16 | 17 | # TODO: to add support for upgrade from 0.3 -> 0.4 Azure provider version 18 | 19 | 20 | @click.command() 21 | @click.option("--verbose", "-v", is_flag=True) 22 | def upgrade(verbose): 23 | """ 24 | Upgrades configuration from 0.1/0.2 to 0.3 format and 0.3 to 0.4. 25 | """ 26 | if str(os.getcwd()) == str(Path.home()): 27 | click.secho("You cannot run this command from your home directory.") 28 | return 29 | 30 | home = str(Path.home()) 31 | old_key_path = os.path.join(home, ".numerai") 32 | old_config_path = os.path.join(os.getcwd(), ".numerai/") 33 | 34 | needs_03_upgrade = os.path.exists(old_config_path) 35 | needs_04_upgrade = not os.path.isdir(os.path.join(CONFIG_PATH, "azure")) 36 | 37 | if not needs_03_upgrade and not needs_04_upgrade: 38 | click.secho( 39 | "Cannot detect any necessary upgrades. 
If you're trying to upgrade from 0.1 or 0.2 to 0.3," 40 | " run this command from the directory in which you first ran `numerai setup`" 41 | " (it should have the .numerai folder in it and not your home directory)." 42 | " If instead you're trying to upgrade from 0.3 to 0.4," 43 | " there appears to already be an azure directory so you're all good!." 44 | ) 45 | return 46 | 47 | click.secho( 48 | f"Upgrading, do not interrupt or else " f"your environment may be corrupted.", 49 | fg="yellow", 50 | ) 51 | 52 | # MOVE KEYS FILE 53 | if os.path.isfile(old_key_path): 54 | temp_key_path = os.path.join(old_config_path, ".keys") 55 | click.secho( 56 | f"\tmoving old keyfile from '{old_key_path}' to '{temp_key_path}'", 57 | ) 58 | shutil.move(old_key_path, temp_key_path) 59 | 60 | # MOVE CONFIG FILE 61 | if os.path.exists(old_config_path): 62 | click.secho( 63 | f"\tmoving old config from {old_config_path} to {CONFIG_PATH}", 64 | ) 65 | shutil.move(old_config_path, CONFIG_PATH) 66 | 67 | # INIT KEYS AND NODES 68 | keys_config = load_or_init_keys() 69 | supported_providers = ["aws", "azure"] 70 | if ( 71 | not os.path.exists(KEYS_PATH) 72 | or "numerai" not in keys_config 73 | or not any( 74 | [provider for provider in keys_config if provider in supported_providers] 75 | ) 76 | ): 77 | click.secho(f"Keys missing from {KEYS_PATH}, you must re-initialize your keys:") 78 | config_numerai_keys() 79 | click.secho(f"Currently supported providers: {supported_providers}") 80 | provider = click.prompt("Enter your provider:", default="aws") 81 | if provider == "aws": 82 | config_provider_keys(PROVIDER_AWS) 83 | elif provider == "azure": 84 | config_provider_keys(PROVIDER_AZURE) 85 | else: 86 | click.secho(f"Invalid provider: {provider}", fg="red") 87 | exit(1) 88 | # nodes_config = load_or_init_nodes() 89 | 90 | # DELETE OLD CONFIG FILES 91 | click.secho("Checking for old config output files...", fg="yellow") 92 | old_suburl_path = os.path.join(CONFIG_PATH, "submission_url.txt") 93 | if os.path.exists(old_suburl_path): 94 | click.secho( 95 | f"\tdeleting {old_suburl_path}, you can populate the " 96 | f"new nodes.json file with `numerai node config`" 97 | ) 98 | os.remove(old_suburl_path) 99 | old_docker_path = os.path.join(CONFIG_PATH, "docker_repo.txt") 100 | if os.path.exists(old_docker_path): 101 | click.secho( 102 | f"\tdeleting {old_docker_path}, you can populate the " 103 | f"new nodes.json file with `numerai node config`" 104 | ) 105 | os.remove(old_docker_path) 106 | 107 | # Upgrade to 0.4 108 | # create "/aws" directory, then copy all of the old config over (except .keys and nodes.json) 109 | if needs_04_upgrade: 110 | click.secho("Upgrading from 0.3 to 0.4...", fg="yellow") 111 | # Create the temp folder if it doesn't exist already 112 | temp_folder_path = os.path.join(CONFIG_PATH, "temp") 113 | if not os.path.exists(temp_folder_path): 114 | os.makedirs(temp_folder_path) 115 | else: 116 | click.secho( 117 | f"Temp folder {temp_folder_path} already exists, " 118 | f"upgrading will remove everything in this folder. 
" 119 | f"Please save a copy elsewhere and retry 'numerai upgrade'", 120 | fg="red", 121 | ) 122 | exit(1) 123 | 124 | # Move all files and folders in the config folder to the temp folder 125 | # and delete the old files 126 | move_files( 127 | CONFIG_PATH, 128 | temp_folder_path, 129 | verbose, 130 | exclude_files=[".keys", "nodes.json"], 131 | ) 132 | 133 | # Create /aws directory 134 | aws_path = os.path.join(CONFIG_PATH, "aws") 135 | if not os.path.exists(aws_path): 136 | os.makedirs(aws_path) 137 | 138 | # Move all files and folders from the temp folder to the aws folder 139 | move_files(temp_folder_path, aws_path, verbose) 140 | os.rmdir(temp_folder_path) 141 | 142 | # UPGRADE, RENAME, AND UPDATE TERRAFORM FILES 143 | click.secho("Upgrading terraform files...", fg="yellow") 144 | try: 145 | with open(os.path.join(CONFIG_PATH, "aws", "terraform.tfstate")) as f: 146 | tfstate = json.load(f) 147 | keys_config = load_or_init_keys("aws") 148 | if "0.12" in tfstate["terraform_version"]: 149 | terraform( 150 | "0.13upgrade -yes ", 151 | verbose, 152 | provider="aws", 153 | version="0.13.6", 154 | env_vars=keys_config, 155 | ) 156 | terraform( 157 | "init", verbose, provider="aws", version="0.13.6", env_vars=keys_config 158 | ) 159 | terraform( 160 | "apply -auto-approve", 161 | verbose, 162 | provider="aws", 163 | version="0.13.6", 164 | env_vars=keys_config, 165 | ) 166 | except FileNotFoundError: 167 | pass 168 | except click.ClickException: 169 | click.secho("Failed to upgrade to terraform state!", fg="red") 170 | return 171 | except Exception as e: 172 | click.secho(f"Uncaught exception: {str(e)}", fg="red") 173 | return 174 | 175 | # Rename terraform files, only for v0.2 -> v0.3 upgrade 176 | if needs_03_upgrade: 177 | tf_files_map = { 178 | "main.tf": "-main.tf", 179 | "variables.tf": "-inputs.tf", 180 | "outputs.tf": "-outputs.tf", 181 | } 182 | for old_file, new_file in tf_files_map.items(): 183 | old_file = os.path.join(CONFIG_PATH, old_file) 184 | new_file = os.path.join(CONFIG_PATH, "aws", new_file) 185 | if os.path.exists(new_file): 186 | os.remove(new_file) 187 | if not os.path.exists(old_file): 188 | click.secho( 189 | f"\trenaming and moving {old_file} to {new_file} to prep for upgrade..." 190 | ) 191 | shutil.move(old_file, new_file) 192 | else: 193 | os.remove(old_file) 194 | 195 | copy_files(TERRAFORM_PATH, CONFIG_PATH, force=True, verbose=verbose) 196 | 197 | # terraform init 198 | click.secho("Re-initializing terraform...", fg="yellow") 199 | terraform("init -upgrade", verbose=verbose, provider="aws") 200 | 201 | if needs_03_upgrade and click.confirm( 202 | "It's recommended you destroy your current Compute Node. Continue?" 
203 | ): 204 | click.secho("Removing old cloud infrastructure...", fg="yellow") 205 | nodes_config = load_or_init_nodes() 206 | store_config(NODES_PATH, nodes_config) 207 | copy_file(NODES_PATH, f"{CONFIG_PATH}/aws/", force=True, verbose=True) 208 | terraform( 209 | "destroy -auto-approve", 210 | verbose, 211 | provider="aws", 212 | env_vars=load_or_init_keys("aws"), 213 | inputs={"node_config_file": "nodes.json"}, 214 | ) 215 | 216 | click.secho("Upgrade complete!", fg="green") 217 | click.secho( 218 | "run `numerai node config --help` to learn how to " 219 | "register this directory as a prediction node" 220 | ) 221 | -------------------------------------------------------------------------------- /numerai/cli/util/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numerai/numerai-cli/e4e16559ac609801acd4598eee417efccff8d3c8/numerai/cli/util/__init__.py -------------------------------------------------------------------------------- /numerai/cli/util/debug.py: -------------------------------------------------------------------------------- 1 | import os 2 | import platform 3 | import sys 4 | import json 5 | 6 | import click 7 | 8 | 9 | def exception_with_msg(msg): 10 | return click.ClickException(msg) 11 | 12 | 13 | def is_win8(): 14 | if sys.platform == "win32": 15 | return "8" in platform.win32_ver()[0] 16 | return False 17 | 18 | 19 | def is_win10(): 20 | if sys.platform == "win32": 21 | return "10" in platform.win32_ver()[0] 22 | return False 23 | 24 | 25 | def is_win10_professional(): 26 | if is_win10(): 27 | # for windows 10 only, we need to know if it's pro vs home 28 | import winreg 29 | 30 | with winreg.OpenKey( 31 | winreg.HKEY_LOCAL_MACHINE, r"SOFTWARE\Microsoft\Windows NT\CurrentVersion" 32 | ) as key: 33 | return winreg.QueryValueEx(key, "EditionID")[0] == "Professional" 34 | 35 | return False 36 | 37 | 38 | # error checking for docker; sadly this is a mess, 39 | # especially b/c there's tons of ways to mess up your docker install 40 | # especially on windows :( 41 | def root_cause(std_out, err_msg): 42 | all_logs = f'{std_out.decode("utf-8") }\n{err_msg.decode("utf-8") }' 43 | if b"is not recognized as an internal or external command" in err_msg: 44 | if sys.platform == "win32": 45 | if is_win10_professional(): 46 | raise exception_with_msg( 47 | f"Docker does not appear to be installed. Make sure to download/install docker from " 48 | f"https://hub.docker.com/editions/community/docker-ce-desktop-windows \n" 49 | f"If you're sure docker is already installed, then for some reason it isn't in your PATH like expected. " 50 | f"Restarting may fix it." 51 | ) 52 | 53 | else: 54 | raise exception_with_msg( 55 | f"Docker does not appear to be installed. Make sure to download/install docker from " 56 | f"https://github.com/docker/toolbox/releases and run 'Docker Quickstart Terminal' when you're done." 57 | f"\nIf you're sure docker is already installed, then for some reason it isn't in your PATH like expected. " 58 | f"Restarting may fix it." 59 | ) 60 | 61 | if b"command not found" in err_msg: 62 | if sys.platform == "darwin": 63 | raise exception_with_msg( 64 | f"Docker does not appear to be installed. You can install it with `brew cask install docker` or " 65 | f"from https://hub.docker.com/editions/community/docker-ce-desktop-mac" 66 | ) 67 | 68 | else: 69 | raise exception_with_msg( 70 | f"docker command not found. 
Please install docker " 71 | f"and make sure that the `docker` command is in your $PATH" 72 | ) 73 | 74 | if ( 75 | b"This error may also indicate that the docker daemon is not running" in err_msg 76 | or b"Is the docker daemon running" in err_msg 77 | ): 78 | if sys.platform == "darwin": 79 | raise exception_with_msg( 80 | f"Docker daemon not running. Make sure you've started " 81 | f"'Docker Desktop' and then run this command again." 82 | ) 83 | 84 | elif sys.platform.startswith("linux"): 85 | raise exception_with_msg( 86 | f"Docker daemon not running or this user cannot access the docker socket. " 87 | f"Make sure docker is running and that your user has permissions to run docker. " 88 | f"On most systems, you can add your user to the docker group like so: " 89 | f"`sudo groupadd docker; sudo usermod -aG docker $USER` and then restart your computer." 90 | ) 91 | 92 | elif sys.platform == "win32": 93 | if "DOCKER_TOOLBOX_INSTALL_PATH" in os.environ: 94 | raise exception_with_msg( 95 | f"Docker daemon not running. Make sure you've started " 96 | f"'Docker Quickstart Terminal' and then run this command again." 97 | ) 98 | 99 | else: 100 | raise exception_with_msg( 101 | f"Docker daemon not running. Make sure you've started " 102 | f"'Docker Desktop' and then run this command again." 103 | ) 104 | 105 | if b"invalid mode: /opt/plan" in err_msg: 106 | if sys.platform == "win32": 107 | raise exception_with_msg( 108 | f"You're running Docker Toolbox, but you're not using the 'Docker Quickstart Terminal'. " 109 | f"Please re-run `numerai setup` from that terminal." 110 | ) 111 | 112 | if b"Drive has not been shared" in err_msg: 113 | raise exception_with_msg( 114 | f"You're running from a directory that isn't shared to your Docker daemon. " 115 | f"Make sure your directory is shared through Docker Desktop: " 116 | f"https://docs.docker.com/docker-for-windows/#shared-drives" 117 | ) 118 | 119 | if b"No configuration files" in err_msg: 120 | raise exception_with_msg( 121 | "You're running from a directory that isn't shared to your Docker daemon. " 122 | "Try running from a directory under your HOME, e.g. C:\\Users\\$YOUR_NAME\\$ANY_FOLDER" 123 | ) 124 | 125 | if b"returned non-zero exit status 137" in err_msg: 126 | raise exception_with_msg( 127 | "Your docker container ran out of memory. Please open the docker desktop UI" 128 | " and increase the memory allowance in the advanced settings." 129 | ) 130 | 131 | if b"Temporary failure in name resolution" in err_msg: 132 | raise exception_with_msg("Your network failed temporarily, please try again.") 133 | 134 | if b"No Fargate configuration exists for given values." 
in std_out: 135 | raise exception_with_msg("Invalid size preset, report this to Numerai") 136 | 137 | if ( 138 | "Can't add file" in all_logs 139 | or b"Error processing tar file(exit status 1): unexpected EOF" in err_msg 140 | ): 141 | err_files = [f for f in all_logs.split("\n") if "Can't add file" in f] 142 | raise exception_with_msg( 143 | "Docker was unable to access some files while trying to build, " 144 | "either another program is using them or docker does not have permissions " 145 | f"to access them: {json.dumps(err_files, indent=2)}" 146 | ) 147 | 148 | if b"PermissionError: [Errno 13] Permission denied: 'modules.json'" in err_msg: 149 | raise exception_with_msg( 150 | "It looks like Docker daemon is running as root, please restart in rootless " 151 | "mode: https://docs.docker.com/engine/security/rootless/" 152 | ) 153 | 154 | # these are non-errors that either shouldn't be handled or are handled elsewhere 155 | if b"Can't update submission after deadline" in err_msg: 156 | return 157 | if b"ResourceNotFoundException" in std_out or b"NoSuchEntity" in std_out: 158 | return 159 | 160 | if b"Cycle" in std_out: 161 | raise exception_with_msg( 162 | "You upgraded to 1.0+ and need to replace your AWS nodes before continuing!" 163 | "\nTo do this now follow these instructions:" 164 | '\n run "numerai destroy-all --preserve-node-config"' 165 | '\n run "numerai node -m config" for each node' 166 | '\n run "numerai node -m deploy" for each node' 167 | "\nIf you do not want to do this, downgrade to 0.4.1 to continue." 168 | ) 169 | 170 | raise exception_with_msg( 171 | f"Numerai CLI was unable to identify an error, please try to use the " 172 | f'"--verbose|-v" option for more information before reporting this\n{all_logs}' 173 | ) 174 | -------------------------------------------------------------------------------- /numerai/cli/util/docker.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import base64 3 | import subprocess 4 | from queue import Queue, Empty 5 | from threading import Thread 6 | 7 | import boto3 8 | import click 9 | 10 | from numerai.cli.constants import * 11 | from numerai.cli.util.debug import root_cause 12 | from numerai.cli.util.keys import ( 13 | sanitize_message, 14 | get_aws_keys, 15 | load_or_init_keys, 16 | get_azure_keys, 17 | get_gcp_keys, 18 | get_gcp_project, 19 | ) 20 | from azure.mgmt.containerregistry import ContainerRegistryManagementClient 21 | from azure.containerregistry import ContainerRegistryClient, ArtifactManifestOrder 22 | from azure.identity import ClientSecretCredential 23 | import google.cloud.artifactregistry_v1 as artifactregistry_v1 24 | 25 | 26 | def check_for_dockerfile(path): 27 | dockerfile_path = os.path.join(path, "Dockerfile") 28 | if not os.path.exists(dockerfile_path): 29 | click.secho( 30 | f"No Dockerfile found in {path}, please ensure this node " 31 | f"was created from an example or follows the Prediction Node Architecture. " 32 | f"Learn More:\nhttps://github.com/numerai/numerai-cli/wiki/Prediction-Nodes", 33 | fg="red", 34 | ) 35 | exit(1) 36 | if Path(path).resolve() == Path.home(): 37 | click.secho( 38 | f"DO NOT PUT THE DOCKERFILE IN YOUR HOME PATH, please ensure this node " 39 | f"was created from an example or follows the Prediction Node Architecture. 
" 40 | f"Learn More:\nhttps://github.com/numerai/numerai-cli/wiki/Prediction-Nodes", 41 | fg="red", 42 | ) 43 | exit(1) 44 | 45 | 46 | def subprocess_log(stream, queue): 47 | for line in iter(stream.readline, b""): 48 | queue.put(line) 49 | stream.close() 50 | 51 | 52 | def get_from_q(q, verbose, default=b"", prefix=""): 53 | try: 54 | res = q.get(block=False) 55 | if verbose and res: 56 | click.secho(f"{prefix} {res.decode()}") 57 | return res 58 | except Empty as e: 59 | return default 60 | 61 | 62 | def execute(command, verbose, censor_substr=None): 63 | if verbose: 64 | click.echo("Running: " + sanitize_message(command, censor_substr)) 65 | 66 | on_posix = "posix" in sys.builtin_module_names 67 | proc = subprocess.Popen( 68 | command, 69 | shell=True, 70 | stdout=subprocess.PIPE, 71 | stderr=subprocess.PIPE, 72 | bufsize=1, 73 | close_fds=on_posix, 74 | ) 75 | stdout_q = Queue() 76 | stderr_q = Queue() 77 | stdout_t = Thread(target=subprocess_log, args=(proc.stdout, stdout_q)) 78 | stderr_t = Thread(target=subprocess_log, args=(proc.stderr, stderr_q)) 79 | 80 | try: 81 | stdout_t.start() 82 | stderr_t.start() 83 | stdout = b"" 84 | stderr = b"" 85 | while proc.poll() is None: 86 | stdout += get_from_q(stdout_q, verbose) 87 | stderr += get_from_q(stderr_q, verbose) 88 | 89 | returncode = proc.poll() 90 | if returncode != 0: 91 | root_cause(stdout, stderr) 92 | finally: 93 | stdout_t.join() 94 | stderr_t.join() 95 | proc.kill() 96 | 97 | return stdout, stderr 98 | 99 | 100 | def format_if_docker_toolbox(path, verbose): 101 | """ 102 | Helper function to format if the system is running docker toolbox + mingw. 103 | Paths need to be formatted like unix paths, and the drive letter lower-cased 104 | """ 105 | if "DOCKER_TOOLBOX_INSTALL_PATH" in os.environ and "MSYSTEM" in os.environ: 106 | # '//' found working on win8.1 docker quickstart terminal, previously just '/' 107 | new_path = ("//" + path[0].lower() + path[2:]).replace("\\", "/") 108 | if verbose: 109 | click.secho(f"formatted path for docker toolbox: {path} -> {new_path}") 110 | return new_path 111 | return path 112 | 113 | 114 | # Added variable to take in different providers 115 | def build_tf_cmd(tf_cmd, provider, env_vars, inputs, version, verbose): 116 | cmd = f"docker run" 117 | if env_vars: 118 | cmd += " ".join([f' -e "{key}={val}"' for key, val in env_vars.items()]) 119 | cmd += f" --rm -it -v {format_if_docker_toolbox(CONFIG_PATH, verbose)}:/opt/plan" 120 | if provider == PROVIDER_GCP: 121 | cmd += ( 122 | f" --mount type=bind,source={GCP_KEYS_PATH},target=/tmp/gcp_keys/keys.json" 123 | ) 124 | cmd += f" -e GOOGLE_APPLICATION_CREDENTIALS=/tmp/gcp_keys/keys.json" 125 | cmd += f" -e GOOGLE_PROJECT={get_gcp_project()}" 126 | cmd += f" -w /opt/plan hashicorp/terraform:{version}" 127 | # Added provider to pick the correct provider directory before tf command 128 | cmd += " ".join([f" -chdir={provider}"]) 129 | cmd += f" {tf_cmd}" 130 | if inputs: 131 | cmd += " ".join([f' -var="{key}={val}"' for key, val in inputs.items()]) 132 | return cmd 133 | 134 | 135 | # Added variable to take in different providers 136 | def terraform(tf_cmd, verbose, provider, env_vars=None, inputs=None, version="1.5.6"): 137 | cmd = build_tf_cmd(tf_cmd, provider, env_vars, inputs, version, verbose) 138 | stdout, stderr = execute(cmd, verbose) 139 | # if user accidentally deleted a resource, refresh terraform and try again 140 | if b"ResourceNotFoundException" in stdout or b"NoSuchEntity" in stdout: 141 | refresh = build_tf_cmd("refresh", env_vars, 
inputs, version, verbose) 142 | execute(refresh, verbose) 143 | stdout, stderr = execute(cmd, verbose) 144 | return stdout 145 | 146 | 147 | def build(node_config, node, verbose): 148 | numerai_keys = load_or_init_keys()["numerai"] 149 | 150 | node_path = node_config["path"] 151 | curr_path = os.path.abspath(".") 152 | if curr_path not in node_path: 153 | raise RuntimeError( 154 | f"Current directory invalid, you must run this command either from" 155 | f' "{node_path}" or a parent directory of that path.' 156 | ) 157 | path = node_path.replace(curr_path, ".").replace("\\", "/") 158 | if verbose: 159 | click.secho(f"Using relative path to node: {path}") 160 | 161 | build_arg_str = "" 162 | for arg in numerai_keys: 163 | build_arg_str += f" --build-arg {arg}={numerai_keys[arg]}" 164 | build_arg_str += f' --build-arg MODEL_ID={node_config["model_id"]}' 165 | build_arg_str += f" --build-arg SRC_PATH={path}" 166 | build_arg_str += f" --build-arg NODE={node}" 167 | 168 | cmd = ( 169 | f'docker build --platform=linux/amd64 --load -t {node_config["docker_repo"]}' 170 | f"{build_arg_str} -f {path}/Dockerfile ." 171 | ) 172 | stdout, stderr = execute(cmd, verbose) 173 | if "unknown flag: --load" in (stdout.decode() + stderr.decode()): 174 | click.secho( 175 | "Docker version too old, please upgrade to at least 18.09", 176 | fg="red", 177 | ) 178 | exit(1) 179 | 180 | 181 | def run(node_config, verbose, command=""): 182 | cmd = f"docker run --rm -it {node_config['docker_repo']} {command}" 183 | execute(cmd, verbose) 184 | 185 | 186 | def login(node_config, verbose): 187 | if node_config["provider"] == PROVIDER_AWS: 188 | username, password = login_aws() 189 | login_url = node_config['docker_repo'] 190 | elif node_config["provider"] == PROVIDER_AZURE: 191 | username, password = login_azure( 192 | node_config["registry_rg_name"], node_config["registry_name"] 193 | ) 194 | login_url = node_config['docker_repo'] 195 | elif node_config["provider"] == PROVIDER_GCP: 196 | username, password = login_gcp() 197 | login_url = node_config['artifact_registry_login_url'] 198 | else: 199 | raise ValueError(f"Unsupported provider: '{node_config['provider']}'") 200 | 201 | if os.name == "nt": 202 | echo_cmd = f'echo | set /p="{password}"' 203 | else: 204 | echo_cmd = f'echo "{password}"' 205 | 206 | cmd = ( 207 | echo_cmd + f" | docker login" 208 | f" -u {username}" 209 | f" --password-stdin" 210 | f" {login_url}" 211 | ) 212 | 213 | execute(cmd, verbose, censor_substr=password) 214 | 215 | 216 | def login_aws(): 217 | aws_public, aws_secret = get_aws_keys() 218 | ecr_client = boto3.client( 219 | "ecr", 220 | region_name="us-east-1", 221 | aws_access_key_id=aws_public, 222 | aws_secret_access_key=aws_secret, 223 | ) 224 | 225 | token = ecr_client.get_authorization_token() # TODO: use registryIds 226 | username, password = ( 227 | base64.b64decode(token["authorizationData"][0]["authorizationToken"]) 228 | .decode() 229 | .split(":") 230 | ) 231 | 232 | return username, password 233 | 234 | 235 | def login_azure(resource_group_name, registry_name): 236 | azure_subs_id, azure_client, azure_tenant, azure_secret = get_azure_keys() 237 | credentials = ClientSecretCredential( 238 | client_id=azure_client, tenant_id=azure_tenant, client_secret=azure_secret 239 | ) 240 | username_password = ContainerRegistryManagementClient( 241 | credentials, azure_subs_id 242 | ).registries.list_credentials(resource_group_name, registry_name) 243 | username = username_password.username 244 | password = username_password.passwords[0].value 
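    # NOTE: these are the ACR admin credentials; login() pipes the password
    # into `docker login` via --password-stdin so it never appears on the
    # command line, and execute() censors it from any verbose log output.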
245 | return username, password 246 | 247 | 248 | def login_gcp(): 249 | gcp_keys_path = get_gcp_keys() 250 | gcp_keys_file = open(gcp_keys_path, "r") 251 | gcp_keys = gcp_keys_file.read() 252 | username = "_json_key_base64" 253 | password = base64.b64encode(gcp_keys.encode()).decode("utf-8") 254 | return username, password 255 | 256 | 257 | def manifest_inspect(docker_image, verbose): 258 | cmd = f"docker manifest inspect {docker_image}" 259 | execute(cmd, verbose=verbose) 260 | 261 | 262 | def push(docker_image, verbose): 263 | cmd = f"docker push {docker_image}" 264 | execute(cmd, verbose=verbose) 265 | 266 | 267 | def pull(docker_image, verbose): 268 | cmd = f"docker pull {docker_image}" 269 | execute(cmd, verbose=verbose) 270 | 271 | 272 | def tag(original_image, new_image_tag, verbose): 273 | cmd = f"docker tag {original_image} {new_image_tag}" 274 | execute(cmd, verbose=verbose) 275 | 276 | 277 | def cleanup(node_config): 278 | provider = node_config["provider"] 279 | if provider == PROVIDER_AWS: 280 | imageIds = cleanup_aws(node_config["docker_repo"]) 281 | elif provider == PROVIDER_AZURE: 282 | imageIds = cleanup_azure(node_config) 283 | elif provider == PROVIDER_GCP: 284 | imageIds = cleanup_gcp(node_config) 285 | else: 286 | raise ValueError(f"Unsupported provider: '{provider}'") 287 | 288 | if len(imageIds) > 0: 289 | click.secho( 290 | f"Deleted {str(len(imageIds))} old image(s) from remote docker repo", 291 | fg="yellow", 292 | ) 293 | 294 | 295 | def cleanup_aws(docker_repo): 296 | aws_public, aws_secret = get_aws_keys() 297 | ecr_client = boto3.client( 298 | "ecr", 299 | region_name="us-east-1", 300 | aws_access_key_id=aws_public, 301 | aws_secret_access_key=aws_secret, 302 | ) 303 | 304 | docker_repo_name = docker_repo.split("/")[-1] 305 | 306 | resp = ecr_client.list_images( 307 | repositoryName=docker_repo_name, filter={"tagStatus": "UNTAGGED"} 308 | ) 309 | 310 | imageIds = resp["imageIds"] 311 | if len(imageIds) == 0: 312 | return [] 313 | 314 | resp = ecr_client.batch_delete_image( 315 | repositoryName=docker_repo_name, imageIds=imageIds 316 | ) 317 | 318 | return resp["imageIds"] 319 | 320 | 321 | def cleanup_azure(node_config): 322 | _, azure_client, azure_tenant, azure_secret = get_azure_keys() 323 | credentials = ClientSecretCredential( 324 | client_id=azure_client, tenant_id=azure_tenant, client_secret=azure_secret 325 | ) 326 | acr_client = ContainerRegistryClient(node_config["acr_login_server"], credentials) 327 | docker_repo = node_config["docker_repo"] 328 | node_repo_name = [ 329 | repo_name 330 | for repo_name in acr_client.list_repository_names() 331 | if repo_name == docker_repo.split("/")[-1] 332 | ][0] 333 | 334 | # get all manifests, ordered by last update time 335 | manifest_list = [ 336 | repo_detail 337 | for repo_detail in acr_client.list_manifest_properties( 338 | node_repo_name, order_by=ArtifactManifestOrder.LAST_UPDATED_ON_DESCENDING 339 | ) 340 | ] 341 | # Remove all but the latest manifest 342 | removed_manifests = [] 343 | for manifest in manifest_list[1:]: 344 | acr_client.update_manifest_properties( 345 | node_repo_name, manifest.digest, can_write=True, can_delete=True 346 | ) 347 | removed_manifests.append(manifest.digest) 348 | acr_client.delete_manifest(node_repo_name, manifest.digest) 349 | return removed_manifests 350 | 351 | 352 | def cleanup_gcp(node_config): 353 | print(node_config) 354 | gcp_key_path = get_gcp_keys() 355 | os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = gcp_key_path 356 | 357 | node_name = 
node_config["docker_repo"].split("/")[-1] 358 | 359 | client = artifactregistry_v1.ArtifactRegistryClient() 360 | list_images_request = artifactregistry_v1.ListDockerImagesRequest( 361 | parent=node_config["registry_id"] 362 | ) 363 | page_result = client.list_docker_images(request=list_images_request) 364 | 365 | latest_image_name = "" 366 | for response in page_result: 367 | if "latest" in response.tags: 368 | latest_image_name = response.name 369 | 370 | versions = artifactregistry_v1.ListVersionsRequest( 371 | parent=f"{node_config['registry_id']}/packages/{node_name}" 372 | ) 373 | page_result = client.list_versions(request=versions) 374 | versions_to_delete = [] 375 | for response in page_result: 376 | if response.metadata["name"] != latest_image_name: 377 | versions_to_delete.append(response.name) 378 | 379 | for version in versions_to_delete: 380 | result = client.delete_version(name=version) 381 | 382 | # Nothing to do 383 | return versions_to_delete 384 | -------------------------------------------------------------------------------- /numerai/cli/util/files.py: -------------------------------------------------------------------------------- 1 | import json 2 | import shutil 3 | 4 | import click 5 | 6 | from numerai.cli.constants import * 7 | 8 | 9 | def load_config(path): 10 | if os.stat(path).st_size == 0: 11 | return {} 12 | with open(path) as f: 13 | return json.load(f) 14 | 15 | 16 | def store_config(path, obj): 17 | with open(path, "w+") as f: 18 | json.dump(obj, f, indent=2) 19 | 20 | 21 | def maybe_create(path, protected=False): 22 | created = False 23 | 24 | directory = os.path.dirname(path) 25 | if not os.path.exists(directory): 26 | os.makedirs(directory) 27 | 28 | if not os.path.exists(path): 29 | created = True 30 | if protected: 31 | store_config(os.open(path, os.O_CREAT | os.O_WRONLY, 0o600), {}) 32 | os.chmod(path, 0o600) 33 | else: 34 | store_config(path, {}) 35 | 36 | return created 37 | 38 | 39 | def load_or_init_nodes(node=None): 40 | maybe_create(NODES_PATH) 41 | cfg = load_config(NODES_PATH) 42 | try: 43 | return cfg[node] if node else cfg 44 | except KeyError: 45 | click.secho( 46 | "Node has not been configured, run `numerai node --help` " 47 | "to learn how to create one", 48 | fg="red", 49 | ) 50 | exit(1) 51 | 52 | 53 | def copy_file(src_file, dst_path, force=False, verbose=True): 54 | if not os.path.exists(dst_path): 55 | if verbose: 56 | click.secho(f"creating directory {dst_path}", fg="yellow") 57 | os.mkdir(dst_path) 58 | dst_file = os.path.join(dst_path, os.path.basename(src_file)) 59 | if os.path.exists(dst_file) and not force: 60 | if not click.confirm(f"{dst_file} already exists. Overwrite?"): 61 | return 62 | if verbose: 63 | click.secho(f"copying file {dst_file}", fg="yellow") 64 | shutil.copy(src_file, dst_path) 65 | 66 | 67 | def copy_files(src, dst, force=False, verbose=True): 68 | if not os.path.exists(dst): 69 | os.mkdir(dst) 70 | for filename in os.listdir(src): 71 | src_file = os.path.join(src, filename) 72 | dst_file = os.path.join(dst, filename) 73 | if os.path.exists(dst_file) and not force: 74 | if not click.confirm(f"{filename} already exists. 
Overwrite?"): 75 | return 76 | if os.path.isdir(src_file): 77 | if verbose: 78 | click.secho(f"copying directory {dst_file}", fg="yellow") 79 | os.makedirs(dst_file, exist_ok=True) 80 | copy_files(src_file, dst_file, force=force, verbose=verbose) 81 | else: 82 | if verbose: 83 | click.secho(f"copying file {dst_file}", fg="yellow") 84 | shutil.copy(src_file, dst_file) 85 | 86 | 87 | def copy_example(example, dest, verbose): 88 | example_dir = os.path.join(EXAMPLE_PATH, example) 89 | dst_dir = dest if dest is not None else example 90 | 91 | if Path.home() == dst_dir: 92 | click.secho("Do not copy example files to your home directory.", fg="red") 93 | exit(1) 94 | 95 | click.echo(f"Copying {example} example to {dst_dir}") 96 | copy_files(example_dir, dst_dir, force=False, verbose=verbose) 97 | 98 | dockerignore_path = os.path.join(dst_dir, ".dockerignore") 99 | if not os.path.exists(dockerignore_path): 100 | with open(dockerignore_path, "a+") as f: 101 | f.write(".numerai\n") 102 | f.write("numerai_dataset*\n") 103 | f.write(".git\n") 104 | f.write("venv\n") 105 | return dst_dir 106 | 107 | 108 | def move_files(src, dst, verbose=False, exclude_files=[]): 109 | """ 110 | Function to move files from one folder to another, removing it from its original location 111 | Args: 112 | src (string): Folder location to move files from 113 | dst (string): Folder location to move files to 114 | verbose (bool, optional): Verbosity for the operation. Defaults to True. 115 | """ 116 | for item in os.listdir(src): 117 | src_path = os.path.join(src, item) 118 | if item in exclude_files: 119 | continue 120 | if verbose: 121 | click.secho(f"Moving {src_path} to {dst}") 122 | 123 | # Move the files and folders unless dst is a subfolder of src 124 | if os.path.isdir(src_path) and os.path.commonpath([src_path, dst]) != src_path: 125 | click.secho(f"Moving directory: {src_path}") 126 | shutil.copytree(src_path, os.path.join(dst, item)) 127 | shutil.rmtree(src_path) 128 | elif os.path.isfile(src_path): 129 | shutil.copy(src_path, dst) 130 | os.remove(src_path) 131 | if verbose: 132 | click.secho(f"Moved file: {src_path}") 133 | -------------------------------------------------------------------------------- /numerai/cli/util/keys.py: -------------------------------------------------------------------------------- 1 | import json 2 | from configparser import ConfigParser, MissingSectionHeaderError 3 | 4 | import boto3 5 | import click 6 | import shutil 7 | import numerapi 8 | 9 | 10 | from azure.identity import ClientSecretCredential # DefaultAzureCredential 11 | from azure.mgmt.subscription import SubscriptionClient 12 | 13 | from google.oauth2 import service_account 14 | from google.cloud import storage 15 | 16 | 17 | from numerai.cli.constants import * 18 | from numerai.cli.constants import KEYS_PATH 19 | from numerai.cli.util.debug import exception_with_msg 20 | from numerai.cli.util.files import load_or_init_nodes, store_config, maybe_create, load_config 21 | 22 | 23 | def reformat_keys(): 24 | # REFORMAT OLD KEYS 25 | try: 26 | config = ConfigParser() 27 | config.read(KEYS_PATH) 28 | click.secho(f"Old keyfile format found, reformatting...", fg="yellow") 29 | 30 | new_config = { 31 | "aws": { 32 | "AWS_ACCESS_KEY_ID": config["default"]["AWS_ACCESS_KEY_ID"], 33 | "AWS_SECRET_ACCESS_KEY": config["default"]["AWS_SECRET_ACCESS_KEY"], 34 | }, 35 | "numerai": { 36 | "NUMERAI_PUBLIC_ID": config["default"]["NUMERAI_PUBLIC_ID"], 37 | "NUMERAI_SECRET_KEY": config["default"]["NUMERAI_SECRET_KEY"], 38 | }, 39 | } 40 | 41 | 
del config["default"] 42 | with open(os.open(KEYS_PATH, os.O_CREAT | os.O_WRONLY, 0o600), "w") as f: 43 | config.write(f) 44 | json.dump(new_config, f, indent=2) 45 | 46 | # if this file is already a json file skip 47 | except MissingSectionHeaderError: 48 | pass 49 | 50 | 51 | def load_or_init_keys(provider=None): 52 | maybe_create(KEYS_PATH, protected=True) 53 | try: 54 | cfg = load_config(KEYS_PATH) 55 | except json.decoder.JSONDecodeError as e: 56 | reformat_keys() 57 | cfg = load_config(KEYS_PATH) 58 | if provider: 59 | return cfg[provider] 60 | return cfg 61 | 62 | 63 | def get_numerai_keys(): 64 | keys = load_or_init_keys() 65 | try: 66 | return ( 67 | keys["numerai"]["NUMERAI_PUBLIC_ID"], 68 | keys["numerai"]["NUMERAI_SECRET_KEY"], 69 | ) 70 | except KeyError: 71 | return None, None 72 | 73 | 74 | def prompt_for_key(name, default): 75 | hidden = sanitize_message(default) 76 | new = click.prompt(name, default=hidden).strip() 77 | if new == hidden: 78 | return default 79 | return new 80 | 81 | 82 | def config_numerai_keys(): 83 | numerai_public, numerai_secret = get_numerai_keys() 84 | 85 | numerai_public = prompt_for_key("NUMERAI_PUBLIC_ID", numerai_public) 86 | numerai_secret = prompt_for_key("NUMERAI_SECRET_KEY", numerai_secret) 87 | check_numerai_validity(numerai_public, numerai_secret) 88 | 89 | keys_config = load_or_init_keys() 90 | keys_config.setdefault("numerai", {}) 91 | keys_config["numerai"]["NUMERAI_PUBLIC_ID"] = numerai_public 92 | keys_config["numerai"]["NUMERAI_SECRET_KEY"] = numerai_secret 93 | store_config(KEYS_PATH, keys_config) 94 | 95 | 96 | def check_numerai_validity(key_id, secret): 97 | try: 98 | napi = numerapi.NumerAPI(key_id, secret) 99 | napi.get_account() 100 | except Exception: 101 | raise exception_with_msg("Numerai keys seem to be invalid. 
" "Make sure you've entered them correctly.") 102 | 103 | 104 | def get_provider_keys(node): 105 | provider = load_or_init_nodes(node)["provider"] 106 | return load_or_init_keys(provider) 107 | 108 | 109 | def get_aws_keys(): 110 | keys = load_or_init_keys() 111 | try: 112 | return keys["aws"]["AWS_ACCESS_KEY_ID"], keys["aws"]["AWS_SECRET_ACCESS_KEY"] 113 | except KeyError: 114 | return None, None 115 | 116 | 117 | def get_azure_keys(): 118 | keys = load_or_init_keys() 119 | try: 120 | return ( 121 | keys["azure"]["ARM_SUBSCRIPTION_ID"], 122 | keys["azure"]["ARM_CLIENT_ID"], 123 | keys["azure"]["ARM_TENANT_ID"], 124 | keys["azure"]["ARM_CLIENT_SECRET"], 125 | ) 126 | 127 | except KeyError: 128 | return None, None, None, None 129 | 130 | 131 | def get_gcp_keys(): 132 | keys = load_or_init_keys() 133 | try: 134 | return keys["gcp"]["GCP_KEYS_PATH"] 135 | except KeyError: 136 | return None 137 | 138 | 139 | def get_gcp_project(): 140 | with open(get_gcp_keys(), "r") as gcp_keys: 141 | gcp_keys_content = json.loads(gcp_keys.read()) 142 | return gcp_keys_content["project_id"] 143 | 144 | 145 | def config_aws_keys(): 146 | aws_public, aws_secret = get_aws_keys() 147 | aws_public = prompt_for_key("AWS_ACCESS_KEY_ID", aws_public) 148 | aws_secret = prompt_for_key("AWS_SECRET_ACCESS_KEY", aws_secret) 149 | check_aws_validity(aws_public, aws_secret) 150 | 151 | keys_config = load_or_init_keys() 152 | keys_config.setdefault("aws", {}) 153 | keys_config["aws"]["AWS_ACCESS_KEY_ID"] = aws_public 154 | keys_config["aws"]["AWS_SECRET_ACCESS_KEY"] = aws_secret 155 | store_config(KEYS_PATH, keys_config) 156 | 157 | 158 | def config_azure_keys(): 159 | azure_subs_id, azure_client, azure_tenant, azure_secret = get_azure_keys() 160 | azure_subs_id = prompt_for_key("Azure Subscription ID [ARM_SUBSCRIPTION_ID]", azure_subs_id) 161 | azure_client = prompt_for_key("Azure Client ID [ARM_CLIENT_ID]", azure_client) 162 | azure_tenant = prompt_for_key("Azure Tenant ID [ARM_TENANT_ID]", azure_tenant) 163 | azure_secret = prompt_for_key("Azure Client Secret [ARM_CLIENT_SECRET]", azure_secret) 164 | check_azure_validity(azure_subs_id, azure_client, azure_tenant, azure_secret) 165 | 166 | keys_config = load_or_init_keys() 167 | keys_config.setdefault("azure", {}) 168 | # Renaming the keys to match the environment variables that TF would recognize 169 | # https://developer.hashicorp.com/terraform/language/settings/backends/azurerm#environment 170 | keys_config["azure"]["ARM_SUBSCRIPTION_ID"] = azure_subs_id 171 | keys_config["azure"]["ARM_CLIENT_ID"] = azure_client 172 | keys_config["azure"]["ARM_TENANT_ID"] = azure_tenant 173 | keys_config["azure"]["ARM_CLIENT_SECRET"] = azure_secret 174 | store_config(KEYS_PATH, keys_config) 175 | 176 | 177 | def config_gcp_keys(): 178 | gcp_keys_path = get_gcp_keys() 179 | gcp_keys_path_new = prompt_for_key( 180 | f"Absolute path to GCP keys file (will be copied to {GCP_KEYS_PATH})", 181 | gcp_keys_path, 182 | ) 183 | if gcp_keys_path_new != gcp_keys_path: 184 | shutil.copy(gcp_keys_path_new, GCP_KEYS_PATH) 185 | 186 | check_gcp_validity() 187 | 188 | keys_config = load_or_init_keys() 189 | keys_config.setdefault("gcp", {}) 190 | keys_config["gcp"]["GCP_KEYS_PATH"] = GCP_KEYS_PATH 191 | store_config(KEYS_PATH, keys_config) 192 | 193 | 194 | def check_aws_validity(key_id, secret): 195 | try: 196 | client = boto3.client("s3", aws_access_key_id=key_id, aws_secret_access_key=secret) 197 | client.list_buckets() 198 | except Exception as e: 199 | if "NotSignedUp" in str(e): 200 | raise 
exception_with_msg( 201 | f"Your AWS keys are valid, but the account is not finished signing up. " 202 | f"You either need to update your credit card in AWS at " 203 | f"https://portal.aws.amazon.com/billing/signup?type=resubscribe#/resubscribed, " 204 | f"or wait up to 24 hours for their verification process to complete." 205 | ) 206 | 207 | raise exception_with_msg( 208 | f"AWS keys seem to be invalid. Make sure you've entered them correctly " 209 | f"and that your user has the necessary permissions (for help, see " 210 | f"https://github.com/numerai/numerai-cli/wiki/Amazon-Web-Services)." 211 | ) 212 | 213 | 214 | def check_azure_validity(subs_id, client_id, tenant_id, secret): 215 | try: 216 | credentials = ClientSecretCredential(client_id=client_id, tenant_id=tenant_id, client_secret=secret) 217 | sub_client = SubscriptionClient(credentials) 218 | subs = [sub.as_dict() for sub in sub_client.subscriptions.list()] 219 | all_subs_ids = [subs_details["subscription_id"] for subs_details in subs] 220 | if subs_id not in all_subs_ids: 221 | raise Exception("Invalid Subscription ID") 222 | 223 | except Exception as e: 224 | error_msg = ( 225 | f"Make sure you follow the instructions in the wiki page: " 226 | + f"https://github.com/numerai/numerai-cli/blob/master/docs/azure_setup_guide.md " 227 | + f"to set up the Client ID, Tenant ID and Client Secret correctly." 228 | ) 229 | if "AADSTS700016" in str(e): 230 | raise exception_with_msg(f"Invalid Client ID. " + error_msg) 231 | elif "double check your tenant name" in str(e): 232 | raise exception_with_msg(f"Invalid Tenant ID. " + error_msg) 233 | elif "Invalid client secret" in str(e): 234 | raise exception_with_msg(f"Invalid Client Secret. " + error_msg) 235 | elif "Invalid Subscription ID" in str(e): 236 | raise exception_with_msg( 237 | f"Azure Subscription ID is invalid, or IAM is NOT set up correctly. " 238 | + f"Your Azure Client ID, Tenant ID and Client Secret are valid. " 239 | + f"Make sure to follow the instructions in the wiki page: " 240 | + f"https://github.com/numerai/numerai-cli/blob/master/docs/azure_setup_guide.md" 241 | ) 242 | 243 | 244 | def check_gcp_validity(): 245 | try: 246 | credentials = service_account.Credentials.from_service_account_file( 247 | GCP_KEYS_PATH 248 | ) 249 | client = storage.Client(credentials=credentials) 250 | # listing buckets is a cheap call that exercises the credentials 251 | client.list_buckets() 252 | except Exception as e: 253 | error_msg = ( 254 | f"Make sure you follow the instructions in the wiki page: " 255 | + f"https://github.com/numerai/numerai-cli/blob/master/docs/gcp_setup_guide.md " 256 | + f"to set up the keys file correctly." 257 | ) 258 | if "Request had invalid authentication credentials." in str(e): 259 | raise exception_with_msg(f"Invalid credentials. 
" + error_msg) 260 | else: 261 | raise e 262 | 263 | 264 | def config_provider_keys(cloud_provider): 265 | if cloud_provider == PROVIDER_AWS: 266 | config_aws_keys() 267 | elif cloud_provider == PROVIDER_AZURE: 268 | config_azure_keys() 269 | elif cloud_provider == PROVIDER_GCP: 270 | config_gcp_keys() 271 | 272 | 273 | def sanitize_message(message, censor_substr=None): 274 | if message is None: 275 | return None 276 | all_keys = get_aws_keys() + get_numerai_keys() + get_azure_keys() 277 | for key in all_keys: 278 | if key: 279 | try: 280 | message = message.replace(key, f"...{key[-5:]}") 281 | except AttributeError: 282 | continue 283 | if censor_substr: 284 | message = message.replace(censor_substr, f"...{censor_substr[-5:]}") 285 | return message 286 | -------------------------------------------------------------------------------- /numerai/cli/util/terraform.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from numerai.cli.constants import PROVIDERS, NODES_PATH 4 | from numerai.cli.util.docker import terraform 5 | from numerai.cli.util import docker 6 | from numerai.cli.util.files import load_or_init_nodes, store_config 7 | from numerai.cli.util.keys import load_or_init_keys 8 | 9 | import click 10 | 11 | 12 | def apply_terraform(nodes_config, affected_providers, provider, verbose): 13 | # Apply terraform for any affected provider 14 | for affected_provider in affected_providers: 15 | if affected_provider in PROVIDERS: 16 | click.secho(f"Updating resources in {affected_provider}") 17 | terraform( 18 | "apply -auto-approve", 19 | verbose, 20 | affected_provider, 21 | env_vars=load_or_init_keys(affected_provider), 22 | inputs={"node_config_file": "nodes.json"}, 23 | ) 24 | else: 25 | click.secho(f"provider {affected_provider} not supported", fg="red") 26 | exit(1) 27 | click.secho("cloud resources created successfully", fg="green") 28 | 29 | # terraform output for node config, same for aws and azure 30 | click.echo(f"saving node configuration to {NODES_PATH}...") 31 | 32 | res = terraform(f"output -json {provider}_nodes", verbose, provider).decode("utf-8") 33 | try: 34 | nodes = json.loads(res) 35 | except json.JSONDecodeError: 36 | click.secho("failed to save node configuration, please retry.", fg="red") 37 | return 38 | for node_name, data in nodes.items(): 39 | nodes_config[node_name].update(data) 40 | 41 | store_config(NODES_PATH, nodes_config) 42 | if verbose: 43 | click.secho(f"new config:\n{json.dumps(load_or_init_nodes(), indent=2)}") 44 | 45 | 46 | def create_azure_registry(provider, provider_keys, verbose): 47 | """Creates a registry for azure""" 48 | terraform("init -upgrade", verbose, provider) 49 | terraform( 50 | 'apply -target="azurerm_container_registry.registry[0]" -target="azurerm_resource_group.acr_rg[0]" -auto-approve ', 51 | verbose, 52 | "azure", 53 | env_vars=provider_keys, 54 | inputs={"node_config_file": "nodes.json"}, 55 | ) 56 | res = terraform("output -json acr_repo_details", True, provider).decode("utf-8") 57 | return json.loads(res) 58 | 59 | 60 | def create_gcp_registry(provider, verbose): 61 | """Creates a registry for GCP""" 62 | terraform("init -upgrade", verbose, provider) 63 | terraform( 64 | 'apply -target="google_project_service.cloud_resource_manager" -auto-approve ', 65 | verbose, 66 | "gcp", 67 | inputs={"node_config_file": "nodes.json"}, 68 | ) 69 | terraform( 70 | 'apply -target="google_artifact_registry_repository.registry[0]" -auto-approve ', 71 | verbose, 72 | "gcp", 73 | 
inputs={"node_config_file": "nodes.json"}, 74 | ) 75 | res = terraform("output -json artifact_registry_details", True, provider).decode( 76 | "utf-8" 77 | ) 78 | return json.loads(res) 79 | -------------------------------------------------------------------------------- /numerai/examples/crypto-python3/Dockerfile: -------------------------------------------------------------------------------- 1 | # Provides us a working Python 3 environment. 2 | FROM python:3.9 3 | 4 | # These are docker arguments that `numerai node deploy/test` will always pass into docker. 5 | # They are then set in your environment so that numerapi can access them when uploading submissions. 6 | # You can also access them from your script like so: 7 | # import os 8 | # public_id = os.environ["NUMERAI_PUBLIC_ID"] 9 | # secret_key = os.environ["NUMERAI_SECRET_KEY"] 10 | ARG NUMERAI_PUBLIC_ID 11 | ENV NUMERAI_PUBLIC_ID=$NUMERAI_PUBLIC_ID 12 | 13 | ARG NUMERAI_SECRET_KEY 14 | ENV NUMERAI_SECRET_KEY=$NUMERAI_SECRET_KEY 15 | 16 | ARG MODEL_ID 17 | ENV MODEL_ID=$MODEL_ID 18 | 19 | ARG SRC_PATH 20 | ENV SRC_PATH=$SRC_PATH 21 | 22 | # We then add the requirements.txt file, and pip install every requirement from it. 23 | # The `ADD [source] [destination]` command will take a file from the source directory on your computer 24 | # and copy it over to the destination directory in the Docker container. 25 | ADD $SRC_PATH/requirements.txt . 26 | RUN pip install -r requirements.txt --no-cache-dir 27 | 28 | # Now, add everything in the source code directory. 29 | # (including your code, compiled files, serialized models, everything...) 30 | ADD $SRC_PATH . 31 | 32 | # This sets the default command to run your docker container. 33 | # It runs by default in the cloud and when running `numerai node test`. 34 | # This is overridden when using `numerai node test --command [COMMAND]`. 
35 | CMD [ "python", "./predict.py" ] 36 | -------------------------------------------------------------------------------- /numerai/examples/crypto-python3/predict.py: -------------------------------------------------------------------------------- 1 | """ Sample tournament model in python 3 """ 2 | 3 | import os 4 | import json 5 | import logging 6 | import joblib 7 | import numerapi 8 | import pandas as pd 9 | import lightgbm as lgbm 10 | 11 | logging.basicConfig(filename="log.txt", filemode="a") 12 | 13 | TOURNAMENT = 12 14 | DATA_VERSION = "crypto/v1.0" 15 | TRAINED_MODEL_PREFIX = "./trained_model" 16 | 17 | DEFAULT_MODEL_ID = None 18 | DEFAULT_PUBLIC_ID = None 19 | DEFAULT_SECRET_KEY = None 20 | 21 | # Read model id and initialize API client with api keys 22 | # these are set by the docker image that you deploy after training, 23 | # but you can also set them manually above for local testing 24 | MODEL_ID = os.getenv("MODEL_ID", DEFAULT_MODEL_ID) 25 | napi = numerapi.NumerAPI( 26 | public_id=os.getenv("NUMERAI_PUBLIC_ID", DEFAULT_PUBLIC_ID), 27 | secret_key=os.getenv("NUMERAI_SECRET_KEY", DEFAULT_SECRET_KEY), 28 | ) 29 | 30 | 31 | def train(napi, model_id, force_training=False): 32 | model_name = TRAINED_MODEL_PREFIX 33 | if model_id: 34 | model_name += f"_{model_id}" 35 | 36 | # load a model if we have a trained model already and we aren't forcing a training session 37 | if os.path.exists(model_name) and not force_training: 38 | logging.info("loading existing trained model") 39 | model = joblib.load(model_name) 40 | return model 41 | 42 | logging.info("reading training data") 43 | napi.download_dataset(f"{DATA_VERSION}/train_targets.parquet") 44 | target = pd.read_parquet(f"{DATA_VERSION}/train_targets.parquet") 45 | 46 | # TODO: implement get_features and train a model 47 | # This will take a few minutes 🍵 48 | # logging.info("training model") 49 | # model = lgbm.LGBMRegressor( 50 | # n_estimators=2000, 51 | # learning_rate=0.01, 52 | # max_depth=5, 53 | # num_leaves=2**5-1, 54 | # colsample_bytree=0.1 55 | # ) 56 | # model.fit( 57 | # train_data[feature_cols], 58 | # train_data["target"] 59 | # ) 60 | 61 | # logging.info("saving model") 62 | # joblib.dump(model, model_name) 63 | # return model 64 | 65 | # just return the target for now 66 | return target 67 | 68 | 69 | def predict(napi, model): 70 | logging.info("reading prediction data") 71 | napi.download_dataset(f"{DATA_VERSION}/live_universe.parquet") 72 | live_universe = pd.read_parquet(f"{DATA_VERSION}/live_universe.parquet") 73 | 74 | # TODO: implement get_features and predict the target 75 | # logging.info("generating predictions") 76 | # predictions = model.predict(get_features(live_universe)) 77 | # predictions = pd.DataFrame( 78 | # predictions, columns=["prediction"], index=predict_data.index 79 | # ) 80 | # return predictions 81 | 82 | # just return the latest target for now 83 | napi.download_dataset(f"{DATA_VERSION}/train_targets.parquet") 84 | target = pd.read_parquet(f"{DATA_VERSION}/train_targets.parquet") 85 | return ( 86 | target[target.date == target.date.max()] 87 | .drop(columns=["date"]) 88 | .rename(columns={"target": "prediction"}) 89 | .set_index("symbol") 90 | ) 91 | 92 | 93 | def submit(predictions, predict_output_path="predictions.csv", model_id=None): 94 | logging.info("writing predictions to file and submitting") 95 | include_index = predictions.index.name is not None 96 | predictions.to_csv(predict_output_path, index=include_index) 97 | napi.upload_predictions( 98 | predict_output_path, 
model_id=model_id, tournament=TOURNAMENT 99 | ) 100 | 101 | 102 | if __name__ == "__main__": 103 | trained_model = train(napi, MODEL_ID) 104 | predictions = predict(napi, trained_model) 105 | submit(predictions, model_id=MODEL_ID) 106 | -------------------------------------------------------------------------------- /numerai/examples/crypto-python3/requirements.txt: -------------------------------------------------------------------------------- 1 | numerapi==2.14.0 2 | pandas==2.1.3 3 | pyarrow==7.0.0 4 | joblib==1.2.0 5 | lightgbm==3.3.1 -------------------------------------------------------------------------------- /numerai/examples/signals-python3/Dockerfile: -------------------------------------------------------------------------------- 1 | # Provides us a working Python 3 environment. 2 | FROM python:3.9 3 | 4 | # These are docker arguments that `numerai node deploy/test` will always pass into docker. 5 | # They are then set in your environment so that numerapi can access them when uploading submissions. 6 | # You can also access them from your script like so: 7 | # import os 8 | # public_id = os.environ["NUMERAI_PUBLIC_ID"] 9 | # secret_key = os.environ["NUMERAI_SECRET_KEY"] 10 | ARG NUMERAI_PUBLIC_ID 11 | ENV NUMERAI_PUBLIC_ID=$NUMERAI_PUBLIC_ID 12 | 13 | ARG NUMERAI_SECRET_KEY 14 | ENV NUMERAI_SECRET_KEY=$NUMERAI_SECRET_KEY 15 | 16 | ARG MODEL_ID 17 | ENV MODEL_ID=$MODEL_ID 18 | 19 | ARG SRC_PATH 20 | ENV SRC_PATH=$SRC_PATH 21 | 22 | # We then add the requirements.txt file, and pip install every requirement from it. 23 | # The `ADD [source] [destination]` command will take a file from the source directory on your computer 24 | # and copy it over to the destination directory in the Docker container. 25 | ADD $SRC_PATH/requirements.txt . 26 | RUN pip install -r requirements.txt --no-cache-dir 27 | 28 | # Now, add everything in the source code directory. 29 | # (including your code, compiled files, serialized models, everything...) 30 | ADD $SRC_PATH . 31 | 32 | # This sets the default command to run your docker container. 33 | # It runs by default in the cloud and when running `numerai node test`. 34 | # This is overridden when using `numerai node test --command [COMMAND]`. 
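# The same image can also run the training entry point instead of the default
# below, e.g. via `numerai node test --command "python ./train.py"` or
# directly with docker (the tag "signals-node" is illustrative):
#   docker run --rm signals-node python ./train.py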
35 | CMD [ "python", "./predict.py" ] 36 | -------------------------------------------------------------------------------- /numerai/examples/signals-python3/predict.py: -------------------------------------------------------------------------------- 1 | """ Sample tournament model in python 3 """ 2 | 3 | import os 4 | import json 5 | import logging 6 | import joblib 7 | import numerapi 8 | import pandas as pd 9 | import lightgbm as lgbm 10 | 11 | logging.basicConfig(filename="log.txt", filemode="a") 12 | 13 | TOURNAMENT = 11 14 | DATA_VERSION = "signals/v1.0" 15 | TARGET_COL = "target" 16 | TRAINED_MODEL_PREFIX = "./trained_model" 17 | 18 | DEFAULT_MODEL_ID = None 19 | DEFAULT_PUBLIC_ID = None 20 | DEFAULT_SECRET_KEY = None 21 | 22 | # Read model id and initialize API client with api keys 23 | # these are set by the docker image that you deploy after training, 24 | # but you can also set them manually above for local testing 25 | MODEL_ID = os.getenv("MODEL_ID", DEFAULT_MODEL_ID) 26 | napi = numerapi.NumerAPI( 27 | public_id=os.getenv("NUMERAI_PUBLIC_ID", DEFAULT_PUBLIC_ID), 28 | secret_key=os.getenv("NUMERAI_SECRET_KEY", DEFAULT_SECRET_KEY), 29 | ) 30 | 31 | 32 | def train(napi, model_id, force_training=False): 33 | model_name = TRAINED_MODEL_PREFIX 34 | if model_id: 35 | model_name += f"_{model_id}" 36 | 37 | # load a model if we have a trained model already and we aren't forcing a training session 38 | if os.path.exists(model_name) and not force_training: 39 | logging.info("loading existing trained model") 40 | model = joblib.load(model_name) 41 | return model 42 | 43 | logging.info("reading training data") 44 | napi.download_dataset(f"{DATA_VERSION}/train.parquet") 45 | train_data = pd.read_parquet(f"{DATA_VERSION}/train.parquet") 46 | feature_cols = [ 47 | col 48 | for col in train_data.columns 49 | if col.startswith('feature_') 50 | and col not in ("feature_country", "feature_exchange_code") 51 | ] 52 | 53 | # This will take a few minutes 🍵 54 | logging.info("training model") 55 | model = lgbm.LGBMRegressor( 56 | n_estimators=2000, 57 | learning_rate=0.01, 58 | max_depth=5, 59 | num_leaves=2**5 - 1, 60 | colsample_bytree=0.1, 61 | ) 62 | model.fit(train_data[feature_cols], train_data["target"]) 63 | 64 | logging.info("saving model") 65 | joblib.dump(model, model_name) 66 | return model 67 | 68 | 69 | def predict(napi, model): 70 | logging.info("reading prediction data") 71 | napi.download_dataset(f"{DATA_VERSION}/live.parquet") 72 | predict_data = pd.read_parquet(f"{DATA_VERSION}/live.parquet").set_index( 73 | 'numerai_ticker' 74 | ) 75 | feature_cols = [ 76 | col 77 | for col in predict_data.columns 78 | if col.startswith('feature_') 79 | and col not in ("feature_country", "feature_exchange_code") 80 | ] 81 | print(predict_data) 82 | 83 | logging.info("generating predictions") 84 | predictions = model.predict(predict_data[feature_cols]) 85 | predictions = pd.DataFrame( 86 | predictions, columns=["prediction"], index=predict_data.index 87 | ) 88 | return predictions 89 | 90 | 91 | def submit(predictions, predict_output_path="predictions.csv", model_id=None): 92 | logging.info("writing predictions to file and submitting") 93 | include_index = predictions.index.name is not None 94 | predictions.to_csv(predict_output_path, index=include_index) 95 | print(predictions) 96 | napi.upload_predictions( 97 | predict_output_path, model_id=model_id, tournament=TOURNAMENT 98 | ) 99 | 100 | 101 | if __name__ == "__main__": 102 | trained_model = train(napi, MODEL_ID) 103 | predictions = 
predict(napi, trained_model) 104 | submit(predictions, model_id=MODEL_ID) 105 | -------------------------------------------------------------------------------- /numerai/examples/signals-python3/requirements.txt: -------------------------------------------------------------------------------- 1 | numerapi==2.14.0 2 | pandas==2.1.3 3 | pyarrow==7.0.0 4 | joblib==1.2.0 5 | lightgbm==3.3.1 -------------------------------------------------------------------------------- /numerai/examples/signals-python3/train.py: -------------------------------------------------------------------------------- 1 | """ An extra entry point specifically for training. Used when running locally """ 2 | 3 | import predict 4 | 5 | train_data_path, predict_data_path, predict_output_path = predict.download_data() 6 | 7 | model_id = predict.MODEL_ID 8 | model_type = predict.MODEL 9 | 10 | predict.train(train_data_path, model_id, model_type, force_training=True) 11 | -------------------------------------------------------------------------------- /numerai/examples/tournament-python3/Dockerfile: -------------------------------------------------------------------------------- 1 | # Provides us a working Python 3 environment. 2 | FROM python:3.9 3 | 4 | # These are docker arguments that `numerai node deploy/test` will always pass into docker. 5 | # They are then set in your environment so that numerapi can access them when uploading submissions. 6 | # You can also access them from your script like so: 7 | # import os 8 | # public_id = os.environ["NUMERAI_PUBLIC_ID"] 9 | # secret_key = os.environ["NUMERAI_SECRET_KEY"] 10 | ARG NUMERAI_PUBLIC_ID 11 | ENV NUMERAI_PUBLIC_ID=$NUMERAI_PUBLIC_ID 12 | 13 | ARG NUMERAI_SECRET_KEY 14 | ENV NUMERAI_SECRET_KEY=$NUMERAI_SECRET_KEY 15 | 16 | ARG MODEL_ID 17 | ENV MODEL_ID=$MODEL_ID 18 | 19 | ARG SRC_PATH 20 | ENV SRC_PATH=$SRC_PATH 21 | 22 | # We then add the requirements.txt file, and pip install every requirement from it. 23 | # The `ADD [source] [destination]` command will take a file from the source directory on your computer 24 | # and copy it over to the destination directory in the Docker container. 25 | ADD $SRC_PATH/requirements.txt . 26 | RUN pip install -r requirements.txt --no-cache-dir 27 | 28 | # Now, add everything in the source code directory. 29 | # (including your code, compiled files, serialized models, everything...) 30 | ADD $SRC_PATH . 31 | 32 | # This sets the default command to run your docker container. 33 | # It runs by default in the cloud and when running `numerai node test`. 34 | # This is overridden when using `numerai node test --command [COMMAND]`. 
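# Note on layer caching: because requirements.txt is ADDed and installed
# before the rest of the source, code-only changes reuse the cached pip
# install layer and rebuild much faster than a full dependency install.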
35 | CMD [ "python", "./predict.py" ] 36 | -------------------------------------------------------------------------------- /numerai/examples/tournament-python3/predict.py: -------------------------------------------------------------------------------- 1 | """ Sample tournament model in python 3 """ 2 | 3 | import os 4 | import json 5 | import logging 6 | import joblib 7 | import numerapi 8 | import pandas as pd 9 | import lightgbm as lgbm 10 | 11 | logging.basicConfig(filename="log.txt", filemode="a") 12 | 13 | TOURNAMENT = 8 14 | DATA_VERSION = "v4.1" 15 | ERA_COL = "era" 16 | DATA_TYPE_COL = "data_type" 17 | TARGET_COL = "target_nomi_v4_20" 18 | TRAINED_MODEL_PREFIX = "./trained_model" 19 | 20 | DEFAULT_MODEL_ID = None 21 | DEFAULT_PUBLIC_ID = None 22 | DEFAULT_SECRET_KEY = None 23 | 24 | # Read model id and initialize API client with api keys 25 | # these are set by the docker image that you deploy after training, 26 | # but you can also set them manually above for local testing 27 | MODEL_ID = os.getenv("MODEL_ID", DEFAULT_MODEL_ID) 28 | napi = numerapi.NumerAPI( 29 | public_id=os.getenv("NUMERAI_PUBLIC_ID", DEFAULT_PUBLIC_ID), 30 | secret_key=os.getenv("NUMERAI_SECRET_KEY", DEFAULT_SECRET_KEY), 31 | ) 32 | 33 | 34 | def get_features(napi): 35 | napi.download_dataset(f"{DATA_VERSION}/features.json") 36 | with open(f"{DATA_VERSION}/features.json", "r") as f: 37 | feature_metadata = json.load(f) 38 | return feature_metadata["feature_sets"]["small"] + [ 39 | ERA_COL, 40 | DATA_TYPE_COL, 41 | TARGET_COL, 42 | ] 43 | 44 | 45 | def train(napi, model_id, force_training=False): 46 | model_name = TRAINED_MODEL_PREFIX 47 | if model_id: 48 | model_name += f"_{model_id}" 49 | 50 | # load a model if we have a trained model already and we aren't forcing a training session 51 | if os.path.exists(model_name) and not force_training: 52 | logging.info("loading existing trained model") 53 | model = joblib.load(model_name) 54 | return model 55 | 56 | logging.info("reading training data") 57 | napi.download_dataset(f"{DATA_VERSION}/train.parquet") 58 | train_data = pd.read_parquet( 59 | f"{DATA_VERSION}/train.parquet", columns=get_features(napi) 60 | ) 61 | 62 | # This will take a few minutes 🍵 63 | logging.info("training model") 64 | model = lgbm.LGBMRegressor( 65 | n_estimators=2000, 66 | learning_rate=0.01, 67 | max_depth=5, 68 | num_leaves=2**5 - 1, 69 | colsample_bytree=0.1, 70 | ) 71 | model.fit( 72 | train_data.filter(like="feature_", axis="columns"), 73 | train_data[TARGET_COL], 74 | ) 75 | 76 | logging.info("saving model") 77 | joblib.dump(model, model_name) 78 | return model 79 | 80 | 81 | def predict(napi, model): 82 | logging.info("reading prediction data") 83 | napi.download_dataset(f"{DATA_VERSION}/live.parquet") 84 | predict_data = pd.read_parquet( 85 | f"{DATA_VERSION}/live.parquet", columns=get_features(napi) 86 | ) 87 | 88 | logging.info("generating predictions") 89 | predictions = model.predict(predict_data.filter(like="feature_", axis="columns")) 90 | predictions = pd.DataFrame( 91 | predictions, columns=["prediction"], index=predict_data.index 92 | ) 93 | return predictions 94 | 95 | 96 | def submit(predictions, predict_output_path="predictions.csv", model_id=None): 97 | logging.info("writing predictions to file and submitting") 98 | include_index = predictions.index.name is not None 99 | predictions.to_csv(predict_output_path, index=include_index) 100 | napi.upload_predictions( 101 | predict_output_path, model_id=model_id, tournament=TOURNAMENT 102 | ) 103 | 104 | 105 | if __name__ 
== "__main__": 106 | trained_model = train(napi, MODEL_ID) 107 | predictions = predict(napi, trained_model) 108 | submit(predictions, model_id=MODEL_ID) 109 | -------------------------------------------------------------------------------- /numerai/examples/tournament-python3/requirements.txt: -------------------------------------------------------------------------------- 1 | numerapi==2.14.0 2 | pandas==2.1.3 3 | pyarrow==7.0.0 4 | joblib==1.2.0 5 | lightgbm==3.3.1 -------------------------------------------------------------------------------- /numerai/examples/tournament-python3/train.py: -------------------------------------------------------------------------------- 1 | """ An extra entry point specifically for training. Used when running locally """ 2 | 3 | import predict 4 | 5 | train_data_path, predict_data_path, predict_output_path = predict.download_data() 6 | 7 | model_id = predict.MODEL_ID 8 | model_type = predict.MODEL 9 | 10 | predict.train(train_data_path, model_id, model_type, force_training=True) 11 | -------------------------------------------------------------------------------- /numerai/examples/tournament-rlang/Dockerfile: -------------------------------------------------------------------------------- 1 | # Provides us a working R lang environment. 2 | FROM r-base:latest 3 | 4 | # These are docker arguments that `numerai node deploy/test` will always pass into docker. 5 | # They are then set in your environment so that numerapi can access them when uploading submissions. 6 | # You can also access them from your script like so: 7 | # import os 8 | # public_id = os.environ["NUMERAI_PUBLIC_ID"] 9 | # secret_key = os.environ["NUMERAI_SECRET_KEY"] 10 | ARG NUMERAI_PUBLIC_ID 11 | ENV NUMERAI_PUBLIC_ID=$NUMERAI_PUBLIC_ID 12 | 13 | ARG NUMERAI_SECRET_KEY 14 | ENV NUMERAI_SECRET_KEY=$NUMERAI_SECRET_KEY 15 | 16 | ARG MODEL_ID 17 | ENV MODEL_ID=$MODEL_ID 18 | 19 | ARG SRC_PATH 20 | ENV SRC_PATH=$SRC_PATH 21 | 22 | # install dev tools 23 | RUN apt-get update -y && apt-get install -y \ 24 | build-essential \ 25 | libcurl4-gnutls-dev \ 26 | libxml2-dev \ 27 | libssl-dev 28 | RUN R -e "install.packages('devtools', dependencies=TRUE)" 29 | 30 | # We then add the install_packages.R file, and install every requirement from it. 31 | # The `ADD [source] [destination]` command will take a file from the source directory on your computer 32 | # and copy it over to the destination directory in the Docker container. 33 | ADD $SRC_PATH/install_packages.R . 34 | RUN Rscript ./install_packages.R 35 | 36 | # Now, add everything in the source code directory. 37 | # (including your code, compiled files, serialized models, everything...) 38 | ADD $SRC_PATH . 39 | 40 | # This sets the default command to run your docker container. 41 | # It runs by default in the cloud and when running `numerai node test`. 42 | # This is overridden when using `numerai node test --command [COMMAND]`. 
43 | CMD [ "Rscript", "./main.R" ] 44 | -------------------------------------------------------------------------------- /numerai/examples/tournament-rlang/install_packages.R: -------------------------------------------------------------------------------- 1 | devtools::install_github("Omni-Analytics-Group/Rnumerai") 2 | -------------------------------------------------------------------------------- /numerai/examples/tournament-rlang/main.R: -------------------------------------------------------------------------------- 1 | ## Load libraries 2 | library(Rnumerai) 3 | 4 | ## Configuration 5 | set_public_id(Sys.getenv("NUMERAI_PUBLIC_ID")) 6 | set_api_key(Sys.getenv("NUMERAI_SECRET_KEY")) 7 | MODEL_ID = Sys.getenv("MODEL_ID") 8 | 9 | ## Download data 10 | data_dir <- tempdir() 11 | data <- download_data(data_dir) 12 | data_train <- data$data_train 13 | data_tournament <- data$data_tournament 14 | 15 | ############################################################ 16 | ## Random Prediction || Insert your model here 17 | ############################################################ 18 | submission <- data.frame(id=data_tournament$id,prediction = sample(seq(.35,.65,by=.1),nrow(data_tournament),replace=TRUE)) 19 | ############################################################ 20 | ############################################################ 21 | 22 | 23 | ############################################################ 24 | ## Submit 25 | ############################################################ 26 | submission_id <- submit_predictions(submission,data_dir,tournament="Nomi",model_id=MODEL_ID) 27 | ############################################################ 28 | ############################################################ 29 | -------------------------------------------------------------------------------- /numerai/terraform/aws/-inputs.tf: -------------------------------------------------------------------------------- 1 | variable "region" { 2 | description = "The AWS region to create things in." 3 | type = string 4 | default = "us-east-1" 5 | } 6 | 7 | variable "az_count" { 8 | description = "Number of AZs to cover in a given region" 9 | type = string 10 | default = "2" 11 | } 12 | 13 | variable "node_config_file" { 14 | description = "Path to the json file used to configure nodes" 15 | type = string 16 | default = "nodes.json" 17 | } 18 | 19 | variable "node_container_port" { 20 | description = "Port exposed by the docker image to redirect traffic to" 21 | type = number 22 | default = 3000 23 | } 24 | 25 | variable "gateway_stage_path" { 26 | description = "The prefixed path for the api gateway" 27 | type = string 28 | default = "v1" 29 | } 30 | -------------------------------------------------------------------------------- /numerai/terraform/aws/-main.tf: -------------------------------------------------------------------------------- 1 | terraform { 2 | required_version = "1.5.6" 3 | } 4 | 5 | # Specify the provider and access details 6 | provider "aws" { 7 | region = var.region 8 | } 9 | 10 | locals { 11 | nodes = jsondecode(file(var.node_config_file)) 12 | aws_nodes = { 13 | for node, config in local.nodes : 14 | node => config if config.provider == "aws" 15 | } 16 | } 17 | 18 | module "aws" { 19 | count = length(local.aws_nodes) > 0 ? 
1 : 0
20 |   source              = "./aws"
21 |   aws_region          = var.region
22 |   az_count            = var.az_count
23 |   nodes               = local.aws_nodes
24 |   node_container_port = var.node_container_port
25 |   gateway_stage_path  = var.gateway_stage_path
26 | }
27 |
--------------------------------------------------------------------------------
/numerai/terraform/aws/-outputs.tf:
--------------------------------------------------------------------------------
1 | output "aws_nodes" {
2 |   value = try(length(module.aws) > 0, false) ? jsondecode(jsonencode(module.aws[0].outputs)) : {}
3 | }
4 |
--------------------------------------------------------------------------------
/numerai/terraform/aws/aws/-inputs.tf:
--------------------------------------------------------------------------------
1 | variable "aws_region" {
2 |   description = "The AWS region to create things in."
3 |   type        = string
4 |   default     = "us-east-1"
5 | }
6 |
7 | variable "az_count" {
8 |   description = "Number of AWS Availability Zones to cover in a given AWS region"
9 |   type        = string
10 |   default     = "1"
11 | }
12 |
13 | variable "nodes" {
14 |   description = "Map of node names to their configurations"
15 |   type        = map(map(any))
16 | }
17 |
18 | variable "node_container_port" {
19 |   description = "Port exposed by the docker image to redirect traffic to"
20 |   type        = number
21 |   default     = 3000
22 | }
23 |
24 | variable "gateway_stage_path" {
25 |   description = "The prefixed path for the api gateway"
26 |   type        = string
27 |   default     = "v1"
28 | }
29 |
--------------------------------------------------------------------------------
/numerai/terraform/aws/aws/-locals.tf:
--------------------------------------------------------------------------------
1 | locals {
2 |   node_prefix          = "numerai-submission"
3 |   max_node_volume_size = max([for node, config in var.nodes : lookup(config, "volume", 0)]...)
4 | } 5 | -------------------------------------------------------------------------------- /numerai/terraform/aws/aws/-outputs.tf: -------------------------------------------------------------------------------- 1 | output "outputs" { 2 | value = { for node, config in var.nodes : 3 | node => { 4 | docker_repo = aws_ecr_repository.node[node].repository_url 5 | webhook_url = aws_lambda_function_url.submission[node].function_url 6 | cluster_log_group = aws_cloudwatch_log_group.ec2[node].name 7 | webhook_log_group = aws_cloudwatch_log_group.lambda[node].name 8 | cluster_arn = aws_batch_compute_environment.node.ecs_cluster_arn 9 | } 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /numerai/terraform/aws/aws/cluster.tf: -------------------------------------------------------------------------------- 1 | ############### 2 | # ECR and IAM # 3 | ############### 4 | 5 | resource "aws_ecr_repository" "node" { 6 | for_each = { for name, config in var.nodes : name => config } 7 | force_delete = true 8 | name = each.key 9 | } 10 | 11 | resource "aws_iam_role" "ecs_task_execution_role" { 12 | name = local.node_prefix 13 | assume_role_policy = jsonencode({ 14 | Version : "2012-10-17", 15 | Statement : [{ 16 | Effect : "Allow", 17 | Action : "sts:AssumeRole", 18 | Principal : { 19 | Service : [ 20 | "ecs-tasks.amazonaws.com" 21 | ] 22 | } 23 | }] 24 | }) 25 | } 26 | 27 | resource "aws_iam_role_policy_attachment" "ecs_task_execution_role" { 28 | role = aws_iam_role.ecs_task_execution_role.name 29 | policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy" 30 | } 31 | 32 | 33 | ############################# 34 | # Batch compute environment # 35 | ############################# 36 | data "aws_iam_policy_document" "ec2_assume_role" { 37 | statement { 38 | effect = "Allow" 39 | 40 | principals { 41 | type = "Service" 42 | identifiers = ["ec2.amazonaws.com"] 43 | } 44 | 45 | actions = ["sts:AssumeRole"] 46 | } 47 | } 48 | 49 | resource "aws_iam_role" "batch_ecs_instance_role" { 50 | name = "batch_ecs_instance_role" 51 | assume_role_policy = data.aws_iam_policy_document.ec2_assume_role.json 52 | } 53 | 54 | resource "aws_iam_role_policy_attachment" "batch_ecs_instance_role" { 55 | role = aws_iam_role.batch_ecs_instance_role.name 56 | policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonEC2ContainerServiceforEC2Role" 57 | } 58 | 59 | resource "aws_iam_instance_profile" "batch_ecs_instance_role" { 60 | name = "batch_ecs_instance_role" 61 | role = aws_iam_role.batch_ecs_instance_role.name 62 | } 63 | 64 | data "aws_iam_policy_document" "batch_assume_role" { 65 | statement { 66 | effect = "Allow" 67 | 68 | principals { 69 | type = "Service" 70 | identifiers = ["batch.amazonaws.com"] 71 | } 72 | 73 | actions = ["sts:AssumeRole"] 74 | } 75 | } 76 | 77 | resource "aws_iam_role" "aws_batch_service_role" { 78 | name = "aws_batch_service_role" 79 | assume_role_policy = data.aws_iam_policy_document.batch_assume_role.json 80 | } 81 | 82 | resource "aws_iam_role_policy_attachment" "aws_batch_service_role" { 83 | role = aws_iam_role.aws_batch_service_role.name 84 | policy_arn = "arn:aws:iam::aws:policy/service-role/AWSBatchServiceRole" 85 | } 86 | 87 | data "aws_ami" "ecs_al2" { 88 | most_recent = true 89 | filter { 90 | name = "name" 91 | values = ["amzn2-ami-ecs-hvm-*-x86_64-ebs"] 92 | } 93 | } 94 | 95 | resource "aws_launch_template" "node" { 96 | image_id = data.aws_ami.ecs_al2.id 97 | update_default_version = true 98 | dynamic 
"block_device_mappings" { 99 | for_each = local.max_node_volume_size > 0 ? {size: local.max_node_volume_size} : {} 100 | content { 101 | device_name = "/dev/xvda" 102 | 103 | ebs { 104 | encrypted = true 105 | volume_size = local.max_node_volume_size 106 | volume_type = "gp3" 107 | } 108 | } 109 | } 110 | } 111 | 112 | resource "aws_batch_compute_environment" "node" { 113 | compute_environment_name_prefix = "${local.node_prefix}-" 114 | 115 | compute_resources { 116 | instance_role = aws_iam_instance_profile.batch_ecs_instance_role.arn 117 | 118 | launch_template { 119 | launch_template_id = aws_launch_template.node.id 120 | version = aws_launch_template.node.latest_version 121 | } 122 | 123 | max_vcpus = 64 124 | 125 | security_group_ids = [ 126 | aws_security_group.ecs_tasks.id 127 | ] 128 | 129 | subnets = [for s in aws_subnet.public : s.id] 130 | 131 | type = "EC2" 132 | allocation_strategy = "BEST_FIT" 133 | instance_type = ["optimal"] 134 | } 135 | 136 | service_role = aws_iam_role.aws_batch_service_role.arn 137 | type = "MANAGED" 138 | depends_on = [aws_iam_role_policy_attachment.aws_batch_service_role] 139 | 140 | lifecycle { 141 | create_before_destroy = true 142 | } 143 | } 144 | 145 | 146 | resource "aws_batch_job_queue" "node" { 147 | name = "${local.node_prefix}-queue" 148 | 149 | state = "ENABLED" 150 | priority = 1 151 | 152 | compute_environment_order { 153 | order = 1 154 | compute_environment = aws_batch_compute_environment.node.arn 155 | } 156 | } 157 | 158 | 159 | ############# 160 | # Job Setup # 161 | ############# 162 | 163 | resource "aws_cloudwatch_log_group" "ec2" { 164 | for_each = { for name, config in var.nodes : name => config } 165 | 166 | name = "/ec2/service/${each.key}" 167 | retention_in_days = "14" 168 | } 169 | 170 | resource "aws_batch_job_definition" "node" { 171 | for_each = { for name, config in var.nodes : name => config } 172 | 173 | name = each.key 174 | type = "container" 175 | timeout { 176 | attempt_duration_seconds = each.value.timeout_minutes * 60 177 | } 178 | 179 | retry_strategy { 180 | attempts = 2 181 | evaluate_on_exit { 182 | on_reason = "CannotInspectContainerError:*" 183 | action = "RETRY" 184 | } 185 | evaluate_on_exit { 186 | on_reason = "CannotPullContainerError:*" 187 | action = "RETRY" 188 | } 189 | evaluate_on_exit { 190 | action = "RETRY" 191 | on_reason = "CannotStartContainerError:*" 192 | } 193 | evaluate_on_exit { 194 | action = "RETRY" 195 | on_reason = "Task failed to start" 196 | } 197 | evaluate_on_exit { 198 | action = "EXIT" 199 | on_reason = "*" 200 | } 201 | } 202 | 203 | container_properties = jsonencode({ 204 | image = aws_ecr_repository.node[each.key].repository_url 205 | executionRoleArn = aws_iam_role.ecs_task_execution_role.arn 206 | 207 | logConfiguration = { 208 | "logDriver" : "awslogs", 209 | "options" : { 210 | "awslogs-group" : aws_cloudwatch_log_group.ec2[each.key].name, 211 | "awslogs-region" : var.aws_region, 212 | "awslogs-stream-prefix" : "ecs" 213 | } 214 | } 215 | 216 | resourceRequirements = [ 217 | { 218 | type = "VCPU" 219 | value = tostring(each.value.cpu / 1024) 220 | }, 221 | { 222 | type = "MEMORY" 223 | value = tostring(each.value.memory) 224 | } 225 | ] 226 | }) 227 | } 228 | -------------------------------------------------------------------------------- /numerai/terraform/aws/aws/main.zip: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/numerai/numerai-cli/e4e16559ac609801acd4598eee417efccff8d3c8/numerai/terraform/aws/aws/main.zip -------------------------------------------------------------------------------- /numerai/terraform/aws/aws/network.tf: -------------------------------------------------------------------------------- 1 | 2 | ### Network 3 | # Fetch AZs in the current region 4 | data "aws_availability_zones" "available" {} 5 | 6 | resource "aws_vpc" "main" { 7 | cidr_block = "172.17.0.0/16" 8 | } 9 | 10 | # Create var.az_count public subnets, each in a different AZ 11 | resource "aws_subnet" "public" { 12 | count = var.az_count 13 | cidr_block = cidrsubnet(aws_vpc.main.cidr_block, 8, 255 - count.index) 14 | availability_zone = data.aws_availability_zones.available.names[count.index] 15 | vpc_id = aws_vpc.main.id 16 | map_public_ip_on_launch = true 17 | } 18 | 19 | # IGW for the public subnet 20 | resource "aws_internet_gateway" "gw" { 21 | vpc_id = aws_vpc.main.id 22 | } 23 | 24 | # Route the public subnet traffic through the IGW 25 | resource "aws_route" "internet_access" { 26 | route_table_id = aws_vpc.main.main_route_table_id 27 | destination_cidr_block = "0.0.0.0/0" 28 | gateway_id = aws_internet_gateway.gw.id 29 | } 30 | 31 | ### Security 32 | 33 | # Traffic to the ECS Cluster security group 34 | resource "aws_security_group" "ecs_tasks" { 35 | name = "${local.node_prefix}-tasks-security-group" 36 | description = "Allow all outbound" 37 | vpc_id = aws_vpc.main.id 38 | 39 | egress { 40 | protocol = "-1" 41 | from_port = 0 42 | to_port = 0 43 | cidr_blocks = ["0.0.0.0/0"] 44 | } 45 | } 46 | 47 | -------------------------------------------------------------------------------- /numerai/terraform/aws/aws/webhook.tf: -------------------------------------------------------------------------------- 1 | 2 | ### Lambda 3 | resource "aws_lambda_function" "submission" { 4 | for_each = { for name, config in var.nodes : name => config } 5 | 6 | filename = "${path.module}/main.zip" 7 | function_name = each.key 8 | role = aws_iam_role.iam_for_lambda.arn 9 | handler = "main.lambda_handler" 10 | 11 | source_code_hash = filebase64sha256("${path.module}/main.zip") 12 | 13 | runtime = "python3.11" 14 | 15 | environment { 16 | variables = { 17 | JOB_DEFINITION = aws_batch_job_definition.node[each.key].name 18 | JOB_QUEUE = aws_batch_job_queue.node.name 19 | } 20 | } 21 | } 22 | 23 | resource "aws_lambda_function_url" "submission" { 24 | for_each = { for name, config in var.nodes : name => config } 25 | 26 | function_name = aws_lambda_function.submission[each.key].function_name 27 | authorization_type = "NONE" 28 | } 29 | 30 | 31 | ### Cloudwatch 32 | # This is to optionally manage the CloudWatch Log Group for the Lambda Function. 33 | # If skipping this resource configuration, also add "logs:CreateLogGroup" to the IAM policy below. 
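# As a rough sketch of that alternative (an assumption, not active config here,
# since this module does manage the log group), the extra IAM statement would be:
#   {
#     Action : "logs:CreateLogGroup",
#     Resource : "arn:aws:logs:*:*:*",
#     Effect : "Allow"
#   }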
34 | resource "aws_cloudwatch_log_group" "lambda" { 35 | for_each = { for name, config in var.nodes : name => config } 36 | 37 | name = "/aws/lambda/${each.key}" 38 | retention_in_days = 14 39 | } 40 | 41 | 42 | # cron triggers 43 | resource "aws_cloudwatch_event_rule" "cron_trigger" { 44 | for_each = { 45 | for name, config in var.nodes : 46 | name => lookup(config, "cron", null) 47 | if lookup(config, "cron", null) != null 48 | } 49 | 50 | name = "${each.key}-cron-trigger" 51 | description = "Cron-based trigger for lambda ${aws_lambda_function.submission[each.key].arn}" 52 | schedule_expression = "cron(${trim(each.value, "\"")})" 53 | } 54 | resource "aws_cloudwatch_event_target" "cron_target" { 55 | for_each = { 56 | for name, config in var.nodes : 57 | name => lookup(config, "cron", null) 58 | if lookup(config, "cron", null) != null 59 | } 60 | 61 | rule = aws_cloudwatch_event_rule.cron_trigger[each.key].name 62 | arn = aws_lambda_function.submission[each.key].arn 63 | } 64 | resource "aws_lambda_permission" "cron_permission" { 65 | for_each = { 66 | for name, config in var.nodes : 67 | name => lookup(config, "cron", null) 68 | if lookup(config, "cron", null) != null 69 | } 70 | statement_id = "AllowExecutionFromCloudWatch" 71 | action = "lambda:InvokeFunction" 72 | function_name = aws_lambda_function.submission[each.key].function_name 73 | principal = "events.amazonaws.com" 74 | source_arn = aws_cloudwatch_event_rule.cron_trigger[each.key].arn 75 | } 76 | 77 | 78 | ######################## 79 | # Lambda Function Role # 80 | ######################## 81 | 82 | resource "aws_iam_role" "iam_for_lambda" { 83 | name = "${local.node_prefix}-lambda" 84 | 85 | assume_role_policy = jsonencode({ 86 | "Version" : "2012-10-17", 87 | "Statement" : [ 88 | { 89 | Action : "sts:AssumeRole", 90 | Principal : { 91 | "Service" : "lambda.amazonaws.com" 92 | }, 93 | Effect : "Allow", 94 | Sid : "" 95 | } 96 | ] 97 | }) 98 | } 99 | 100 | resource "aws_iam_policy" "lambda_logging" { 101 | name = "${local.node_prefix}-lambda-logging" 102 | path = "/" 103 | description = "IAM policy for logging from a lambda" 104 | 105 | policy = jsonencode({ 106 | "Version" : "2012-10-17", 107 | "Statement" : [ 108 | { 109 | Action : [ 110 | "logs:CreateLogStream", 111 | "logs:PutLogEvents" 112 | ], 113 | Resource : "arn:aws:logs:*:*:*", 114 | Effect : "Allow" 115 | }, 116 | { 117 | Effect : "Allow", 118 | Action : "batch:SubmitJob", 119 | Resource : "*" 120 | }, 121 | ] 122 | }) 123 | } 124 | 125 | resource "aws_iam_role_policy_attachment" "lambda_logs" { 126 | role = aws_iam_role.iam_for_lambda.name 127 | policy_arn = aws_iam_policy.lambda_logging.arn 128 | } 129 | -------------------------------------------------------------------------------- /numerai/terraform/azure/azure/cluster.tf: -------------------------------------------------------------------------------- 1 | # Resource group for the submission node 2 | resource "azurerm_resource_group" "rg" { 3 | for_each = { for name, config in var.nodes : name => config } 4 | location = var.az_resource_group_location 5 | name = "${each.key}-resource-grp" 6 | } 7 | 8 | # Container Instance 9 | variable "restart_policy" { 10 | type = string 11 | description = "The behavior of Azure runtime if container has stopped." 12 | default = "Never" 13 | validation { 14 | condition = contains(["Always", "Never", "OnFailure"], var.restart_policy) 15 | error_message = "The restart_policy must be one of the following: Always, Never, OnFailure." 
16 | } 17 | } 18 | 19 | resource "azurerm_log_analytics_workspace" "container_instance" { 20 | for_each = { for name, config in var.nodes : name => config } 21 | name = "${local.node_prefix}-aci-log-analytics-${random_string.random[each.key].result}" 22 | location = azurerm_resource_group.rg[each.key].location 23 | resource_group_name = azurerm_resource_group.rg[each.key].name 24 | sku = "PerGB2018" 25 | retention_in_days = 30 26 | } 27 | 28 | 29 | data "azurerm_resource_group" "acr_rg" { 30 | name = "numerai-cli-acr-resource-grp" 31 | } 32 | 33 | data "azurerm_container_registry" "registry" { 34 | name = var.registry_name 35 | resource_group_name = data.azurerm_resource_group.acr_rg.name 36 | } 37 | 38 | # Create a container group with single container 39 | resource "azurerm_container_group" "container" { 40 | for_each = { for name, config in var.nodes : name => config } 41 | name = "${local.node_prefix}-container-group-${random_string.random[each.key].result}" 42 | location = azurerm_resource_group.rg[each.key].location 43 | resource_group_name = azurerm_resource_group.rg[each.key].name 44 | ip_address_type = "Public" 45 | os_type = "Linux" 46 | restart_policy = var.restart_policy 47 | 48 | diagnostics { 49 | log_analytics { 50 | log_type = "ContainerInsights" 51 | workspace_id = azurerm_log_analytics_workspace.container_instance[each.key].workspace_id 52 | workspace_key = azurerm_log_analytics_workspace.container_instance[each.key].primary_shared_key 53 | } 54 | } 55 | 56 | image_registry_credential { 57 | username = data.azurerm_container_registry.registry.admin_username 58 | password = data.azurerm_container_registry.registry.admin_password 59 | server = data.azurerm_container_registry.registry.login_server 60 | } 61 | 62 | container { 63 | name = "submission-node-${random_string.random[each.key].result}" 64 | image = each.value.docker_repo 65 | cpu = each.value.cpu / 1024 66 | memory = each.value.memory / 1024 67 | 68 | ports { 69 | port = var.node_container_port 70 | protocol = "TCP" 71 | } 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /numerai/terraform/azure/azure/input.tf: -------------------------------------------------------------------------------- 1 | # Store all terraform variables in this file 2 | variable "az_resource_group_location" { 3 | description = "Default location of the Azure resource group." 
4 | type = string 5 | default = "eastus" 6 | } 7 | 8 | variable "nodes" { 9 | description = "Map of node names to their configurations" 10 | type = map(map(any)) 11 | } 12 | 13 | variable "node_container_port" { 14 | description = "Port exposed by the container instance to redirect traffic to" 15 | type = number 16 | default = 3000 17 | } 18 | 19 | variable "registry_name" { 20 | description = "Name of Azure container registry" 21 | type = string 22 | } 23 | -------------------------------------------------------------------------------- /numerai/terraform/azure/azure/locals.tf: -------------------------------------------------------------------------------- 1 | 2 | locals { 3 | node_prefix = "numerai-cli" 4 | } 5 | -------------------------------------------------------------------------------- /numerai/terraform/azure/azure/outputs.tf: -------------------------------------------------------------------------------- 1 | output "outputs" { 2 | value = { for node, config in var.nodes : 3 | node => { 4 | docker_repo = config.docker_repo 5 | webhook_url = "https://${azurerm_linux_function_app.function_app[node].name}.azurewebsites.net/api/orchestrators/start_submission" 6 | resource_group_name = azurerm_resource_group.rg[node].name 7 | webhook_storage_account_name = azurerm_storage_account.function_app[node].name 8 | } 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /numerai/terraform/azure/azure/webhook.tf: -------------------------------------------------------------------------------- 1 | # Azure Function App 2 | # https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/function_app_function 3 | 4 | # Access existing Azure subscription under current IAM role 5 | data "azurerm_subscription" "current" {} 6 | 7 | data "azurerm_role_definition" "contributor" { 8 | name = "Contributor" 9 | } 10 | 11 | # Assign "Contributor" role of the Azure Resource Group to the Function App 12 | resource "azurerm_role_assignment" "function_app" { 13 | for_each = { for name, config in var.nodes : name => config } 14 | scope = azurerm_resource_group.rg[each.key].id # Limit scope to the submission node resource group 15 | role_definition_id = "${data.azurerm_subscription.current.id}${data.azurerm_role_definition.contributor.id}" 16 | principal_id = azurerm_linux_function_app.function_app[each.key].identity[0].principal_id 17 | } 18 | 19 | 20 | # Define the service plan for the Function App 21 | # Function consumption plan: https://learn.microsoft.com/en-us/azure/azure-functions/functions-scale 22 | resource "azurerm_service_plan" "function_app" { 23 | for_each = { for name, config in var.nodes : name => config } 24 | name = "${local.node_prefix}-function-app-plan" 25 | resource_group_name = azurerm_resource_group.rg[each.key].name 26 | location = azurerm_resource_group.rg[each.key].location 27 | os_type = "Linux" 28 | sku_name = "Y1" 29 | } 30 | 31 | # Create storage account for the Function App 32 | resource "azurerm_storage_account" "function_app" { 33 | for_each = { for name, config in var.nodes : name => config } 34 | name = "funcstorage${random_string.random[each.key].result}" 35 | resource_group_name = azurerm_resource_group.rg[each.key].name 36 | location = azurerm_resource_group.rg[each.key].location 37 | 38 | account_tier = "Standard" 39 | account_replication_type = "LRS" 40 | 41 | } 42 | 43 | resource "random_string" "random" { 44 | for_each = { for name, config in var.nodes : name => config } 45 | length = 10 46 | lower = true 
47 | upper = false 48 | special = false 49 | } 50 | 51 | # Create Linux Function App, with a azure_trigger Function 52 | # https://learn.microsoft.com/en-us/azure/azure-functions/functions-reference-python?pivots=python-mode-decorators&tabs=wsgi%2Capplication-level#folder-structure 53 | resource "azurerm_linux_function_app" "function_app" { 54 | for_each = { for name, config in var.nodes : name => config } 55 | name = "${substr(replace(each.key, "_", "-"), 8, 49)}-${random_string.random[each.key].result}" 56 | resource_group_name = azurerm_resource_group.rg[each.key].name 57 | location = azurerm_resource_group.rg[each.key].location 58 | service_plan_id = azurerm_service_plan.function_app[each.key].id 59 | 60 | storage_account_name = azurerm_storage_account.function_app[each.key].name 61 | storage_account_access_key = azurerm_storage_account.function_app[each.key].primary_access_key 62 | 63 | zip_deploy_file = "azure_trigger.zip" 64 | 65 | site_config { 66 | application_stack { 67 | python_version = "3.10" 68 | } 69 | 70 | application_insights_key = azurerm_application_insights.app_insights[each.key].instrumentation_key 71 | application_insights_connection_string = azurerm_application_insights.app_insights[each.key].connection_string 72 | 73 | } 74 | 75 | # Add environment variables for the azure_trigger Function use 76 | # https://learn.microsoft.com/en-us/azure/app-service/reference-app-settings?tabs=kudu%2Cdotnet 77 | app_settings = { 78 | "AZURE_SUBSCRIPTION_ID" = data.azurerm_subscription.current.subscription_id 79 | "AZURE_RESOURCE_GRP_NAME" = azurerm_resource_group.rg[each.key].name 80 | "AZURE_CONTAINER_GRP_NAME" = azurerm_container_group.container[each.key].name 81 | "AzureWebJobsFeatureFlags" = "EnableWorkerIndexing" 82 | } 83 | 84 | identity { 85 | type = "SystemAssigned" 86 | } 87 | } 88 | 89 | # Application insights to log function usage and errors 90 | resource "azurerm_application_insights" "app_insights" { 91 | for_each = { for name, config in var.nodes : name => config } 92 | name = "func-app-application-insights-${random_string.random[each.key].result}" 93 | location = azurerm_resource_group.rg[each.key].location 94 | resource_group_name = azurerm_resource_group.rg[each.key].name 95 | workspace_id = azurerm_log_analytics_workspace.function_app[each.key].id 96 | application_type = "other" 97 | } 98 | 99 | # Log analytics workspace to log function usage and errors 100 | resource "azurerm_log_analytics_workspace" "function_app" { 101 | for_each = { for name, config in var.nodes : name => config } 102 | name = "func-app-log-analytics-${random_string.random[each.key].result}" 103 | location = azurerm_resource_group.rg[each.key].location 104 | resource_group_name = azurerm_resource_group.rg[each.key].name 105 | sku = "PerGB2018" 106 | retention_in_days = 30 107 | } 108 | 109 | # Cron trigger 110 | # Define the service plan for the Function App 111 | # Function consumption plan: https://learn.microsoft.com/en-us/azure/azure-functions/functions-scale 112 | resource "azurerm_service_plan" "cron_function_app" { 113 | for_each = { 114 | for name, config in var.nodes : 115 | name => lookup(config, "cron", null) 116 | if lookup(config, "cron", null) != null 117 | } 118 | name = "${local.node_prefix}-cron-func-app-plan" 119 | resource_group_name = azurerm_resource_group.rg[each.key].name 120 | location = azurerm_resource_group.rg[each.key].location 121 | os_type = "Linux" 122 | sku_name = "Y1" 123 | } 124 | 125 | resource "azurerm_linux_function_app" "cron_function_app" { 126 | for_each 
= { 127 | for name, config in var.nodes : 128 | name => lookup(config, "cron", null) 129 | if lookup(config, "cron", null) != null 130 | } 131 | 132 | name = "cron-trigger-${random_string.random[each.key].result}" 133 | resource_group_name = azurerm_resource_group.rg[each.key].name 134 | location = azurerm_resource_group.rg[each.key].location 135 | service_plan_id = azurerm_service_plan.cron_function_app[each.key].id 136 | 137 | storage_account_name = azurerm_storage_account.function_app[each.key].name 138 | storage_account_access_key = azurerm_storage_account.function_app[each.key].primary_access_key 139 | 140 | zip_deploy_file = "azure_cron_trigger.zip" 141 | 142 | site_config { 143 | application_stack { 144 | python_version = "3.10" 145 | } 146 | 147 | application_insights_key = azurerm_application_insights.app_insights[each.key].instrumentation_key 148 | application_insights_connection_string = azurerm_application_insights.app_insights[each.key].connection_string 149 | 150 | } 151 | 152 | # Add environment variables for the azure_trigger Function use 153 | # https://learn.microsoft.com/en-us/azure/app-service/reference-app-settings?tabs=kudu%2Cdotnet 154 | app_settings = { 155 | "CRON_EXPRESSION" = each.value 156 | "WEBHOOK_URL" = "https://${azurerm_linux_function_app.function_app[each.key].name}.azurewebsites.net/api/orchestrators/start_submission" 157 | "AzureWebJobsFeatureFlags" = "EnableWorkerIndexing" 158 | } 159 | 160 | identity { 161 | type = "SystemAssigned" 162 | } 163 | } 164 | -------------------------------------------------------------------------------- /numerai/terraform/azure/azure_cron_trigger.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numerai/numerai-cli/e4e16559ac609801acd4598eee417efccff8d3c8/numerai/terraform/azure/azure_cron_trigger.zip -------------------------------------------------------------------------------- /numerai/terraform/azure/azure_trigger.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numerai/numerai-cli/e4e16559ac609801acd4598eee417efccff8d3c8/numerai/terraform/azure/azure_trigger.zip -------------------------------------------------------------------------------- /numerai/terraform/azure/input.tf: -------------------------------------------------------------------------------- 1 | # Store all terraform variables in this file 2 | variable "az_resource_group_location" { 3 | description = "Default location of the Azure resource group." 
4 | type = string 5 | default = "eastus" 6 | } 7 | 8 | # Load all nodes' config from the nodes.json file 9 | variable "node_config_file" { 10 | description = "Path to the json file used to configure nodes" 11 | type = string 12 | default = "nodes.json" 13 | } 14 | 15 | variable "node_container_port" { 16 | description = "Port exposed by the container instance to redirect traffic to" 17 | type = number 18 | default = 3000 19 | } 20 | -------------------------------------------------------------------------------- /numerai/terraform/azure/main.tf: -------------------------------------------------------------------------------- 1 | terraform { 2 | required_version = "1.5.6" 3 | 4 | required_providers { 5 | azurerm = { 6 | source = "hashicorp/azurerm" 7 | version = ">=3.57" 8 | } 9 | } 10 | } 11 | 12 | # Auth passed via .numerai/.keys already 13 | provider "azurerm" { 14 | skip_provider_registration = true 15 | features { 16 | resource_group { 17 | prevent_deletion_if_contains_resources = false 18 | } 19 | } 20 | } 21 | 22 | # Read the node configuration file to get node details 23 | locals { 24 | nodes = jsondecode(file(var.node_config_file)) 25 | azure_nodes = { 26 | for node, config in local.nodes : 27 | node => config if config.provider == "azure" 28 | } 29 | } 30 | 31 | resource "azurerm_resource_group" "acr_rg" { 32 | count = length(local.azure_nodes) > 0 ? 1 : 0 33 | location = var.az_resource_group_location 34 | name = "numerai-cli-acr-resource-grp" 35 | } 36 | 37 | module "azure" { 38 | count = length(local.azure_nodes) > 0 ? 1 : 0 39 | source = "./azure" 40 | az_resource_group_location = var.az_resource_group_location 41 | nodes = local.azure_nodes 42 | node_container_port = var.node_container_port 43 | registry_name = azurerm_container_registry.registry[0].name 44 | 45 | depends_on = [ 46 | azurerm_container_registry.registry[0], 47 | azurerm_resource_group.acr_rg 48 | ] 49 | } 50 | 51 | -------------------------------------------------------------------------------- /numerai/terraform/azure/outputs.tf: -------------------------------------------------------------------------------- 1 | output "azure_nodes" { 2 | value = try(length(module.azure) > 0, false) ? jsondecode(jsonencode(module.azure[0].outputs)) : {} 3 | } 4 | 5 | 6 | -------------------------------------------------------------------------------- /numerai/terraform/azure/registry.tf: -------------------------------------------------------------------------------- 1 | resource "random_string" "registry_name_random" { 2 | count = length(local.azure_nodes) > 0 ? 1 : 0 3 | length = 10 4 | lower = true 5 | upper = false 6 | special = false 7 | } 8 | 9 | variable "registry_sku" { 10 | type = string 11 | description = "The sku option of Azure Container Registry" 12 | default = "Basic" 13 | validation { 14 | condition = contains(["Basic", "Standard", "Premium"], var.registry_sku) 15 | error_message = "The registry_sku must be one of the following: Basic, Standard, Premium." 16 | } 17 | } 18 | 19 | # Does not support non alphanumeric characters in the name 20 | resource "azurerm_container_registry" "registry" { 21 | count = length(local.azure_nodes) > 0 ? 1 : 0 22 | name = "NumeraiACR${random_string.registry_name_random[0].result}" 23 | resource_group_name = azurerm_resource_group.acr_rg[0].name 24 | location = azurerm_resource_group.acr_rg[0].location 25 | sku = var.registry_sku 26 | admin_enabled = true 27 | } 28 | 29 | output "acr_repo_details" { 30 | value = length(local.azure_nodes) > 0 ? 
{ 31 | registry_rg_name = azurerm_resource_group.acr_rg[0].name 32 | registry_name = azurerm_container_registry.registry[0].name 33 | acr_login_server = azurerm_container_registry.registry[0].login_server 34 | } : null 35 | } 36 | -------------------------------------------------------------------------------- /numerai/terraform/gcp/-inputs.tf: -------------------------------------------------------------------------------- 1 | variable "region" { 2 | description = "The GCP region to create things in." 3 | type = string 4 | default = "us-east1" 5 | } 6 | 7 | variable "node_config_file" { 8 | description = "Path to the json file used to configure nodes" 9 | type = string 10 | default = "nodes.json" 11 | } 12 | -------------------------------------------------------------------------------- /numerai/terraform/gcp/-main.tf: -------------------------------------------------------------------------------- 1 | terraform { 2 | required_version = "1.5.6" 3 | 4 | required_providers { 5 | google = { 6 | source = "hashicorp/google" 7 | version = ">=4.79" 8 | } 9 | } 10 | } 11 | 12 | # Specify the provider and access details 13 | provider "google" { 14 | region = var.region 15 | } 16 | 17 | locals { 18 | nodes = jsondecode(file(var.node_config_file)) 19 | gcp_nodes = { 20 | for node, config in local.nodes : 21 | node => config if config.provider == "gcp" 22 | } 23 | } 24 | 25 | module "gcp" { 26 | count = length(local.gcp_nodes) > 0 ? 1 : 0 27 | source = "./gcp" 28 | gcp_region = var.region 29 | nodes = local.gcp_nodes 30 | project = split("/", google_project_service.cloud_resource_manager.id)[0] 31 | registry_name = google_artifact_registry_repository.registry[0].name 32 | depends_on = [ 33 | google_project_service.artifact_registry, 34 | google_project_service.cloudbuild, 35 | google_project_service.cloudfunctions, 36 | google_project_service.storage, 37 | google_project_service.workflows, 38 | google_project_service.run 39 | ] 40 | } 41 | -------------------------------------------------------------------------------- /numerai/terraform/gcp/-outputs.tf: -------------------------------------------------------------------------------- 1 | output "gcp_nodes" { 2 | value = try(length(module.gcp) > 0, false) ? jsondecode(jsonencode(module.gcp[0].outputs)) : {} 3 | } 4 | -------------------------------------------------------------------------------- /numerai/terraform/gcp/cloud-function.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numerai/numerai-cli/e4e16559ac609801acd4598eee417efccff8d3c8/numerai/terraform/gcp/cloud-function.zip -------------------------------------------------------------------------------- /numerai/terraform/gcp/gcp/-inputs.tf: -------------------------------------------------------------------------------- 1 | variable "gcp_region" { 2 | description = "The GCP region to create things in." 
3 | type = string 4 | default = "us-east1" 5 | } 6 | 7 | variable "project" { 8 | description = "The project to create things in" 9 | } 10 | 11 | variable "nodes" { 12 | description = "Map of node names to their configurations" 13 | type = map(map(any)) 14 | } 15 | 16 | variable "registry_name" { 17 | description = "The name of the registry where containers for this project are stored" 18 | } 19 | -------------------------------------------------------------------------------- /numerai/terraform/gcp/gcp/-locals.tf: -------------------------------------------------------------------------------- 1 | locals { 2 | node_prefix = "numerai-submission" 3 | } 4 | -------------------------------------------------------------------------------- /numerai/terraform/gcp/gcp/-outputs.tf: -------------------------------------------------------------------------------- 1 | output "outputs" { 2 | value = { for node, config in var.nodes : 3 | node => { 4 | docker_repo = "${var.gcp_region}-docker.pkg.dev/${var.project}/${var.registry_name}/${node}" 5 | webhook_url = google_cloudfunctions_function.webhook[node].https_trigger_url 6 | job_id = google_cloud_run_v2_job.node[node].id 7 | } 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /numerai/terraform/gcp/gcp/cluster.tf: -------------------------------------------------------------------------------- 1 | resource "google_cloud_run_v2_job" "node" { 2 | for_each = { for name, config in var.nodes : name => config } 3 | 4 | location = var.gcp_region 5 | project = var.project 6 | name = replace(each.key, "_", "-") 7 | 8 | template { 9 | template { 10 | containers { 11 | image = "${var.gcp_region}-docker.pkg.dev/${var.project}/${var.registry_name}/${each.key}:latest" 12 | 13 | resources { 14 | limits = { 15 | memory = "${each.value.memory / 1024}Gi" 16 | cpu = "${1000 * each.value.cpu / 1024}m" 17 | } 18 | } 19 | } 20 | timeout = "${each.value.timeout_minutes * 60}s" 21 | max_retries = 0 22 | } 23 | } 24 | 25 | lifecycle { 26 | ignore_changes = [ 27 | launch_stage, 28 | ] 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /numerai/terraform/gcp/gcp/webhook.tf: -------------------------------------------------------------------------------- 1 | resource "google_storage_bucket" "webhook" { 2 | for_each = { for name, config in var.nodes : name => config } 3 | name = replace(each.key, "_", "-") 4 | location = var.gcp_region 5 | project = var.project 6 | } 7 | 8 | resource "google_storage_bucket_object" "webhook" { 9 | for_each = { for name, config in var.nodes : name => config } 10 | name = "index.${filemd5("cloud-function.zip")}.zip" 11 | bucket = google_storage_bucket.webhook[each.key].name 12 | source = "cloud-function.zip" 13 | } 14 | 15 | resource "google_workflows_workflow" "webhook" { 16 | for_each = { for name, config in var.nodes : name => config } 17 | name = replace(each.key, "_", "-") 18 | region = var.gcp_region 19 | project = var.project 20 | 21 | source_contents = templatefile("${path.module}/workflow-source.yaml", { 22 | project = var.project 23 | job = replace(each.key, "_", "-") 24 | region = var.gcp_region 25 | }) 26 | } 27 | 28 | resource "google_cloudfunctions_function" "webhook" { 29 | for_each = { for name, config in var.nodes : name => config } 30 | name = replace(each.key, "_", "-") 31 | project = var.project 32 | 33 | runtime = "python39" 34 | available_memory_mb = 128 35 | source_archive_bucket = google_storage_bucket.webhook[each.key].name 36 | 
source_archive_object = google_storage_bucket_object.webhook[each.key].name 37 | trigger_http = true 38 | entry_point = "run_job" 39 | environment_variables = { 40 | PROJECT = var.project 41 | LOCATION = var.gcp_region 42 | WORKFLOW = replace(each.key, "_", "-") 43 | } 44 | } 45 | 46 | resource "google_cloudfunctions_function_iam_binding" "webhook" { 47 | for_each = { for name, config in var.nodes : name => config } 48 | cloud_function = google_cloudfunctions_function.webhook[each.key].name 49 | role = "roles/cloudfunctions.invoker" 50 | project = var.project 51 | members = [ 52 | "allUsers", 53 | ] 54 | } 55 | -------------------------------------------------------------------------------- /numerai/terraform/gcp/gcp/workflow-source.yaml: -------------------------------------------------------------------------------- 1 | main: 2 | params: [event] 3 | steps: 4 | - init: 5 | assign: 6 | - trigger_id: $${event.trigger_id} 7 | - run_job: 8 | call: googleapis.run.v1.namespaces.jobs.run 9 | args: 10 | name: namespaces/${project}/jobs/${job} 11 | location: ${region} 12 | body: 13 | overrides: 14 | containerOverrides: 15 | env: 16 | - name: TRIGGER_ID 17 | value: $${trigger_id} 18 | result: job_execution 19 | - finish: 20 | return: $${job_execution} 21 | -------------------------------------------------------------------------------- /numerai/terraform/gcp/registry.tf: -------------------------------------------------------------------------------- 1 | resource "google_artifact_registry_repository" "registry" { 2 | count = length(local.gcp_nodes) > 0 ? 1 : 0 3 | repository_id = "numerai-container-registry" 4 | format = "DOCKER" 5 | depends_on = [ 6 | google_project_service.artifact_registry, 7 | google_project_service.cloud_resource_manager 8 | ] 9 | } 10 | 11 | output "artifact_registry_details" { 12 | value = length(local.gcp_nodes) > 0 ? 
{
13 |     registry_id = google_artifact_registry_repository.registry[0].id
14 |   } : null
15 | }
16 |
--------------------------------------------------------------------------------
/numerai/terraform/gcp/services.tf:
--------------------------------------------------------------------------------
1 | resource "google_project_service" "cloud_resource_manager" {
2 |   service = "cloudresourcemanager.googleapis.com"
3 |
4 |   timeouts {
5 |     create = "30m"
6 |     update = "40m"
7 |   }
8 |
9 |   disable_dependent_services = true
10 | }
11 |
12 | resource "google_project_service" "artifact_registry" {
13 |   service = "artifactregistry.googleapis.com"
14 |
15 |   timeouts {
16 |     create = "30m"
17 |     update = "40m"
18 |   }
19 |
20 |   disable_dependent_services = true
21 |   depends_on                 = [google_project_service.cloud_resource_manager]
22 | }
23 |
24 | resource "google_project_service" "storage" {
25 |   service = "storage.googleapis.com"
26 |
27 |   timeouts {
28 |     create = "30m"
29 |     update = "40m"
30 |   }
31 |
32 |   disable_dependent_services = true
33 | }
34 |
35 | resource "google_project_service" "cloudfunctions" {
36 |   service = "cloudfunctions.googleapis.com"
37 |
38 |   timeouts {
39 |     create = "30m"
40 |     update = "40m"
41 |   }
42 |
43 |   disable_dependent_services = true
44 | }
45 |
46 | resource "google_project_service" "cloudbuild" {
47 |   service = "cloudbuild.googleapis.com"
48 |
49 |   timeouts {
50 |     create = "30m"
51 |     update = "40m"
52 |   }
53 |
54 |   disable_dependent_services = true
55 | }
56 |
57 | resource "google_project_service" "workflows" {
58 |   service = "workflows.googleapis.com"
59 |
60 |   timeouts {
61 |     create = "30m"
62 |     update = "40m"
63 |   }
64 |
65 |   disable_dependent_services = true
66 | }
67 |
68 | resource "google_project_service" "run" {
69 |   service = "run.googleapis.com"
70 |
71 |   timeouts {
72 |     create = "30m"
73 |     update = "40m"
74 |   }
75 |
76 |   disable_dependent_services = true
77 | }
78 |
--------------------------------------------------------------------------------
/scripts/setup-mac.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | {
4 |   # Install xcode cli tools if not found ("command -v" prints nothing when a command is missing)
5 |   if [[ -z "$(command -v xcode-select)" ]]; then
6 |     echo "Xcode command line tools not found, installing now..."
7 |     xcode-select --install
8 |   fi
9 |
10 |   # Install Python 3.9.1 if not found, checks for OS X 10.9 or later and Intel vs. Apple Silicon
11 |   if [[ -z "$(command -v python3)" ]]; then
12 |     echo "Python 3 not found, installing now..."
13 |
14 |     sys_ver_os=$(system_profiler SPSoftwareDataType | grep "System Version:")
15 |     if [[ $sys_ver_os =~ (macOS|OS\ X)\ (10\.(9|1[0-9])|1[1-9]\.) ]]; then
16 |       echo "Mac OS 10.9 or later detected, installing Python 3.9.1"
17 |
18 |       if [[ $(system_profiler SPHardwareDataType | grep "Processor Name:") =~ .*Intel.* ]]; then
19 |         echo "Intel chip detected..."
20 |         curl https://www.python.org/ftp/python/3.9.1/python-3.9.1-macosx10.9.pkg --output ~/Downloads/python-3.9.1-installer.pkg
21 |       else
22 |         echo "Apple Silicon detected..."
23 |         curl https://www.python.org/ftp/python/3.9.1/python-3.9.1-macos11.0.pkg --output ~/Downloads/python-3.9.1-installer.pkg
24 |       fi
25 |       sudo installer -pkg ~/Downloads/python-3.9.1-installer.pkg -target /
26 |
27 |     else
28 |       echo "Your Mac OS version is too old, consider updating to 10.9 or later before installing python..."
29 |       echo "$sys_ver_os"
30 |       exit 1
31 |     fi
32 |
33 |     echo "Python 3.9.1 installed!"
34 |   else
35 |     echo "Python 3 installed!"
36 |   fi
37 |
38 |   if [[ -z "$(command -v docker)" ]]; then
39 |     echo "Docker not found, downloading Docker Desktop now..."
40 |     curl https://desktop.docker.com/mac/stable/Docker.dmg --output ~/Downloads/docker-installer.dmg
41 |
42 |     echo "Installing..."
43 |     MOUNTDIR=$(echo `hdiutil mount ~/Downloads/docker-installer.dmg | tail -1 | awk '{$1=$2=""; print $0}'` | xargs -0 echo)
44 |     cp -R "${MOUNTDIR}/Docker.app" "/Applications/Docker.app"
45 |
46 |     echo "Cleaning up..."
47 |     hdiutil unmount "${MOUNTDIR}"
48 |     rm ~/Downloads/docker-installer.dmg
49 |
50 |     echo "Starting Docker, please walk through the setup steps to finish the installation..."
51 |     open /Applications/Docker.app
52 |
53 |     echo "Docker started! After finishing the install, run 'docker' in your terminal to ensure it's installed."
54 |   else
55 |     echo "Docker installed!"
56 |   fi
57 |
58 |   echo "Setup done, ready for you to install numerai-cli :)"
59 | } || {
60 |   echo "Setup script failed, please include the following along with the error if you report this:"
61 |   system_profiler SPSoftwareDataType
62 |   system_profiler SPHardwareDataType
63 | }
--------------------------------------------------------------------------------
/scripts/setup-ubu.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | {
4 |   apt update
5 |   if [[ -z "$(command -v python3)" ]]; then
6 |     echo "Python 3 not found, installing with apt now..."
7 |     apt install -y python3 python3-pip
8 |     echo "Python 3 installed!"
9 |   else
10 |     echo "Python 3 installed!"
11 |   fi
12 |
13 |   if [[ -z "$(command -v docker)" ]]; then
14 |     echo "Docker not found, installing Docker for Ubuntu"
15 |     apt remove -y docker docker-engine docker.io
16 |     apt install -y \
17 |       systemd \
18 |       apt-transport-https \
19 |       ca-certificates \
20 |       curl \
21 |       gnupg-agent \
22 |       software-properties-common
23 |     curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add -
24 |
25 |     add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable"
26 |
27 |     apt update
28 |     apt-cache policy docker-ce
29 |     apt install -y docker-ce
30 |   else
31 |     echo "Docker installed!"
32 |   fi
33 |
34 |   echo "Setup done, ready for you to install numerai-cli."
35 |   echo "If you encounter issues, include this in your support request:"
36 |   lsb_release -a
37 |   uname -a
38 |   systemctl status docker
39 |   which docker
40 | } || {
41 |   echo "Setup script failed, please include the following along with the error if you report this:"
42 |   lsb_release -a
43 |   uname -a
44 | }
--------------------------------------------------------------------------------
/scripts/setup-win10.ps1:
--------------------------------------------------------------------------------
1 |
2 | [CmdletBinding()] Param(
3 |     $downloadsDir = "C:\Users\$env:UserName\Downloads",
4 |     $installsDir = "C:\Program Files",
5 |     $pythonVersion = "3.9.1",
6 |     $pythonUrl = "https://www.python.org/ftp/python/$pythonVersion/python-$pythonVersion-amd64.exe",
7 |     $pythonDownloadPath = "$downloadsDir\python-$pythonVersion-amd64-installer.exe",
8 |     $pythonInstallDir = "$installsDir\Python$pythonVersion",
9 |     $dockerUrl = "https://desktop.docker.com/win/stable/Docker%20Desktop%20Installer.exe",
10 |     $dockerDownloadPath = "$downloadsDir\docker-installer.exe"
11 | )
12 | try {
13 |     if (!(Get-Command python -errorAction SilentlyContinue)) {
14 |         echo "python not found, installing..."
15 | 16 | New-Item -ItemType Directory -Force -Path $pythonInstallDir 17 | 18 | (New-Object Net.WebClient).DownloadFile($pythonUrl, $pythonDownloadPath) 19 | & $pythonDownloadPath /quiet InstallAllUsers=1 PrependPath=1 Include_test=0 TargetDir=$pythonInstallDir 20 | if ($LASTEXITCODE -ne 0) { 21 | throw "The python installer at '$pythonDownloadPath' exited with error code '$LASTEXITCODE'" 22 | } 23 | # Set the PATH environment variable for the entire machine (that is, for all users) to include the Python install dir 24 | [Environment]::SetEnvironmentVariable("PATH", "${env:path};${pythonInstallDir}", "Machine") 25 | } 26 | echo "python installed!" 27 | 28 | if (!(Get-Command docker -errorAction SilentlyContinue)) { 29 | echo "docker not found, installing..." 30 | 31 | (New-Object Net.WebClient).DownloadFile($dockerUrl, $dockerDownloadPath) 32 | & $dockerDownloadPath 33 | } 34 | echo "docker installed!" 35 | 36 | echo "Installation locations:" 37 | Get-Command python -errorAction SilentlyContinue 38 | Get-Command docker -errorAction SilentlyContinue 39 | 40 | echo "Setup done, ready for you to install numerai-cli :)" 41 | } 42 | Catch [Exception] { 43 | echo "Setup script failed, please include the following along with the error if you report this:" 44 | Write-Host $_.Exception | format-list -force 45 | Get-ComputerInfo 46 | } 47 | 48 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from os import path 3 | 4 | # read the contents of the README file 5 | this_directory = path.abspath(path.dirname(__file__)) 6 | with open(path.join(this_directory, "README.md"), encoding="utf-8") as f: 7 | long_description = f.read() 8 | 9 | 10 | setup( 11 | name="numerai-cli", 12 | version="1.1.3", 13 | description="A library for deploying Numer.ai Prediction Nodes.", 14 | url="https://github.com/numerai/numerai-cli", 15 | author="Numer.ai", 16 | author_email="contact@numer.ai", 17 | license="MIT", 18 | packages=["numerai"], 19 | include_package_data=True, 20 | long_description=long_description, 21 | long_description_content_type="text/markdown", 22 | python_requires=">=3.6.0", 23 | install_requires=[ 24 | "click>=7", 25 | "boto3", 26 | "botocore", 27 | "numerapi>=2.4.5", 28 | "colorama", 29 | "requests", 30 | "azure-identity", 31 | "azure-mgmt-subscription", 32 | "azure-mgmt-containerregistry", 33 | "azure-containerregistry", 34 | "azure-data-tables", 35 | "azure-mgmt-storage", 36 | "google-cloud-storage", 37 | "google-cloud-run", 38 | "google-cloud-artifact-registry", 39 | "google-cloud-logging", 40 | ], 41 | entry_points={ 42 | "console_scripts": ["numerai=numerai:main"], 43 | }, 44 | ) 45 | --------------------------------------------------------------------------------