├── .github
├── ISSUE_TEMPLATE
│ ├── bug_report.md
│ └── feature_request.md
├── PULL_REQUEST_TEMPLATE.md
├── mergeable.yml
└── workflows
│ ├── executor-tests.yml
│ └── unit-tests.yml
├── .gitignore
├── CONTRIBUTING.md
├── Development.md
├── LICENSE
├── README.md
├── Vagrantfile
├── cla
├── Project Cook_Corporate_Contributor_License_Agreement.docx
└── Project Cook_Individual_Contributor_License_Agreement.docx
├── cli
├── .cs.json
├── .gitignore
├── README.md
├── cook
│ ├── __init__.py
│ ├── __main__.py
│ ├── cli.py
│ ├── configuration.py
│ ├── dateparser.py
│ ├── exceptions.py
│ ├── format.py
│ ├── http.py
│ ├── mesos.py
│ ├── metrics.py
│ ├── plugins.py
│ ├── progress.py
│ ├── querying.py
│ ├── subcommands
│ │ ├── __init__.py
│ │ ├── admin.py
│ │ ├── cat.py
│ │ ├── config.py
│ │ ├── jobs.py
│ │ ├── kill.py
│ │ ├── ls.py
│ │ ├── show.py
│ │ ├── ssh.py
│ │ ├── submit.py
│ │ ├── tail.py
│ │ ├── usage.py
│ │ └── wait.py
│ ├── terminal.py
│ ├── util.py
│ └── version.py
├── pytest.ini
├── setup.py
├── tests
│ └── subcommands
│ │ ├── __init__.py
│ │ ├── test_dateparser.py
│ │ └── test_querying.py
└── travis
│ └── setup.sh
├── cook.svg
├── dask
└── docs
│ └── design.md
├── executor
├── .dockerignore
├── .gitignore
├── Dockerfile.build
├── README.md
├── RELEASING.md
├── bin
│ ├── build-docker.sh
│ ├── build-local.sh
│ ├── check-version.sh
│ └── prepare-executor.sh
├── cook
│ ├── __init__.py
│ ├── __main__.py
│ ├── _version.py
│ ├── config.py
│ ├── executor.py
│ ├── io_helper.py
│ ├── progress.py
│ ├── subprocess.py
│ └── util.py
├── requirements.txt
├── setup.cfg
├── setup.py
├── tests
│ ├── __init__.py
│ ├── conftest.py
│ ├── test_config.py
│ ├── test_executor.py
│ ├── test_progress.py
│ ├── test_subprocess.py
│ └── utils.py
└── travis
│ ├── run_tests.sh
│ └── setup.sh
├── integration
├── .dockerignore
├── .gitignore
├── Dockerfile
├── README.md
├── bin
│ ├── build-docker-image.sh
│ ├── only-run
│ └── run-integration.sh
├── requirements.txt
├── setup.cfg
├── tests
│ ├── __init__.py
│ ├── conftest.py
│ └── cook
│ │ ├── __init__.py
│ │ ├── cli.py
│ │ ├── conftest.py
│ │ ├── mesos.py
│ │ ├── reasons.py
│ │ ├── test_basic.py
│ │ ├── test_cli.py
│ │ ├── test_cli_multi_cluster.py
│ │ ├── test_cli_subcommand_plugin.py
│ │ ├── test_client.py
│ │ ├── test_dynamic_clusters.py
│ │ ├── test_impersonation.py
│ │ ├── test_master_slave.py
│ │ ├── test_multi_cluster.py
│ │ ├── test_multi_user.py
│ │ └── util.py
└── travis
│ ├── prepare_integration.sh
│ ├── run_integration.sh
│ └── scheduler_travis_config.edn
├── jobclient
├── README.md
├── java
│ ├── pom.xml
│ └── src
│ │ ├── main
│ │ └── java
│ │ │ └── com
│ │ │ └── twosigma
│ │ │ └── cook
│ │ │ └── jobclient
│ │ │ ├── Application.java
│ │ │ ├── Checkpoint.java
│ │ │ ├── Disk.java
│ │ │ ├── Executor.java
│ │ │ ├── FetchableURI.java
│ │ │ ├── Group.java
│ │ │ ├── GroupListener.java
│ │ │ ├── HostPlacement.java
│ │ │ ├── Instance.java
│ │ │ ├── InstanceDecorator.java
│ │ │ ├── Job.java
│ │ │ ├── JobClient.java
│ │ │ ├── JobClientException.java
│ │ │ ├── JobClientInterface.java
│ │ │ ├── JobListener.java
│ │ │ ├── StragglerHandling.java
│ │ │ ├── auth
│ │ │ └── spnego
│ │ │ │ ├── BasicSPNegoSchemeFactory.java
│ │ │ │ └── GSSCredentialProvider.java
│ │ │ └── constraint
│ │ │ ├── Constraint.java
│ │ │ ├── Constraints.java
│ │ │ ├── OneToOneConstraint.java
│ │ │ └── Operator.java
│ │ └── test
│ │ └── java
│ │ └── com
│ │ └── twosigma
│ │ ├── ConstraintTest.java
│ │ └── cook
│ │ └── jobclient
│ │ ├── FetchableURITest.java
│ │ ├── GroupTest.java
│ │ ├── InstanceTest.java
│ │ ├── JobClientTest.java
│ │ └── JobTest.java
└── python
│ ├── README.md
│ ├── cookclient
│ ├── __init__.py
│ ├── containers.py
│ ├── instance.py
│ ├── jobs.py
│ └── util.py
│ ├── docs
│ ├── Makefile
│ ├── make.bat
│ └── source
│ │ ├── api.rst
│ │ ├── conf.py
│ │ ├── index.rst
│ │ └── usage.rst
│ ├── requirements.txt
│ ├── setup.py
│ └── tests
│ ├── test_instance.py
│ └── test_job.py
├── scheduler
├── .dockerignore
├── .gitignore
├── CHANGELOG.md
├── Dockerfile
├── README-k8s.md
├── README.adoc
├── api-only-config.edn
├── bin
│ ├── bootstrap
│ ├── build-docker-image.sh
│ ├── help-delete-temporary-clusters
│ ├── help-make-cluster
│ ├── make-gke-test-cluster
│ ├── make-gke-test-clusters
│ ├── priority-class-cook-workload.yaml
│ ├── priority-class-synthetic-pod.yaml
│ ├── run-docker.sh
│ ├── run-local-kubernetes.sh
│ ├── run-local.sh
│ ├── sample_launch.sh
│ ├── start-datomic.sh
│ └── submit-docker.sh
├── config-composite.edn
├── config-k8s.edn
├── config.edn
├── datomic
│ ├── data
│ │ ├── seed_k8s_pools.clj
│ │ └── seed_running_jobs.clj
│ ├── datomic-free-0.9.5561.56.zip
│ └── datomic_transactor.properties
├── dev-config.edn
├── docker
│ └── run-cook.sh
├── docs
│ ├── clj-http-async-pool.md
│ ├── concepts.md
│ ├── configuration.adoc
│ ├── dev-getting-started.md
│ ├── faq.md
│ ├── groups.md
│ ├── kubernetes-state.dot
│ ├── make-kubernetes-namespace.json
│ ├── metatransactions.md
│ ├── optimizer.md
│ ├── reason-code
│ ├── rebalancer-config.adoc
│ ├── scheduler-rest-api.adoc
│ └── simulator.md
├── example-prod-config.edn
├── java
│ └── com
│ │ ├── netflix
│ │ └── fenzo
│ │ │ └── SimpleAssignmentResult.java
│ │ └── twosigma
│ │ └── cook
│ │ └── kubernetes
│ │ ├── FinalizerHelper.java
│ │ ├── ParallelWatchQueue.java
│ │ ├── TokenRefreshingAuthenticator.java
│ │ └── WatchHelper.java
├── liquibase
│ ├── README.md
│ └── changelog
│ │ └── com
│ │ └── twosigma
│ │ └── cook
│ │ └── changelogs
│ │ └── setup.postgresql.sql
├── postgresql
│ ├── README.md
│ ├── bin
│ │ ├── make-launch-postgres-docker.sh
│ │ ├── setup-database.sh
│ │ ├── setup-new-schema.sh
│ │ ├── vagrant-setup-database.sh
│ │ └── vagrant-setup-new-schema.sh
│ └── sql
│ │ ├── docker_init_new_database.sql
│ │ ├── init_cook_database.sql
│ │ ├── insert_rows_for_opensource_integration_tests.sql
│ │ ├── reset_cook_database.sql
│ │ └── reset_init_cook_database.sql
├── project.clj
├── simulator_files
│ ├── analysis
│ │ ├── .gitignore
│ │ ├── README.md
│ │ ├── analysis.ipynb
│ │ ├── analysis
│ │ │ └── __init__.py
│ │ ├── requirements_dev.txt
│ │ ├── setup.cfg
│ │ ├── setup.py
│ │ └── tests
│ │ │ ├── __init__.py
│ │ │ └── analysis
│ │ │ ├── __init__.py
│ │ │ └── test_basic.py
│ ├── example-config.edn
│ ├── example-hosts.json
│ ├── example-out-trace.csv
│ └── example-trace.json
├── src
│ ├── cook
│ │ ├── cache.clj
│ │ ├── cached_queries.clj
│ │ ├── caches.clj
│ │ ├── components.clj
│ │ ├── compute_cluster.clj
│ │ ├── compute_cluster
│ │ │ └── metrics.clj
│ │ ├── config.clj
│ │ ├── config_incremental.clj
│ │ ├── curator.clj
│ │ ├── datomic.clj
│ │ ├── group.clj
│ │ ├── kubernetes
│ │ │ ├── api.clj
│ │ │ ├── compute_cluster.clj
│ │ │ ├── controller.clj
│ │ │ └── metrics.clj
│ │ ├── log_structured.clj
│ │ ├── mesos.clj
│ │ ├── mesos
│ │ │ ├── heartbeat.clj
│ │ │ ├── mesos_compute_cluster.clj
│ │ │ ├── mesos_mock.clj
│ │ │ ├── reason.clj
│ │ │ ├── sandbox.clj
│ │ │ └── task.clj
│ │ ├── monitor.clj
│ │ ├── passport.clj
│ │ ├── plugins
│ │ │ ├── adjustment.clj
│ │ │ ├── completion.clj
│ │ │ ├── definitions.clj
│ │ │ ├── demo_plugin.clj
│ │ │ ├── file.clj
│ │ │ ├── job_submission_modifier.clj
│ │ │ ├── launch.clj
│ │ │ ├── pool.clj
│ │ │ ├── pool_mover.clj
│ │ │ ├── submission.clj
│ │ │ └── util.clj
│ │ ├── pool.clj
│ │ ├── postgres.clj
│ │ ├── progress.clj
│ │ ├── prometheus_metrics.clj
│ │ ├── queries.clj
│ │ ├── queue_limit.clj
│ │ ├── quota.clj
│ │ ├── rate_limit.clj
│ │ ├── rate_limit
│ │ │ ├── generic.clj
│ │ │ └── token_bucket_filter.clj
│ │ ├── rebalancer.clj
│ │ ├── regexp_tools.clj
│ │ ├── reporter.clj
│ │ ├── rest
│ │ │ ├── api.clj
│ │ │ ├── authorization.clj
│ │ │ ├── basic_auth.clj
│ │ │ ├── cors.clj
│ │ │ ├── impersonation.clj
│ │ │ ├── secret.clj
│ │ │ └── spnego.clj
│ │ ├── scheduler
│ │ │ ├── constraints.clj
│ │ │ ├── dru.clj
│ │ │ ├── fenzo_utils.clj
│ │ │ ├── offer.clj
│ │ │ ├── optimizer.clj
│ │ │ ├── scheduler.clj
│ │ │ └── share.clj
│ │ ├── schema.clj
│ │ ├── scratch.clj
│ │ ├── task.clj
│ │ ├── task_stats.clj
│ │ ├── test
│ │ │ ├── postgres.clj
│ │ │ └── testutil.clj
│ │ ├── tools.clj
│ │ ├── unscheduled.clj
│ │ └── util.clj
│ ├── fork
│ │ └── metrics_clojure
│ │ │ ├── LICENSE.markdown
│ │ │ ├── README.txt
│ │ │ └── metrics
│ │ │ └── jvm
│ │ │ └── core.clj
│ └── metatransaction
│ │ ├── core.clj
│ │ └── utils.clj
├── test-resources
│ └── log4j.properties
├── test
│ ├── cook
│ │ └── test
│ │ │ ├── benchmark.clj
│ │ │ ├── cache.clj
│ │ │ ├── components.clj
│ │ │ ├── compute_cluster.clj
│ │ │ ├── config.clj
│ │ │ ├── config_incremental.clj
│ │ │ ├── group.clj
│ │ │ ├── jobclient
│ │ │ └── jobclient.clj
│ │ │ ├── kubernetes
│ │ │ ├── api.clj
│ │ │ ├── compute_cluster.clj
│ │ │ └── controller.clj
│ │ │ ├── log_structured.clj
│ │ │ ├── mesos.clj
│ │ │ ├── mesos
│ │ │ ├── heartbeat.clj
│ │ │ ├── mesos_compute_cluster.clj
│ │ │ ├── mesos_mock.clj
│ │ │ ├── reason.clj
│ │ │ ├── sandbox.clj
│ │ │ └── task.clj
│ │ │ ├── monitor.clj
│ │ │ ├── plugins.clj
│ │ │ ├── plugins
│ │ │ ├── job_submission_modifier.clj
│ │ │ ├── pool.clj
│ │ │ └── submission.clj
│ │ │ ├── pool.clj
│ │ │ ├── progress.clj
│ │ │ ├── queue_limit.clj
│ │ │ ├── quota.clj
│ │ │ ├── rate_limit
│ │ │ ├── generic.clj
│ │ │ └── token_bucket_filter.clj
│ │ │ ├── rebalancer.clj
│ │ │ ├── regexp_tools.clj
│ │ │ ├── rest
│ │ │ ├── api.clj
│ │ │ ├── authorization.clj
│ │ │ ├── basic_auth.clj
│ │ │ ├── cors.clj
│ │ │ └── impersonation.clj
│ │ │ ├── scheduler
│ │ │ ├── constraints.clj
│ │ │ ├── dru.clj
│ │ │ ├── fenzo_utils.clj
│ │ │ ├── optimizer.clj
│ │ │ ├── scheduler.clj
│ │ │ └── share.clj
│ │ │ ├── schema.clj
│ │ │ ├── task.clj
│ │ │ ├── tools.clj
│ │ │ ├── unscheduled.clj
│ │ │ ├── util.clj
│ │ │ └── zz_simulator.clj
│ └── metatransaction
│ │ ├── core_test.clj
│ │ └── utils_test.clj
└── travis
│ └── setup.sh
├── sidecar
├── .dockerignore
├── .gitignore
├── Dockerfile
├── README.md
├── cook
│ ├── __init__.py
│ └── sidecar
│ │ ├── __init__.py
│ │ ├── __main__.py
│ │ ├── config.py
│ │ ├── exit_sentinel.py
│ │ ├── file_server.py
│ │ ├── progress.py
│ │ ├── tracker.py
│ │ ├── util.py
│ │ └── version.py
└── setup.py
├── simulator
├── README.md
├── config
│ ├── larger_cluster_simulation.edn
│ └── settings.edn
├── doc
│ └── development.md
├── project.clj
├── resources
│ └── job_schedule.edn
├── src
│ ├── dev
│ │ └── cook
│ │ │ └── sim
│ │ │ └── repl.clj
│ └── main
│ │ └── cook
│ │ └── sim
│ │ ├── cli.clj
│ │ ├── database.clj
│ │ ├── reporting.clj
│ │ ├── reporting
│ │ └── groups.clj
│ │ ├── runner.clj
│ │ ├── schedule.clj
│ │ ├── system.clj
│ │ ├── travis.clj
│ │ └── util.clj
└── travis
│ ├── prepare_simulation.sh
│ ├── run_simulation.sh
│ ├── scheduler_config.edn
│ └── simulator_config.edn
├── spark
├── 0001-Add-cook-support-for-spark-v1.5.0.patch
├── 0001-Add-cook-support-for-spark-v1.6.1.patch
└── README.md
└── travis
├── build_cook_executor.sh
├── gdrive_upload
├── install_mesos.sh
├── minimesos
├── minimesosFile
├── prepare.sh
├── show_executor_logs.sh
├── show_scheduler_logs.sh
├── start_scheduler.sh
└── upload_logs.sh
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report
3 | about: Create a report to help us improve
4 |
5 | ---
6 |
7 | **Describe the bug**
8 | A clear and concise description of what the bug is.
9 |
10 | **To Reproduce**
11 | Steps to reproduce the behavior:
12 | 1. Go to '...'
13 | 2. Click on '....'
14 | 3. Scroll down to '....'
15 | 4. See error
16 |
17 | **Expected behavior**
18 | A clear and concise description of what you expected to happen.
19 |
20 | **Screenshots**
21 | If applicable, add screenshots to help explain your problem.
22 |
23 | **Additional context**
24 | Add any other context about the problem here.
25 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature request
3 | about: Suggest an idea for this project
4 |
5 | ---
6 |
7 | **Is your feature request related to a problem? Please describe.**
8 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
9 |
10 | **Describe the solution you'd like**
11 | A clear and concise description of what you want to happen.
12 |
13 | **Describe alternatives you've considered**
14 | A clear and concise description of any alternative solutions or features you've considered.
15 |
16 | **Additional context**
17 | Add any other context or screenshots about the feature request here.
18 |
--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
1 | ## Changes proposed in this PR
2 |
3 | -
4 | -
5 | -
6 |
7 | ## Why are we making these changes?
8 |
9 |
10 |
--------------------------------------------------------------------------------
/.github/mergeable.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | mergeable:
3 | - when: pull_request.*
4 | validate:
5 | - do: label
6 | must_include:
7 | regex: '^internal-green$'
8 | must_exclude:
9 | regex: '^wip$'
10 |
--------------------------------------------------------------------------------
/.github/workflows/executor-tests.yml:
--------------------------------------------------------------------------------
1 | name: Cook Executor tests
2 |
3 | on:
4 | pull_request:
5 | branches:
6 | - master
7 | push:
8 | branches:
9 | - master
10 | - 'build**'
11 | - kubernetes_integration
12 | schedule:
13 | - cron: '0 0 * * *'
14 |
15 | jobs:
16 | test:
17 | runs-on: ubuntu-20.04
18 | env:
19 | PYTEST_ADDOPTS: --color=yes
20 | MESOS_NATIVE_JAVA_LIBRARY: /usr/lib/libmesos.so
21 | CLJ_HTTP_ASYNC_POOL_TEST_DURATION_MULTIPLIER: 5
22 | GDRIVE_LOG_POST_URL: https://script.google.com/macros/s/AKfycbxOB55OzrQSbpZO_0gzsxZaJ8LaUWWo3PDLNc-gCiMN1iObxu7x/exec
23 | steps:
24 | - uses: actions/checkout@v2
25 | - name: Set up JDK 11
26 | uses: actions/setup-java@v1
27 | with:
28 | java-version: '11'
29 | - name: Cache Maven packages
30 | uses: actions/cache@v2
31 | with:
32 | path: ~/.m2
33 | key: ${{ runner.os }}-m2-${{ hashFiles('**/project.clj') }}
34 | restore-keys: ${{ runner.os }}-m2
35 | - name: Set up Python
36 | uses: actions/setup-python@v2
37 | with:
38 | python-version: '3.6.x'
39 | - name: Cache pip
40 | uses: actions/cache@v2
41 | with:
42 | path: ~/.cache/pip
43 | key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
44 | restore-keys: |
45 | ${{ runner.os }}-pip-
46 | - name: Setup tests
47 | run: cd executor && ./travis/setup.sh && env
48 | - name: Run tests
49 | run: env && cd executor && ./travis/run_tests.sh
50 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.class
2 | *.iml
3 | *.jar
4 | *.swo
5 | *.swp
6 | *~
7 | *.pyc
8 | .DS_Store
9 | .idea/
10 | .vscode/
11 | .kill_lein
12 | .lein-cljsbuild-compiler*
13 | .lein-deps-sum
14 | .lein-failures
15 | .lein-plugins
16 | .lein-repl-history
17 | .minimesos/
18 | .nrepl-port
19 | .vim_jack_in
20 | /checkouts
21 | /classes
22 | /lib
23 | /log
24 | /target
25 | __pycache__/
26 | gclog*
27 | jobclient/src/cfg
28 | pom.xml
29 | pom.xml.asc
30 | scheduler/checkouts
31 | scheduler/log/
32 | scheduler/resources/public/cook-executor*
33 | scheduler/src/cfg
34 | scheduler/trace*.csv
35 | scheduler/.cook_kubeconfig_*
36 | scheduler/cook.p12
37 | scheduler/datomic/datomic-free-0.9.5561.56/
38 | src/cfg/current.clj
39 | target
40 | test-log
41 | *.orig
42 | venv
43 | dist
44 | .vagrant/
45 | jobclient/python/docs/build/
46 | *.egg-info/
47 | .clj-kondo/
48 | .lsp/
49 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | ## Contributing
2 |
3 | In order to accept your code contributions, please fill out the appropriate Contributor License Agreement in the `cla` folder and submit it to tsos@twosigma.com.
4 |
5 | In your pull request, add a line in the [changelog](scheduler/CHANGELOG.md) under "unreleased" describing your change.
6 |
--------------------------------------------------------------------------------
/Vagrantfile:
--------------------------------------------------------------------------------
# Development VM for Cook: provisions a Postgres database, builds the Java
# jobclient, creates a Python virtualenv, configures integration-test env
# vars, and installs the Cook Scheduler CLI.
Vagrant.configure("2") do |config|
  config.vm.box = "hashicorp/bionic64"
  # Forward Postgres (5432) and the Cook scheduler API (12321) to the host.
  config.vm.network "forwarded_port", guest: 5432, host: 5432
  config.vm.network "forwarded_port", guest: 12321, host: 12321
  config.vm.provider "virtualbox" do |v|
    v.memory = 6144
    v.cpus = 2
  end

  # This runs as root:
  config.vm.provision "bootstrap_as_root", type: "shell", path: "scheduler/bin/bootstrap", env: {
    "PGPASSWORD" => ENV["PGPASSWORD"],
    "GKE_CLUSTER_OWNER" => ENV["USER"],
    "GCP_PROJECT_NAME" => ENV["GCP_PROJECT_NAME"]}

  # This runs as vagrant:
  # NOTE: the heredoc below is a shell script executed inside the VM; its
  # contents are part of program behavior and are intentionally untouched.
  $script = <<-SCRIPT
repo_root=/vagrant
bashrc=$HOME/.bashrc

# Cook java jobclient setup
cd $repo_root/jobclient/java || exit 1
mvn install -DskipTests

# Python setup
pip3 install --upgrade pip
pip3 install --upgrade setuptools
pip3 install --upgrade wheel
pip3 install --upgrade virtualenv
cd $repo_root || exit 1
venv=$repo_root/venv
rm -rf $venv
$HOME/.local/bin/virtualenv venv --python=python3.6
source $venv/bin/activate
echo "source $venv/bin/activate" | tee -a $bashrc
export PATH=$venv/bin:$PATH
echo 'export PATH='$venv'/bin:$PATH' | tee -a $bashrc

# Integration tests setup
echo "export COOK_TEST_DOCKER_IMAGE=gcr.io/google-containers/alpine-with-bash:1.0" | tee -a $bashrc
echo "export COOK_TEST_DOCKER_WORKING_DIRECTORY=/mnt/sandbox" | tee -a $bashrc
echo "export COOK_TEST_DISALLOW_POOLS_REGEX='(?!^k8s-(alpha)$)'" | tee -a $bashrc
echo "export COOK_TEST_DEFAULT_SUBMIT_POOL=k8s-alpha" | tee -a $bashrc
echo "export COOK_TEST_COMPUTE_CLUSTER_TYPE=kubernetes" | tee -a $bashrc
echo "export COOK_TEST_DEFAULT_TIMEOUT_MS=480000" | tee -a $bashrc
echo "export COOK_TEST_DEFAULT_WAIT_INTERVAL_MS=8000" | tee -a $bashrc
cd $repo_root/integration || exit 1
pip3 install -r requirements.txt

# Cook Scheduler CLI setup
cli=$repo_root/cli
cd $cli || exit 1
pip3 install -e .
rm -f $HOME/.cs.json
ln -s $cli/.cs.json $HOME/.cs.json

sudo service postgresql restart
SCRIPT
  config.vm.provision "bootstrap_as_vagrant", type: "shell", inline: $script, privileged: false
end
61 |
--------------------------------------------------------------------------------
/cla/Project Cook_Corporate_Contributor_License_Agreement.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/twosigma/Cook/e43416aea1ff47b667101d275464ba45541f982e/cla/Project Cook_Corporate_Contributor_License_Agreement.docx
--------------------------------------------------------------------------------
/cla/Project Cook_Individual_Contributor_License_Agreement.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/twosigma/Cook/e43416aea1ff47b667101d275464ba45541f982e/cla/Project Cook_Individual_Contributor_License_Agreement.docx
--------------------------------------------------------------------------------
/cli/.cs.json:
--------------------------------------------------------------------------------
1 | {
2 | "defaults": {
3 | "submit": {
4 | "mem": 128,
5 | "cpus": 1,
6 | "max-retries": 1,
7 | "cluster": "dev1"
8 | }
9 | },
10 | "clusters": [
11 | {
12 | "name": "dev0",
13 | "url": "http://127.0.0.1:12321/",
14 | "disabled": false
15 | },
16 | {
17 | "name": "dev1",
18 | "url": "http://127.0.0.1:22321/",
19 | "disabled": true
20 | }
21 | ],
22 | "metrics": {
23 | "disabled": true,
24 | "host": "localhost",
25 | "port": 8125,
26 | "line-formats": {
27 | "count": "{namespace}.{name}:{value}|c"
28 | }
29 | }
30 | }
--------------------------------------------------------------------------------
/cli/.gitignore:
--------------------------------------------------------------------------------
1 | *.egg-info
2 | *.pyc
3 | *.spec
4 | build
5 | dist
6 | venv
7 | Pipfile
8 | virtualenv*
9 |
--------------------------------------------------------------------------------
/cli/cook/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/twosigma/Cook/e43416aea1ff47b667101d275464ba45541f982e/cli/cook/__init__.py
--------------------------------------------------------------------------------
/cli/cook/__main__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """Module implementing a CLI for the Cook scheduler API. """
3 |
4 | import logging
5 | import signal
6 | import sys
7 |
8 | from cook import util
9 | from cook.cli import run
10 | from cook.util import print_error
11 |
12 |
def main(args=None, plugins={}):
    """Entry point for the Cook CLI.

    Runs the requested subcommand and exits the process with its return code,
    or with 1 on any uncaught exception (after logging and printing it).

    Args:
        args: list of command-line arguments; defaults to sys.argv[1:].
        plugins: map of plugin names to plugin implementations, forwarded to run().
    """
    if args is None:
        args = sys.argv[1:]
    if plugins is None:
        # Normalize so callers may pass None; the shared default dict is
        # never mutated here, only forwarded to run().
        plugins = {}

    try:
        result = run(args, plugins)
        sys.exit(result)
    except Exception as e:
        logging.exception('exception when running with %s' % args)
        print_error(str(e))
        sys.exit(1)
24 |
25 |
def sigint_handler(_, __):
    """
    Sets util.quit_running to True (which is read by other
    threads to determine when to stop), and then exits.
    """
    util.quit_running = True  # cooperative shutdown flag polled by other threads
    print('Exiting...')
    sys.exit(0)
34 |
35 |
# Install the Ctrl-C handler at import time so a SIGINT triggers the
# cooperative shutdown above rather than a KeyboardInterrupt traceback.
signal.signal(signal.SIGINT, sigint_handler)

if __name__ == '__main__':
    main()
40 |
--------------------------------------------------------------------------------
/cli/cook/dateparser.py:
--------------------------------------------------------------------------------
1 | import datetime
2 | import logging
3 |
# Maps a relative-time phrase pattern to the timedelta it represents; the
# single capture group is the integer amount.
PATTERN_TO_TIMEDELTA_FN = (
    (r'^(\d+) sec(?:ond)?s? ago$', lambda x: datetime.timedelta(seconds=x)),
    (r'^(\d+) min(?:ute)?s? ago$', lambda x: datetime.timedelta(minutes=x)),
    (r'^(\d+) hours? ago$', lambda x: datetime.timedelta(hours=x)),
    (r'^(\d+) days? ago$', lambda x: datetime.timedelta(days=x)),
    (r'^(\d+) weeks? ago$', lambda x: datetime.timedelta(weeks=x))
)


def parse(date_time_string, time_zone):
    """
    Parses the given date_time_string and constructs a datetime object.
    Accepts strings in the following formats, where x is any integer:

    - now
    - today
    - yesterday
    - x seconds ago
    - x minutes ago
    - x hours ago
    - x days ago
    - x weeks ago
    - any format supported by dateutil's parser

    Returns None when nothing matches. We roll our own parsing for the
    simple relative phrases because general-purpose libraries bring in
    multi-language support that adds complexity, slows parsing down, and
    complicates pyinstaller packaging.
    """
    stripped = date_time_string.strip()
    lowered = stripped.lower()
    now = datetime.datetime.now(tz=time_zone)

    if lowered == 'now' or lowered == 'today':
        return now

    if lowered == 'yesterday':
        return now - datetime.timedelta(days=1)

    import re
    for pattern, timedelta_fn in PATTERN_TO_TIMEDELTA_FN:
        match = re.match(pattern, lowered)
        if match is not None:
            amount = int(match.group(1))
            return now - timedelta_fn(amount)

    # Fall back to dateutil for absolute timestamps; localize naive results.
    try:
        from dateutil import parser
        parsed = parser.parse(stripped, ignoretz=False)
        if parsed:
            if parsed.tzinfo is None or parsed.tzinfo.utcoffset(parsed) is None:
                parsed = time_zone.localize(parsed)
            return parsed
    except ValueError as ve:
        logging.exception(ve)

    return None
63 |
--------------------------------------------------------------------------------
/cli/cook/exceptions.py:
--------------------------------------------------------------------------------
class CookRetriableException(Exception):
    """Marker exception for failures that callers may safely retry."""
    pass
3 |
--------------------------------------------------------------------------------
/cli/cook/format.py:
--------------------------------------------------------------------------------
1 | import time
2 |
3 | import humanfriendly
4 |
5 | from cook import terminal
6 | from cook.util import millis_to_timedelta, millis_to_date_string
7 |
8 |
def format_dict(d):
    """Renders a dict as space-separated 'k=v' pairs, sorted by key, for table display."""
    if not d:
        return '(empty)'
    return ' '.join(f'{k}={v}' for k, v in sorted(d.items()))
12 |
13 |
def format_list(l):
    """Renders a list for table display, formatting dict elements as 'k=v' pairs."""
    if not l:
        return '(empty)'
    rendered = [format_dict(x) if isinstance(x, dict) else str(x) for x in l]
    return '; '.join(rendered)
17 |
18 |
def format_state(state):
    """Capitalizes the given state and applies that state's terminal color, if any."""
    state = state.capitalize()
    colorizers = {
        'Running': terminal.running,
        'Waiting': terminal.waiting,
        'Failed': terminal.failed,
        'Success': terminal.success,
    }
    colorize_fn = colorizers.get(state)
    return colorize_fn(state) if colorize_fn else state
33 |
34 |
def format_instance_status(instance):
    """Renders an instance's status with optional failure reason and progress suffix."""
    parts = [format_state(instance['status'])]

    if 'reason_string' in instance:
        parts.append(f' ({terminal.reason(instance["reason_string"])})')

    # Only show progress when it has actually advanced past zero.
    if 'progress' in instance and instance['progress'] > 0:
        if 'progress_message' in instance:
            parts.append(f' ({instance["progress"]}% {terminal.bold(instance["progress_message"])})')
        else:
            parts.append(f' ({instance["progress"]}%)')

    return ''.join(parts)
53 |
54 |
def format_instance_run_time(instance):
    """Renders how long an instance has run, plus its start date; running instances use the current time as the end."""
    now_millis = int(round(time.time() * 1000))
    end = instance.get('end_time', now_millis)
    run_time = millis_to_timedelta(end - instance['start_time'])
    return '%s (started %s)' % (run_time, millis_to_date_string(instance['start_time']))
63 |
64 |
def format_job_status(job):
    """Renders the job's state field with its status coloring."""
    job_state = job['state']
    return format_state(job_state)
68 |
69 |
def format_memory_amount(mebibytes):
    """Renders a MiB amount as a human-readable size string (binary units)."""
    num_bytes = mebibytes * 1024 * 1024
    return humanfriendly.format_size(num_bytes, binary=True)
73 |
74 |
def format_job_memory(job):
    """Renders the job's memory request as a human-readable size."""
    mem_mebibytes = job['mem']
    return format_memory_amount(mem_mebibytes)
78 |
79 |
def format_job_attempts(job):
    """Renders attempts used out of the maximum, e.g. '2 / 5'."""
    max_retries = job['max_retries']
    used = max_retries - job['retries_remaining']
    return f'{used} / {max_retries}'
83 |
--------------------------------------------------------------------------------
/cli/cook/plugins.py:
--------------------------------------------------------------------------------
1 | import logging
2 |
3 | __plugins = {}
4 |
class SubCommandPlugin:
    """Base class to implement custom subcommands.

    Subclasses must override register(), run(), and name(); each base
    implementation raises NotImplementedError.
    """

    def __init__(self):
        pass

    def register(self, add_parser, add_defaults):
        """Register this subcommand with argparse.

        Must be implemented by the subclass extending SubCommandPlugin.
        """
        raise NotImplementedError

    def run(self, clusters, args, config_path):
        """Run the subcommand.

        Must be implemented by the subclass extending SubCommandPlugin.
        """
        raise NotImplementedError

    # BUGFIX: this was declared as `def name():` (no `self`), so calling
    # instance.name() raised TypeError instead of NotImplementedError.
    def name(self):
        """Return the shortname of the subcommand.

        This shortname is used to register this subcommand in the list
        of supported actions. It cannot clash with an existing core
        subcommand or other plugin based subcommands.

        Must be implemented by the subclass extending SubCommandPlugin.
        """
        raise NotImplementedError
35 |
def configure(plugins):
    """Configures global plugins to the plugins map"""
    # Replaces (does not merge) the module-wide registry with the caller's map.
    global __plugins
    __plugins = plugins
    logging.debug('plugins: %s', __plugins)
41 |
42 |
def get_fn(plugin_name, default_fn):
    """Returns the plugin function corresponding to the given plugin name if found, otherwise, default_fn"""
    # Looks up the registry populated by configure(); never raises on a miss.
    return __plugins.get(plugin_name, default_fn)
46 |
--------------------------------------------------------------------------------
/cli/cook/progress.py:
--------------------------------------------------------------------------------
1 | import threading
2 |
3 | from cook import terminal
4 | from cook.util import print_info
5 |
data = []  # list of [item, status] pairs, in the order they were added
lock = threading.Lock()  # guards data and terminal redraws across threads
8 |
9 |
def __print_state(lines_to_move_up):
    """
    Redraws the progress display: moves the cursor up lines_to_move_up
    lines, then prints every [item, status] pair currently in data.
    """
    print_info(terminal.MOVE_UP * lines_to_move_up, end='')
    rendered_lines = [f'{item} ... {state}' for item, state in data]
    print_info('\n'.join(rendered_lines))
18 |
19 |
def add(item):
    """
    Appends a new item with a blank status, redraws the display, and
    returns the index assigned to the item.
    """
    with lock:
        index = len(data)
        data.append([item, ''])
        __print_state(index)
    return index
29 |
30 |
def update(index, status):
    """
    Sets the status of the item at the given index and redraws the display.
    """
    with lock:
        data[index][1] = status
        __print_state(len(data))
38 |
--------------------------------------------------------------------------------
/cli/cook/subcommands/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/twosigma/Cook/e43416aea1ff47b667101d275464ba45541f982e/cli/cook/subcommands/__init__.py
--------------------------------------------------------------------------------
/cli/cook/subcommands/wait.py:
--------------------------------------------------------------------------------
1 | from cook.querying import print_no_data, parse_entity_refs, query_with_stdin_support
2 | from cook.util import print_info, seconds_to_timedelta, guard_no_cluster
3 |
4 |
def all_jobs_completed(jobs):
    """Returns the given jobs when every one has status 'completed'; False otherwise."""
    for job in jobs:
        if job.get('status') != 'completed':
            return False
    return jobs
11 |
12 |
def all_instances_completed(instances):
    """Returns the given instances when every one has status 'completed'; False otherwise."""
    for instance in instances:
        if instance.get('status') != 'completed':
            return False
    return instances
19 |
20 |
def all_groups_completed(groups):
    """Returns the given groups when each group's completed count equals its job count; False otherwise."""
    for group in groups:
        if len(group.get('jobs')) != group.get('completed'):
            return False
    return groups
27 |
28 |
def wait(clusters, args, _):
    """Waits for jobs / instances / groups with the given UUIDs to complete.

    Returns 0 when at least one matching entity was found, 1 otherwise
    (after printing a no-data message).
    """
    guard_no_cluster(clusters)
    timeout = args.get('timeout')  # seconds; None means wait indefinitely
    interval = args.get('interval')  # seconds between polls
    entity_refs, _ = parse_entity_refs(clusters, args.get('uuid'))
    timeout_text = ('up to %s' % seconds_to_timedelta(timeout)) if timeout else 'indefinitely'
    print_info('Will wait %s.' % timeout_text)
    # Polls the clusters until the per-entity-type completion predicates are
    # satisfied, or until the timeout elapses.
    query_result, clusters_of_interest = query_with_stdin_support(clusters, entity_refs, all_jobs_completed,
                                                                  all_instances_completed, all_groups_completed,
                                                                  timeout, interval)
    if query_result['count'] > 0:
        return 0
    else:
        print_no_data(clusters_of_interest)
        return 1
45 |
46 |
def register(add_parser, add_defaults):
    """Adds this sub-command's parser and returns the action function"""
    default_timeout = None
    default_timeout_text = 'wait indefinitely'
    default_interval = 15

    parser = add_parser('wait', help='wait for jobs / instances / groups to complete by uuid')
    parser.add_argument('uuid', nargs='*')
    parser.add_argument('--timeout', '-t', type=int,
                        help=f'maximum time (in seconds) to wait (default = {default_timeout_text})')
    parser.add_argument('--interval', '-i', type=int,
                        help=f'time (in seconds) to wait between polling (default = {default_interval})')

    add_defaults('wait', {'timeout': default_timeout, 'interval': default_interval})
    return wait
62 |
--------------------------------------------------------------------------------
/cli/cook/terminal.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 |
4 | import textwrap
5 |
6 |
7 | MOVE_UP = '\033[F'
8 |
9 |
class Color:
    """ANSI SGR escape sequences used to style terminal output."""
    PURPLE = '\033[95m'
    CYAN = '\033[96m'
    DARKCYAN = '\033[36m'
    BLUE = '\033[94m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    RED = '\033[91m'
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'
    END = '\033[0m'  # resets all attributes; appended by colorize()
21 |
22 |
def failed(s):
    """Formats the given string in bold red (failure styling)."""
    style = Color.BOLD + Color.RED
    return colorize(s, style)
25 |
26 |
def success(s):
    """Formats the given string in green (success styling)."""
    styled = colorize(s, Color.GREEN)
    return styled
29 |
30 |
def running(s):
    """Formats the given string in cyan (running styling)."""
    styled = colorize(s, Color.CYAN)
    return styled
33 |
34 |
def waiting(s):
    """Formats the given string in yellow (waiting styling)."""
    styled = colorize(s, Color.YELLOW)
    return styled
37 |
38 |
def reason(s):
    """Formats the given string in red (used for failure reasons)."""
    styled = colorize(s, Color.RED)
    return styled
41 |
42 |
def bold(s):
    """Formats the given string in bold."""
    styled = colorize(s, Color.BOLD)
    return styled
45 |
46 |
47 | wrap = textwrap.wrap
48 |
49 |
def colorize(s, color):
    """Formats the given string with the given color"""
    if tty():
        return color + s + Color.END
    return s
53 |
54 |
def __ls_color(s, code, fallback_fn):
    """
    Parses the LS_COLORS environment variable to get consistent colors with the
    user's current setup, falling back to default formatting if the parsing fails.

    s: the text to format
    code: a two-letter LS_COLORS key, e.g. 'di' or 'ex'
    fallback_fn: formatter applied when no matching entry is found (or not a tty)
    """
    if tty() and 'LS_COLORS' in os.environ:
        # Split on the first '=' only, so that attribute values which themselves
        # contain '=' are not silently discarded by the len == 2 filter below.
        split_pairs = [p.split('=', 1) for p in os.environ['LS_COLORS'].split(':')]
        matched_pairs = [p for p in split_pairs if len(p) == 2 and p[0] == code]
        if len(matched_pairs) > 0:
            return f'\033[{matched_pairs[0][1]}m{s}\033[0;0m'

    return fallback_fn(s)
67 |
68 |
def tty():
    """Returns true if running in a real terminal (as opposed to being piped or redirected)"""
    out = sys.stdout
    return out.isatty()
72 |
73 |
def directory(s):
    """Attempts to use the "di" entry in LS_COLORS, falling back to cyan"""
    def fallback(text):
        return colorize(text, Color.CYAN)

    return __ls_color(s, 'di', fallback)
77 |
78 |
def executable(s):
    """Attempts to use the "ex" entry in LS_COLORS, falling back to green"""
    def fallback(text):
        return colorize(text, Color.GREEN)

    return __ls_color(s, 'ex', fallback)
82 |
--------------------------------------------------------------------------------
/cli/cook/version.py:
--------------------------------------------------------------------------------
1 | VERSION = '3.9.5'
2 |
--------------------------------------------------------------------------------
/cli/pytest.ini:
--------------------------------------------------------------------------------
1 | [pytest]
2 | markers =
3 | cli: Cook CLI tests
4 |
--------------------------------------------------------------------------------
/cli/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | from setuptools import setup
4 |
5 | from cook import version
6 |
# Runtime dependencies of the CLI.
requirements = [
    'arrow',
    'blessed',
    'humanfriendly',
    'python-dateutil>=2.8.1',
    'pytz',
    'requests',
    'tabulate',
    'tenacity',
    'tzlocal',
]

# Dependencies needed only for running the tests.
test_requirements = [
    'freezegun',
    'pytest',
    'requests-mock',
]

setup(
    name='cook_client',
    # Version is single-sourced from cook/version.py.
    version=version.VERSION,
    description="Two Sigma's Cook CLI",
    long_description="This package contains Two Sigma's Cook Scheduler command line interface, cs. cs allows you to "
                     "submit jobs and view jobs, job instances, and job groups across multiple Cook clusters.",
    packages=['cook', 'cook.subcommands'],
    # Installs the 'cs' console script entry point.
    entry_points={'console_scripts': ['cs = cook.__main__:main']},
    install_requires=requirements,
    tests_require=test_requirements
)
36 |
--------------------------------------------------------------------------------
/cli/tests/subcommands/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/twosigma/Cook/e43416aea1ff47b667101d275464ba45541f982e/cli/tests/subcommands/__init__.py
--------------------------------------------------------------------------------
/cli/tests/subcommands/test_querying.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import unittest
3 | import uuid
4 |
5 | import pytest
6 | import requests
7 | import requests.adapters
8 | import requests_mock
9 | from cook import http
10 |
11 | from cook.querying import query_cluster, make_job_request
12 |
13 |
@pytest.mark.cli
class CookCliTest(unittest.TestCase):
    # Hint that these tests may be split across processes — TODO confirm the
    # runner (nose-style attribute) actually honors this.
    _multiprocess_can_split_ = True

    def setUp(self):
        self.logger = logging.getLogger(__name__)

    def test_query_cluster_should_gracefully_handle_json_parsing_failures(self):
        # Configure the http module with the stock requests adapter/session factories.
        http_plugins = {
            'http-adapter-factory': requests.adapters.HTTPAdapter,
            'http-session-factory': requests.Session,
        }
        http.configure(config={}, plugins=http_plugins)
        cluster = {'url': 'http://localhost'}
        uuids = [uuid.uuid4()]
        with requests_mock.mock() as m:
            # Respond with a non-JSON body to exercise the parsing-failure path;
            # query_cluster should return an empty list rather than raising.
            m.get('http://localhost/rawscheduler', text='this is not json')
            self.assertEqual([], query_cluster(cluster, uuids, None, None, None, make_job_request, 'job'))
32 |
--------------------------------------------------------------------------------
/cli/travis/setup.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Installs the Cook CLI's dependencies on the Travis worker by parsing them
# out of setup.py.

set -euo pipefail

cli_dir="$(dirname "$(dirname "${BASH_SOURCE[0]}")")"
cd "$cli_dir"

# Don't use --user in virtualenv
if [[ "$(pip -V)" != *${HOME}* ]]; then
    pip_flags=--user
else
    pip_flags=
fi

# Parse dependencies from setup.py
dependencies="$(sed -nE "s/^\\s+'([^']+)',\$/\\1/p" < setup.py)"

# shellcheck disable=SC2086 -- word splitting is intentional: $dependencies is
# a whitespace-separated list and $pip_flags may be empty.
pip install $pip_flags $dependencies
17 |
--------------------------------------------------------------------------------
/executor/.dockerignore:
--------------------------------------------------------------------------------
1 | .cache
2 | .eggs
3 | .idea
4 | bin
5 | build
6 | cook_executor.egg-info
7 | cook-executor.spec
8 | dist
9 | executor.iml
10 | help.spec
11 | tests
12 | travis
13 |
--------------------------------------------------------------------------------
/executor/.gitignore:
--------------------------------------------------------------------------------
1 | # JetBrains IDE files
2 | .idea
3 | *.iml
4 |
5 | # Python setup
6 | build
7 | dist
8 | .cache
9 | .eggs
10 | __pycache__
11 | *.egg-info
12 | *.spec
13 |
14 | # Log file
15 | executor.log
16 |
17 |
18 | # virtualenv's
19 | virtualenv*
20 | venv*
21 |
--------------------------------------------------------------------------------
/executor/Dockerfile.build:
--------------------------------------------------------------------------------
FROM python:3.5.9-stretch

# PyInstaller is used to bundle the executor into a self-contained folder.
RUN pip install pyinstaller==3.3

RUN mkdir /opt/cook
WORKDIR /opt/cook

# Install dependencies first so Docker layer caching kicks in when only source changes.
COPY requirements.txt /opt/cook/
RUN pip install -r requirements.txt

COPY . /opt/cook

# Create a one-folder bundle containing an executable (instead of using the one-file version).
# Allows us to avoid the extraction to a temporary folder needed by the PyInstaller bootloader.
CMD ["pyinstaller", "--onedir", "--name", "cook-executor", "--paths", "cook", "cook/__main__.py"]
16 |
--------------------------------------------------------------------------------
/executor/RELEASING.md:
--------------------------------------------------------------------------------
1 | Releasing Cook Executor
2 | =======================
3 |
4 | Cook Executor is released on [PyPI](https://pypi.org/project/cook-executor/)
5 |
6 | Prerequisites
7 | -------------
Ensure you can build the executor following the instructions in README.md
9 |
10 | Install `twine`:
11 | ```bash
12 | pip3 install twine
13 | ```
14 |
15 | Test Release
16 | ------------
17 | Since PyPI does not allow modifying releases, it can be useful to test a release using their test instance.
18 | ```bash
19 | rm -rf dist/*
20 | python3 setup.py sdist bdist_wheel
21 | python3 -m twine upload --repository-url https://test.pypi.org/legacy/ dist/*
22 | ```
23 | Then, in a separate virtualenv for testing:
24 | ```bash
25 | pip3 install --index-url https://test.pypi.org/simple/ --no-deps cook-executor==$VERSION
26 | pip3 install pymesos==0.3.9 # install any other required dependencies from the main pypi repo
27 | cook-executor
28 | ```
29 | If there is an issue with the release, you can just release another version. They are GC-ed periodically from the test instance.
30 |
31 | Production Release
32 | ------------------
33 | When you're ready to release the final version, just build and upload to the standard PyPI repo.
34 | ```bash
35 | rm -rf dist/*
36 | python3 setup.py sdist bdist_wheel
37 | python3 -m twine upload dist/*
38 | ```
39 |
--------------------------------------------------------------------------------
/executor/bin/build-docker.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

# Usage: ./bin/build-docker.sh
# Builds the version of cook executor that can execute inside a docker container.

set -e

DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
NAME=cook-executor-build

EXECUTOR_DIR="$(dirname "${DIR}")"

mkdir -p "${EXECUTOR_DIR}/dist"

# build cook-executor inside docker image to avoid local python environment and architecture hassles
cd "${EXECUTOR_DIR}"
docker build -t "${NAME}" -f "${EXECUTOR_DIR}/Dockerfile.build" .
docker run --name "${NAME}" "${NAME}"
rm -rf "${EXECUTOR_DIR}/dist/cook-executor"
docker cp "${NAME}:/opt/cook/dist/cook-executor" "${EXECUTOR_DIR}/dist/cook-executor"
docker rm "${NAME}"
22 |
--------------------------------------------------------------------------------
/executor/bin/build-local.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

# Usage: ./bin/build-local.sh
# Builds the version of cook executor that can execute locally.

set -e

DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

EXECUTOR_DIR="$(dirname "${DIR}")"

mkdir -p "${EXECUTOR_DIR}/dist"
rm -rf "${EXECUTOR_DIR}/dist/cook-executor-local"

# Create a one-folder bundle containing an executable (instead of using the one-file version).
# Allows us to avoid the extraction to a temporary folder needed by the PyInstaller bootloader.
cd "${EXECUTOR_DIR}"
pyinstaller --onedir --name cook-executor-local --paths cook cook/__main__.py
20 |
--------------------------------------------------------------------------------
/executor/bin/prepare-executor.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

# USAGE: ./bin/prepare-executor.sh MODE TARGET_DIR
# Builds the cook executor and then copies it to TARGET_DIR
# Examples:
#   ./bin/prepare-executor.sh docker /target/directory
#   ./bin/prepare-executor.sh local /target/directory

DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
EXECUTOR_DIR="$(dirname "${DIR}")"
MODE=${1}
TARGET_DIR=${2}

set -e

if [ -z "${MODE}" ]; then
    echo "ERROR: mode has not been specified!"
    exit 1
fi

if [[ ! "${MODE}" =~ ^(docker|local)$ ]]; then
    echo "ERROR: invalid mode (${MODE}) specified!"
    exit 1
fi

if [ -z "${TARGET_DIR}" ]; then
    echo "ERROR: target directory has not been specified!"
    exit 1
fi

# The docker bundle drops the mode suffix from its name.
COOK_EXECUTOR_NAME="cook-executor-${MODE}"
if [[ "${MODE}" == docker ]]; then
    COOK_EXECUTOR_NAME="cook-executor"
fi

# Rebuild when the bundle is missing or check-version.sh reports it stale.
COOK_EXECUTOR_PATH="${EXECUTOR_DIR}/dist/${COOK_EXECUTOR_NAME}"
if [ ! -d "${COOK_EXECUTOR_PATH}" ]; then
    echo "${COOK_EXECUTOR_NAME} not found at ${COOK_EXECUTOR_PATH}"
    DO_EXECUTOR_REBUILD=true
elif ! "${EXECUTOR_DIR}/bin/check-version.sh" -q "${COOK_EXECUTOR_NAME}"; then
    echo "${COOK_EXECUTOR_NAME} appears to be out of date"
    DO_EXECUTOR_REBUILD=true
else
    DO_EXECUTOR_REBUILD=false
fi

COOK_EXECUTOR_ZIP_NAME="${COOK_EXECUTOR_NAME}.tar.gz"
COOK_EXECUTOR_ZIP_FILE="${EXECUTOR_DIR}/dist/${COOK_EXECUTOR_ZIP_NAME}"
if ${DO_EXECUTOR_REBUILD}; then
    echo "Triggering build of ${COOK_EXECUTOR_NAME} before proceeding."
    "${EXECUTOR_DIR}/bin/build-${MODE}.sh"
    echo "Zipping contents of ${COOK_EXECUTOR_PATH}"
    pushd "${EXECUTOR_DIR}/dist"
    tar -cvzf "${COOK_EXECUTOR_ZIP_FILE}" "${COOK_EXECUTOR_NAME}"
    popd
else
    echo "Not triggering build of ${COOK_EXECUTOR_NAME}"
fi


# Copy only when the archive is newer than what's already in TARGET_DIR.
if [ "${COOK_EXECUTOR_ZIP_FILE}" -nt "${TARGET_DIR}/${COOK_EXECUTOR_ZIP_NAME}" ]; then
    echo "Copying ${COOK_EXECUTOR_ZIP_NAME} from ${COOK_EXECUTOR_ZIP_FILE} to ${TARGET_DIR}"
    mkdir -p "${TARGET_DIR}"
    cp -f "${COOK_EXECUTOR_ZIP_FILE}" "${TARGET_DIR}"
else
    echo "Not copying ${COOK_EXECUTOR_ZIP_NAME} to ${TARGET_DIR}"
fi
68 |
--------------------------------------------------------------------------------
/executor/cook/__init__.py:
--------------------------------------------------------------------------------
1 | """Cook Executor
2 |
3 | The Cook executor is a custom executor written in Python.
4 | It replaces the default command executor in order to enable a number of
5 | features for both operators and end users.
6 | For more information on Mesos executors, see the "Working with Executors"
7 | section at http://mesos.apache.org/documentation/latest/app-framework-development-guide/
8 | """
9 |
10 | DAEMON_GRACE_SECS = 1
11 | TERMINATE_GRACE_SECS = 0.1
12 |
13 | REASON_CONTAINER_LIMITATION_MEMORY = 'REASON_CONTAINER_LIMITATION_MEMORY'
14 | REASON_EXECUTOR_TERMINATED = 'REASON_EXECUTOR_TERMINATED'
15 | REASON_TASK_INVALID = 'REASON_TASK_INVALID'
16 |
17 | TASK_ERROR = 'TASK_ERROR'
18 | TASK_FAILED = 'TASK_FAILED'
19 | TASK_FINISHED = 'TASK_FINISHED'
20 | TASK_KILLED = 'TASK_KILLED'
21 | TASK_RUNNING = 'TASK_RUNNING'
22 | TASK_STARTING = 'TASK_STARTING'
23 |
--------------------------------------------------------------------------------
/executor/cook/_version.py:
--------------------------------------------------------------------------------
1 | # This file is read by setup.py to obtain the version.
2 | # Be aware that changing the format may break the parsing logic.
3 |
4 | __version__ = "0.1.16"
5 |
--------------------------------------------------------------------------------
/executor/cook/io_helper.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | """This module ensures atomic writes to stdout."""
4 |
5 | import logging
6 | import sys
7 | from threading import Lock
8 |
9 | import os
10 |
11 | __stdout_lock__ = Lock()
12 |
13 |
def print_to_buffer(lock, buffer, data, flush=False, newline=True):
    """Writes data to the given byte buffer, serialized by the given lock.

    Parameters
    ----------
    lock: threading.Lock
        Lock held while writing, so concurrent writers do not interleave
    buffer: byte buffer
        Destination buffer
    data: string or bytes
        Payload to write; strings are encoded first
    flush: boolean
        When True, flush the buffer after writing
    newline: boolean
        When True, append an os.linesep after the payload

    Returns
    -------
    Nothing.
    """
    # Encoding is pure, so it can happen outside the critical section.
    payload = data.encode() if isinstance(data, str) else data
    with lock:
        buffer.write(payload)
        if newline:
            buffer.write(os.linesep.encode())
        if flush:
            buffer.flush()
43 |
44 |
def print_out(data, flush=False, newline=True):
    """Wrapper function that prints to stdout in a thread-safe manner using the __stdout_lock__ lock.

    Parameters
    ----------
    data: string or bytes
        The data to output
    flush: boolean
        Flag determining whether to trigger a sys.stdout.flush()
    newline: boolean
        Flag determining whether to output a newline at the end

    Returns
    -------
    Nothing.
    """
    # Write to the underlying binary buffer so both str and bytes payloads are supported.
    print_to_buffer(__stdout_lock__, sys.stdout.buffer, data, flush=flush, newline=newline)
62 |
63 |
def print_and_log(string_data, newline=True):
    """Prints string_data to stdout (thread-safe, flushed, preceded by a newline)
    and also records the same message via logging.info().

    Parameters
    ----------
    string_data: string
        The string to output
    newline: boolean
        Flag determining whether to output a newline at the end

    Returns
    -------
    Nothing.
    """
    # Leading os.linesep ensures the message starts on a fresh line.
    message = os.linesep + string_data
    print_out(message, flush=True, newline=newline)
    logging.info(string_data)
81 |
--------------------------------------------------------------------------------
/executor/cook/util.py:
--------------------------------------------------------------------------------
1 | import errno
2 | import logging
3 | import resource
4 | import sys
5 | import threading
6 | import traceback
7 |
# Divisor converting ru_maxrss to MB; ru_maxrss is reported in KB on Linux.
__rusage_denom_mb = 1024.0
if sys.platform == 'darwin':
    # in OSX the output is in different units (bytes rather than KB), so scale by another 1024
    __rusage_denom_mb = __rusage_denom_mb * 1024
12 |
13 |
def print_memory_usage():
    """Logs the executor's peak memory usage; best effort, never raises."""
    try:
        usage = resource.getrusage(resource.RUSAGE_SELF)
        logging.info('Executor Memory usage: {} MB'.format(usage.ru_maxrss / __rusage_denom_mb))
    except Exception:
        logging.exception('Error in logging memory usage')
21 |
22 |
def is_out_of_memory_error(exception):
    """Returns true iff exception is an instance of OSError and error code represents an out of memory error."""
    if not isinstance(exception, OSError):
        return False
    return exception.errno == errno.ENOMEM
26 |
27 |
def log_thread_stack_traces():
    """Logs the stack traces for all threads.

    Best effort: any failure is logged rather than raised, but SystemExit and
    KeyboardInterrupt are no longer swallowed (the original bare `except:` did).
    """
    try:
        logging.info('Logging stack traces for all threads')
        frames = sys._current_frames()
        for th in threading.enumerate():
            logging.info(th)
            # A thread can terminate between enumerate() and here; skip it
            # instead of aborting the whole loop on a missing-frame lookup.
            frame = frames.get(th.ident)
            if frame is not None:
                logging.info(''.join(traceback.format_stack(frame)))
    except Exception:
        logging.exception('Error in logging thread stack traces')
37 |
--------------------------------------------------------------------------------
/executor/requirements.txt:
--------------------------------------------------------------------------------
1 | psutil==5.4.1
2 | pyinstaller==3.3
3 | pymesos==0.3.9
4 |
--------------------------------------------------------------------------------
/executor/setup.cfg:
--------------------------------------------------------------------------------
1 | [tool:pytest]
2 | addopts = -n 1 -v --timeout-method=thread
3 | timeout = 1200
4 |
--------------------------------------------------------------------------------
/executor/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | from setuptools import setup
5 |
# Dependencies needed only for running the test suite.
test_deps=[
    'pytest==5.2.0',
    'pytest-timeout==1.3.3',
    'pytest-xdist==1.30.0'
]

extras = { 'test': test_deps }

setup(
    name='cook-executor',
    # Extracts __version__ from the last line of cook/_version.py; that file
    # warns that changing its format may break this parsing.
    version=open("cook/_version.py").readlines()[-1].split('"')[1],
    description='Custom Mesos executor for Cook written in Python',
    url='https://github.com/twosigma/Cook',
    license="Apache Software License 2.0",
    keywords='cook-executor',
    packages=['cook'],
    test_suite='tests',
    tests_require=test_deps,
    extras_require=extras,
    # Keep in sync with the pinned versions in requirements.txt.
    install_requires=['psutil==5.4.1', 'pymesos==0.3.9'],
    entry_points={
        'console_scripts': [
            'cook-executor = cook.__main__:main'
        ]
    },
    classifiers=[
        "Programming Language :: Python :: 3.5"
    ]
)
35 |
--------------------------------------------------------------------------------
/executor/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/twosigma/Cook/e43416aea1ff47b667101d275464ba45541f982e/executor/tests/__init__.py
--------------------------------------------------------------------------------
/executor/tests/conftest.py:
--------------------------------------------------------------------------------
1 | # This file is automatically loaded and run by pytest during its setup process,
2 | # meaning it happens before any of the tests in this directory are run.
3 | # See the pytest documentation on conftest files for more information:
4 | # https://docs.pytest.org/en/2.7.3/plugins.html#conftest-py-plugins
5 |
6 | # Please see: https://github.com/twosigma/Cook/issues/749
7 |
import pymesos as pm

# Exercise pymesos' data encoding once before any tests run. Presumably this
# warms module-level state to avoid the race described in issue #749 (linked
# above) — confirm against that issue before removing.
pm.encode_data((str({'foo': 'bar'}).encode('utf8')))
11 |
--------------------------------------------------------------------------------
/executor/travis/run_tests.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Runs the Cook Executor tests

set -e

PROJECT_DIR="$(pwd)"
export PROJECT_DIR
cd "${PROJECT_DIR}"

python --version
pytest --version

pytest -n4
12 |
--------------------------------------------------------------------------------
/executor/travis/setup.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Sets up the travis worker to be able to run executor tests.

set -e

PROJECT_DIR="$(pwd)"
export PROJECT_DIR
cd "${PROJECT_DIR}"

python --version
# Install the executor package plus its test extras.
pip install -e '.[test]'
10 |
--------------------------------------------------------------------------------
/integration/.dockerignore:
--------------------------------------------------------------------------------
1 | .eggs
2 | .idea
3 | .cache
4 | .minimesos
5 | .pytest_cache
6 | bin
7 | build
8 | cook_integration.egg-info
9 | dist
10 | integration.iml
11 | travis
12 | **/__pycache__
13 | virtualenv*
14 | venv*
15 |
--------------------------------------------------------------------------------
/integration/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | __pycache__
3 | *.egg-info
4 | *.egg*
5 | .cache
6 | venv/
7 | virtualenv-integrationtest*
8 | .pytest_cache
9 | /.cs.json
10 |
--------------------------------------------------------------------------------
/integration/Dockerfile:
--------------------------------------------------------------------------------
FROM python:3.9

WORKDIR /opt/cook/integration
COPY requirements.txt /opt/cook/integration
# requirements.txt references the CLI as file:../cli, so unpack the CLI source
# (packaged by bin/build-docker-image.sh) before installing.
ADD cli.tar.gz /opt/cook/cli/
RUN pip install -r requirements.txt

# Don't need to copy over the integration test files --- they're bind-mounted.
ENTRYPOINT ["pytest"]
--------------------------------------------------------------------------------
/integration/bin/build-docker-image.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

# Usage: build-docker-image.sh
# Builds a docker image containing the cook scheduler integration tests.

# Abort early so a failed tarball step cannot produce a broken image.
set -e

INTEGRATION_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && cd .. && pwd )"
NAME=cook-integration

echo "Building docker images for ${NAME} IN $(dirname "${INTEGRATION_DIR}")/cli"
cd "$(dirname "${INTEGRATION_DIR}")/cli"
# gzip -n keeps the archive reproducible (no timestamp embedded).
tar -c . | gzip -n > "${INTEGRATION_DIR}/cli.tar.gz"
cd "${INTEGRATION_DIR}"
docker build -t "${NAME}" "${INTEGRATION_DIR}"
rm cli.tar.gz
15 |
--------------------------------------------------------------------------------
/integration/bin/only-run:
--------------------------------------------------------------------------------
#!/usr/bin/env python

import os, pytest, sys

# Require at least one test name on the command line.
if len(sys.argv) < 2:
    sys.exit("USAGE: {} TEST_NAMES...".format(sys.argv[0]))

# Bare test names (e.g. 'test_foo') to match against collected pytest node ids.
target_names = sys.argv[1:]
9 |
class FindTestsPlugin(object):
    """Pytest plugin that records the node ids of collected items whose
    names match one of the requested target names."""

    def __init__(self):
        self.matched = []

    def pytest_collection_modifyitems(self, items):
        # A node id ends with ':<test_name>'; keep every item matching a target.
        for name in target_names:
            wanted = ':' + name
            self.matched.extend(item.nodeid for item in items if item.nodeid.endswith(wanted))
21 |
# Run a collection-only pytest pass to resolve the bare names to full node ids,
# then re-exec pytest with exactly the matched tests (replacing this process).
find_tests = FindTestsPlugin()
pytest.main(['-c/dev/null', '--collect-only', '-p', 'no:terminal'], plugins=[find_tests])

if not find_tests.matched:
    sys.exit("No tests found with the given names.")

os.execvp('pytest', ['-c/dev/null', '-n0', '-sv'] + find_tests.matched)
29 |
--------------------------------------------------------------------------------
/integration/requirements.txt:
--------------------------------------------------------------------------------
1 | beakerx==1.3.0
2 | tornado==6.1.0
3 | jupyter_client==7.1.0
4 | nbconvert==6.3.0
5 | nbformat==5.1.3
6 | numpy==1.21.0
7 | pip==9.0.1; python_version >= '3.6'
8 | pytest==5.2.0
9 | pytest-timeout==1.3.3
10 | pytest-xdist==1.30.0
11 | python-dateutil==2.8.1
12 | requests==2.20.0
13 | retrying==1.3.3
14 | file:../cli#egg=cook_client
15 | pygit2==1.7.2
16 |
--------------------------------------------------------------------------------
/integration/setup.cfg:
--------------------------------------------------------------------------------
1 | [tool:pytest]
2 | addopts = -n10 -v --timeout-method=thread --maxfail=3 --log-level=DEBUG --durations=25
3 | timeout = 1200
4 | usefixtures = record_test_metric
5 | markers =
6 | cli: marks tests as testing the cs CLI
7 | memlimit: marks tests as checking that exceeding the memory limit works as expected
8 | multi_user: marks tests as using multiple users (e.g. one admin and one non-admin)
9 | scheduler_not_in_docker: marks tests that should be skipped when Cook itself runs in Docker
10 | serial: marks tests as needing to run in series rather than in parallel with other tests
11 | travis_skip: marks tests that should be skipped in Travis CI
12 |
--------------------------------------------------------------------------------
/integration/tests/__init__.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
import logging

# Configure root logging once for the whole integration-test package.
logging.basicConfig(format='%(asctime)s [%(levelname)s] [%(process)d] %(message)s', level=logging.DEBUG)
5 |
--------------------------------------------------------------------------------
/integration/tests/cook/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/twosigma/Cook/e43416aea1ff47b667101d275464ba45541f982e/integration/tests/cook/__init__.py
--------------------------------------------------------------------------------
/integration/tests/cook/conftest.py:
--------------------------------------------------------------------------------
1 | # This file is automatically loaded and run by pytest during its setup process,
2 | # meaning it happens before any of the tests in this directory are run.
3 | # See the pytest documentation on conftest files for more information:
4 | # https://docs.pytest.org/en/2.7.3/plugins.html#conftest-py-plugins
5 | import logging
6 | import os
7 | import socket
8 | import subprocess
9 | import threading
10 | import time
11 |
12 | from tests.cook import util
13 |
14 |
def _sudo_check(user):
    """
    Check if the current user can sudo as a test user.
    This is necessary to obtain Kerberos auth headers for multi-user tests.
    """
    # -n makes sudo fail instead of prompting when credentials aren't cached.
    exit_code = subprocess.call(f'sudo -nu {user} echo CACHED SUDO', shell=True)
    sudo_ok = (exit_code == 0)
    assert sudo_ok, "You need to pre-cache your sudo credentials. (Run a simple sudo command as a test user.)"
22 |
23 |
def _sudo_checker_task(user):
    """Periodically check sudo ability to ensure the credentials stay cached."""
    # Runs forever; started on a daemon thread so it dies with the test process.
    while True:
        _sudo_check(user)
        time.sleep(60)
29 |
30 |
def _ssh_check(user):
    """
    Check if the current user can ssh as a test user.
    This is necessary to obtain Kerberos auth headers for multi-user tests.
    """
    # Target host is overridable via the environment; defaults to this machine.
    hostname = os.getenv('COOK_SWITCH_USER_SSH_HOST', socket.gethostname())
    logging.info(f'Checking ssh as {user} to {hostname}')
    exit_code = subprocess.call(f'ssh {user}@{hostname} echo SSH', shell=True)
    assert exit_code == 0, f'Unable to ssh as {user} to {hostname}'
40 |
41 |
logging.info('Checking if multi-user switching needs to be enabled')
# User switching is only exercised when Kerberos is enabled and a pool of test
# users is configured; verify access up front so failures surface at startup.
if util.kerberos_enabled() and os.getenv('COOK_MAX_TEST_USERS'):
    switch_user_mode = os.getenv('COOK_SWITCH_USER_MODE', 'sudo')
    logging.info(f'Multi-user switching mode is {switch_user_mode}')
    if switch_user_mode == 'sudo':
        # Only the first test user is checked here; the daemon thread then
        # re-checks every minute to keep the sudo credential cache warm.
        username = next(util._test_user_names())
        _sudo_check(username)
        threading.Thread(target=_sudo_checker_task, args=[username], daemon=True).start()
    elif switch_user_mode == 'ssh':
        # ssh mode checks every test user once, up front.
        for username in util._test_user_names():
            _ssh_check(username)
    else:
        assert False, f'{switch_user_mode} is not a valid value for COOK_SWITCH_USER_MODE'
else:
    logging.info('Multi-user switching is not getting enabled')
57 |
--------------------------------------------------------------------------------
/integration/tests/cook/reasons.py:
--------------------------------------------------------------------------------
# Named constants for failure reason codes from cook or mesos.
# See scheduler/src/cook/mesos/schema.clj for the reason code names.
REASON_KILLED_BY_USER = 1001
REASON_TASK_KILLED_DURING_LAUNCH = 1004
MAX_RUNTIME_EXCEEDED = 2003
CONTAINER_INITIALIZATION_TIMED_OUT = 1007
EXECUTOR_UNREGISTERED = 6002
# NOTE(review): the 99xxx range looks like catch-all / command-exit codes —
# confirm against schema.clj.
UNKNOWN_MESOS_REASON = 99001
CMD_NON_ZERO_EXIT = 99003

# Named constants for unscheduled job reason strings from cook or fenzo.
UNDER_INVESTIGATION = 'The job is now under investigation. Check back in a minute for more details!'
COULD_NOT_PLACE_JOB = 'The job couldn\'t be placed on any available hosts.'
JOB_WOULD_EXCEED_QUOTA = 'The job would cause you to exceed resource quotas.'
JOB_IS_RUNNING_NOW = 'The job is running now.'
JOB_LAUNCH_RATE_LIMIT = 'You are currently rate limited on how many jobs you launch per minute.'
PLUGIN_IS_BLOCKING = 'The launch filter plugin is blocking the job launch.'
18 |
--------------------------------------------------------------------------------
/integration/tests/cook/test_master_slave.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import os
3 | import unittest
4 |
5 | import pytest
6 | from retrying import retry
7 |
8 | from tests.cook import util
9 |
10 |
@unittest.skipUnless(os.getenv('COOK_MASTER_SLAVE') is not None,
                     'Requires setting the COOK_MASTER_SLAVE environment variable')
@pytest.mark.timeout(util.DEFAULT_TEST_TIMEOUT_SECS)  # individual test timeout
class MasterSlaveTest(unittest.TestCase):

    @classmethod
    def setUpClass(cls):
        # Master URL comes from the default env var; slave URL has a local default.
        cls.master_url = util.retrieve_cook_url()
        cls.slave_url = util.retrieve_cook_url('COOK_SLAVE_URL', 'http://localhost:12322')
        cls.logger = logging.getLogger(__name__)
        util.init_cook_session(cls.master_url, cls.slave_url)

    def setUp(self):
        # Re-expose class-level configuration on the instance for convenience.
        self.master_url = type(self).master_url
        self.slave_url = type(self).slave_url
        self.logger = logging.getLogger(__name__)

    def test_get_queue(self):
        # A constraint that can never match a real host, so the job stays queued.
        bad_constraint = [["HOSTNAME",
                           "EQUALS",
                           "lol won't get scheduled"]]
        uuid, resp = util.submit_job(self.master_url, command='sleep 30', constraints=bad_constraint)
        self.assertEqual(201, resp.status_code, resp.content)
        try:
            # The slave's /queue endpoint should redirect (307) to the master.
            slave_queue = util.session.get('%s/queue' % self.slave_url, allow_redirects=False)
            self.assertEqual(307, slave_queue.status_code)
            default_pool = util.default_pool(self.master_url)
            pool = default_pool or 'no-pool'
            self.logger.info(f'Checking the queue endpoint for pool {pool}')

            @retry(stop_max_delay=30000, wait_fixed=1000)  # Need to wait for a rank cycle
            def check_queue():
                # Follow the redirect target and verify our job shows up in the pool's queue.
                master_queue = util.session.get(slave_queue.headers['Location'])
                self.assertEqual(200, master_queue.status_code, master_queue.content)
                pool_queue = master_queue.json()[pool]
                self.assertTrue(any([job['job/uuid'] == uuid for job in pool_queue]), pool_queue)

            check_queue()
        finally:
            # Always clean up the submitted job, even if an assertion failed.
            util.kill_jobs(self.master_url, [uuid])
51 |
--------------------------------------------------------------------------------
/integration/tests/cook/test_multi_cluster.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pytest
3 | import unittest
4 |
5 | import logging
6 |
7 | from tests.cook import util
8 |
9 |
@unittest.skipUnless(os.getenv('COOK_MULTI_CLUSTER') is not None,
                     'Requires setting the COOK_MULTI_CLUSTER environment variable')
@pytest.mark.timeout(util.DEFAULT_TEST_TIMEOUT_SECS)  # individual test timeout
class MultiClusterTest(unittest.TestCase):
    """Integration tests that span two independent Cook scheduler clusters."""

    @classmethod
    def setUpClass(cls):
        # Resolve both cluster URLs once and initialize the shared session.
        cls.cook_url_1 = util.retrieve_cook_url()
        cls.cook_url_2 = util.retrieve_cook_url('COOK_SCHEDULER_URL_2', 'http://localhost:22321')
        util.init_cook_session(cls.cook_url_1, cls.cook_url_2)

    def setUp(self):
        klass = self.__class__
        self.cook_url_1 = klass.cook_url_1
        self.cook_url_2 = klass.cook_url_2
        self.logger = logging.getLogger(__name__)

    def test_federated_query(self):
        """With partial=True, each cluster should return only the jobs it owns."""
        # Submit one job to each cluster.
        uuid_one, response = util.submit_job(self.cook_url_1)
        self.assertEqual(response.status_code, 201)
        uuid_two, response = util.submit_job(self.cook_url_2)
        self.assertEqual(response.status_code, 201)

        both_uuids = [uuid_one, uuid_two]
        # Ask each cluster for both jobs; each should report only its own.
        for cluster_url, owned_uuid in ((self.cook_url_1, uuid_one),
                                        (self.cook_url_2, uuid_two)):
            response = util.query_jobs(cluster_url, uuid=both_uuids, partial=True)
            self.assertEqual(200, response.status_code, response.json())
            self.assertEqual(1, len(response.json()))
            self.assertEqual([owned_uuid], [job['uuid'] for job in response.json()])
46 |
--------------------------------------------------------------------------------
/integration/travis/prepare_integration.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Prepare the environment for running the Cook integration tests:
# run the shared travis prepare script, ensure no stale cook-client
# CLI install remains, and install the test requirements.
set -ev

PROJECT_DIR=$(pwd) ../travis/prepare.sh
python --version

# Explicitly uninstall the cli if a previous cook-client install is present,
# so the tests run against a freshly installed client.
if pip list --format=columns | grep -q cook-client; then
    pip uninstall -y cook-client
fi

pip install -r requirements.txt
15 |
--------------------------------------------------------------------------------
/jobclient/README.md:
--------------------------------------------------------------------------------
1 | # Cook Java Client
2 |
3 | Please run `mvn javadoc:javadoc` to build the docs for this project.
4 | The main entrypoint is `com.twosigma.cook.jobclient`; read the Javadocs for details.
5 |
6 | # Example Usage
7 |
8 | Submitting two jobs that should run in the same AWS region:
9 |
10 | ```java
11 | public class HostPlacementExample {
12 | @Test
13 | public void twoJobsInTheSameRegion() throws URISyntaxException, JobClientException {
14 | // Create a host placement constraint where the region attribute must equal across hosts
15 | HostPlacement.Builder hostPlacementBuilder = new HostPlacement.Builder();
16 | hostPlacementBuilder.setType(HostPlacement.Type.ATTRIBUTE_EQUALS);
17 | hostPlacementBuilder.setParameter("attribute", "region");
18 | HostPlacement hostPlacement = hostPlacementBuilder.build();
19 |
20 | // Create a job group with the host placement constraint
21 | Group.Builder groupBuilder = new Group.Builder();
22 | groupBuilder.setUUID(UUID.randomUUID());
23 | groupBuilder.setName("testing");
24 | groupBuilder.setHostPlacement(hostPlacement);
25 | Group group = groupBuilder.build();
26 |
27 | // Create two jobs and place them in the job group
28 | Job.Builder jobBuilder = new Job.Builder();
29 | jobBuilder.setCommand("echo hello");
30 | jobBuilder.setCpus(1.0);
31 | jobBuilder.setMemory(128.0);
32 | jobBuilder.setGroup(group);
33 | jobBuilder.setUUID(UUID.randomUUID());
34 | Job job1 = jobBuilder.build();
35 | jobBuilder.setUUID(UUID.randomUUID());
36 | Job job2 = jobBuilder.build();
37 |
38 | // Create a job client and submit our jobs and job group
39 | JobClient.Builder clientBuilder = new JobClient.Builder();
40 | clientBuilder.setHost("localhost");
41 | clientBuilder.setPort(12321);
42 | clientBuilder.setJobEndpoint("rawscheduler");
43 | JobClient client = clientBuilder.build();
44 | client.submitWithGroups(Arrays.asList(job1, job2), Collections.singletonList(group));
45 | }
46 | }
47 | ```
48 |
49 | # Running the Tests
50 |
51 | The easiest way to run the JobClient unit tests is to use Maven:
52 |
53 | ```bash
54 | mvn dependency:resolve
55 | mvn test
56 | ```
57 |
58 | © Two Sigma Open Source, LLC
59 |
--------------------------------------------------------------------------------
/jobclient/java/src/main/java/com/twosigma/cook/jobclient/Executor.java:
--------------------------------------------------------------------------------
1 | package com.twosigma.cook.jobclient;
2 |
3 | /**
4 | * Enum representing valid options for the executor field in a job and instance.
5 | */
6 |
7 | public enum Executor {
8 | COOK,
9 | EXECUTOR;
10 |
11 | public static Executor fromString(final String name) {
12 | for (final Executor executor : values()) {
13 | if (executor.name().toLowerCase().equals(name)) {
14 | return executor;
15 | }
16 | }
17 | return null;
18 | }
19 |
20 | public String displayName() {
21 | return name().toLowerCase();
22 | }
23 | }
24 |
--------------------------------------------------------------------------------
/jobclient/java/src/main/java/com/twosigma/cook/jobclient/GroupListener.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) Two Sigma Open Source, LLC
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.twosigma.cook.jobclient;
18 |
19 |
/**
 * Interface for group listener.
 *
 * Created: November 28, 2016
 *
 * @author diego
 */
public interface GroupListener {
    /**
     * The following method will be invoked in any of the following transitions:
     * INITIALIZED -> 1+ JOBS STILL RUNNING -> COMPLETED where it will receive a
     * {@link Group} object.
     *
     * Note that if any exception occurs when {@link JobClient} invokes this method for a group
     * status update, it will just simply log this exception. It won't invoke this method for the
     * particular status update again.
     *
     * @param group the group whose status has changed.
     */
    public void onStatusUpdate(Group group);
}
41 |
--------------------------------------------------------------------------------
/jobclient/java/src/main/java/com/twosigma/cook/jobclient/InstanceDecorator.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) Two Sigma Open Source, LLC
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.twosigma.cook.jobclient;
18 |
/**
 * The interface of an instance decorator, which will take an instance builder as input and return
 * a decorated instance builder.
 *
 * Created: June 23, 2016
 * @author wzhao
 */
public interface InstanceDecorator {

    /**
     * Decorate the given builder, e.g. by adding or adjusting fields before it is built.
     *
     * @param builder The {@link Instance.Builder} expected to decorate.
     * @return a decorated instance builder.
     */
    Instance.Builder decorate(Instance.Builder builder);
}
34 |
--------------------------------------------------------------------------------
/jobclient/java/src/main/java/com/twosigma/cook/jobclient/JobClientException.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) Two Sigma Open Source, LLC
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 |
18 | package com.twosigma.cook.jobclient;
19 |
/**
 * Job client exception.
 *
 * Created: March 14, 2015
 *
 * @author wzhao
 */
public class JobClientException extends Exception {
    private static final long serialVersionUID = 1L;

    // HTTP status code associated with this failure, or null when the
    // failure did not originate from an HTTP response.
    private final Integer httpResponseCode;

    /**
     * @param msg the detail message.
     */
    JobClientException(final String msg) {
        this(msg, (Integer) null);
    }

    /**
     * @param msg the detail message.
     * @param cause the underlying cause of this exception.
     */
    JobClientException(final String msg, final Throwable cause) {
        this(msg, cause, null);
    }

    /**
     * @param msg the detail message.
     * @param httpResponseCode the HTTP status code of the failed response, or null.
     */
    JobClientException(final String msg, final Integer httpResponseCode) {
        super(msg);
        this.httpResponseCode = httpResponseCode;
    }

    /**
     * @param msg the detail message.
     * @param cause the underlying cause of this exception.
     * @param httpResponseCode the HTTP status code of the failed response, or null.
     */
    JobClientException(final String msg, final Throwable cause, final Integer httpResponseCode) {
        super(msg, cause);
        this.httpResponseCode = httpResponseCode;
    }

    /**
     * @return the HTTP status code associated with this exception, or null if none.
     */
    public Integer getHttpResponseCode() {
        return httpResponseCode;
    }
}
55 |
--------------------------------------------------------------------------------
/jobclient/java/src/main/java/com/twosigma/cook/jobclient/JobListener.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) Two Sigma Open Source, LLC
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.twosigma.cook.jobclient;
18 |
19 |
/**
 * Interface for job listener.
 *
 * Created: March 14, 2015
 *
 * @author wzhao
 */
public interface JobListener {
    /**
     * The following method will be invoked in any of the following job status transitions:
     * INITIALIZED -> WAITING, WAITING -> RUNNING, RUNNING -> COMPLETED where it will receive a
     * {@link Job} object with a possible status WAITING, RUNNING and COMPLETED respectively.
     *
     * Note that if any exception occurs when {@link JobClient} invokes this method for a job
     * status update, it will just simply log this exception. It won't invoke this method for the
     * particular status update again.
     *
     * @param job the job whose status has changed.
     */
    public void onStatusUpdate(Job job);
}
41 |
--------------------------------------------------------------------------------
/jobclient/java/src/main/java/com/twosigma/cook/jobclient/auth/spnego/GSSCredentialProvider.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) Two Sigma Open Source, LLC
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.twosigma.cook.jobclient.auth.spnego;
18 |
19 | import org.ietf.jgss.GSSCredential;
20 |
/**
 * A simple {@link GSSCredential} provider that could be used to hold or provide the latest valid
 * credential.
 *
 * Created: January 14, 2016
 *
 * @author wzhao
 */
public class GSSCredentialProvider {
    // The currently held credential; null when no credential is held.
    private GSSCredential _credential = null;

    /**
     * @return the {@link GSSCredential} held in this provider. If there is no credential held in
     *         this provider, it will simply return null.
     */
    public synchronized GSSCredential getCredential() {
        return _credential;
    }

    /**
     * Update the {@link GSSCredential} held in this provider.
     *
     * @param credential the credential to hold.
     */
    public synchronized void setCredential(GSSCredential credential) {
        _credential = credential;
    }

    /**
     * Clear the {@link GSSCredential} held in this provider.
     */
    public synchronized void clear() {
        _credential = null;
    }
}
54 |
--------------------------------------------------------------------------------
/jobclient/java/src/main/java/com/twosigma/cook/jobclient/constraint/Constraint.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) Two Sigma Open Source, LLC
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.twosigma.cook.jobclient.constraint;
18 |
19 | import org.json.JSONArray;
20 | import org.json.JSONException;
21 |
/**
 * The interface to specify a constraint in Cook.
 *
 * A constraint in Cook could be one of the following three forms:
 *
 * - attribute, operator
 *
 * - attribute, operator, value
 *
 * - attribute, operator, list of values
 *
 * Examples of constraints are:
 *
 * - "host", UNIQUE
 *
 * - "host", EQUALS, foo.bar.com
 *
 * - "host", IN, [foo1.bar.com,foo2.bar.com]
 *
 */
public interface Constraint {
    /**
     * @return this constraint as a JSONArray.
     * @throws JSONException if the constraint cannot be serialized to JSON.
     */
    JSONArray toJson() throws JSONException;

    /**
     * @return the attribute of this constraint.
     */
    String getAttribute();

    /**
     * @return the operator of this constraint.
     */
    Operator getOperator();
}
54 |
--------------------------------------------------------------------------------
/jobclient/java/src/main/java/com/twosigma/cook/jobclient/constraint/OneToOneConstraint.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) Two Sigma Open Source, LLC
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package com.twosigma.cook.jobclient.constraint;
17 |
18 | import org.json.JSONArray;
19 | import org.json.JSONException;
20 |
21 | import java.util.Objects;
22 |
23 | /**
24 | * A constraint of form
25 | *
26 | * attribute, operator, value
27 | *
28 | */
29 | final class OneToOneConstraint implements Constraint {
30 | private Operator _operator;
31 | private String _attribute;
32 | private String _value;
33 |
34 | OneToOneConstraint(Operator operator, String attribute, String value) {
35 | _operator = operator;
36 | _attribute = attribute.trim();
37 | _value = value.trim();
38 | }
39 |
40 | @Override
41 | public boolean equals(Object o) {
42 | if (o == null) return false;
43 | if (o == this) return true;
44 |
45 | if (!(o instanceof OneToOneConstraint)) return false;
46 |
47 | OneToOneConstraint other = (OneToOneConstraint) o;
48 |
49 | if (!Objects.equals(this._operator, other._operator)) return false;
50 | if (!Objects.equals(this._attribute, other._attribute)) return false;
51 | if (!Objects.equals(this._value, other._value)) return false;
52 |
53 | return true;
54 | }
55 |
56 | @Override
57 | public int hashCode() {
58 | return Objects.hash(_operator, _attribute, _value);
59 | }
60 |
61 | @Override
62 | public JSONArray toJson()
63 | throws JSONException {
64 | JSONArray jsonArray = new JSONArray();
65 | jsonArray.put(0, _attribute);
66 | jsonArray.put(1, _operator.toString());
67 | jsonArray.put(2, _value);
68 | return jsonArray;
69 | }
70 |
71 | @Override
72 | public String getAttribute() {
73 | return _attribute;
74 | }
75 |
76 | @Override
77 | public Operator getOperator() {
78 | return _operator;
79 | }
80 | }
81 |
--------------------------------------------------------------------------------
/jobclient/java/src/main/java/com/twosigma/cook/jobclient/constraint/Operator.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) Two Sigma Open Source, LLC
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package com.twosigma.cook.jobclient.constraint;
17 |
public enum Operator {
    // The enum constant's own name() serves as its string form; the previous
    // constructor took a name argument and silently discarded it.
    EQUALS;

    /**
     * Parse an operator from its string representation.
     *
     * Parsing is case-insensitive and ignores surrounding whitespace.
     *
     * @param op specifies a string representation of operator.
     * @return an operator for the specified name.
     * @throws IllegalArgumentException if no operator matches the given string.
     */
    public static Operator fromString(String op) {
        return Enum.valueOf(Operator.class, op.trim().toUpperCase());
    }
}
--------------------------------------------------------------------------------
/jobclient/java/src/test/java/com/twosigma/ConstraintTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) Two Sigma Open Source, LLC
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package com.twosigma;
17 |
18 | import com.twosigma.cook.jobclient.constraint.Constraints;
19 | import com.twosigma.cook.jobclient.constraint.Constraint;
20 | import com.twosigma.cook.jobclient.constraint.Operator;
21 | import org.json.JSONArray;
22 | import org.junit.Assert;
23 | import org.junit.Test;
24 |
25 | public class ConstraintTest {
26 |
27 | @Test
28 | public void testScope() {
29 | Constraint c = Constraints.buildEqualsConstraint("bar", "foo");
30 | Assert.assertEquals(c.getAttribute(), "bar");
31 | Assert.assertEquals(c.getOperator(), Operator.EQUALS);
32 | Assert.assertEquals(c.toJson().getString(2), "foo");
33 | }
34 |
35 | @Test
36 | public void testParseFrom() {
37 | String constraintString = "[bar,EQUALS,foo]";
38 | Constraint parsedConstraint = Constraints.parseFrom(new JSONArray(constraintString));
39 | Constraint expectedConstraint = Constraints.buildEqualsConstraint("bar", "foo");
40 | Assert.assertEquals(parsedConstraint, expectedConstraint);
41 | }
42 | }
43 |
--------------------------------------------------------------------------------
/jobclient/java/src/test/java/com/twosigma/cook/jobclient/FetchableURITest.java:
--------------------------------------------------------------------------------
1 |
2 | /*
3 | * Copyright (c) Two Sigma Open Source, LLC
4 | *
5 | * Licensed under the Apache License, Version 2.0 (the "License");
6 | * you may not use this file except in compliance with the License.
7 | * You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.twosigma.cook.jobclient;
19 |
20 | import org.json.JSONException;
21 | import org.json.JSONObject;
22 | import org.junit.Assert;
23 | import org.junit.Before;
24 | import org.junit.Test;
25 |
/**
 * Unit tests for {@link FetchableURI}.
 *
 * @author dgrnbrg
 */
public class FetchableURITest {

    // URI fixture shared by the tests; rebuilt before each test in setup().
    private FetchableURI _uri;

    @Before
    public void setup() {
        // Build a representative URI: executable, not extracted, cacheable.
        final FetchableURI.Builder builder = new FetchableURI.Builder();
        builder.setValue("http://example.com/myresource.sh");
        builder.setExecutable(true);
        builder.setExtract(false);
        builder.setCache(true);
        _uri = builder.build();
    }

    @Test
    public void testJsonizeURI() throws JSONException {
        // Serialization should preserve the value and executable flag.
        final JSONObject json = FetchableURI.jsonizeUri(_uri);
        Assert.assertEquals(json.getString("value"), _uri.getValue());
        Assert.assertEquals(json.getBoolean("executable"), _uri.isExecutable());
    }

    @Test
    public void testParseFromJSON() throws JSONException {
        // A JSON round-trip should yield an equal FetchableURI.
        final JSONObject json = FetchableURI.jsonizeUri(_uri);
        Assert.assertEquals(FetchableURI.parseFromJSON(json), _uri);
    }
}
58 |
59 |
--------------------------------------------------------------------------------
/jobclient/python/README.md:
--------------------------------------------------------------------------------
1 | # The Cook Scheduler Python Client API
2 |
3 | This package defines a client API for Cook Scheduler, allowing Python applications to easily integrate with Cook.
4 |
5 | ## Quickstart
6 |
7 | The code below shows how to use the client API to connect to a Cook cluster listening on `localhost:12321`, submit a job to the cluster, and query its information.
8 |
9 | ```python
10 | from cookclient import JobClient
11 |
12 | client = JobClient('localhost:12321')
13 |
14 | uuid = client.submit(command='ls')
15 | job = client.query(uuid)
16 | print(str(job))
17 | ```
18 |
--------------------------------------------------------------------------------
/jobclient/python/docs/Makefile:
--------------------------------------------------------------------------------
# Minimal makefile for Sphinx documentation
#

# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS ?=
SPHINXBUILD ?= sphinx-build
SOURCEDIR = source
BUILDDIR = build

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
# (Listing Makefile as a prerequisite keeps this pattern rule from
# matching the Makefile itself.)
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 |
--------------------------------------------------------------------------------
/jobclient/python/docs/make.bat:
--------------------------------------------------------------------------------
@ECHO OFF

pushd %~dp0

REM Command file for Sphinx documentation

REM Fall back to the sphinx-build on PATH when SPHINXBUILD is not set.
if "%SPHINXBUILD%" == "" (
	set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=source
set BUILDDIR=build

if "%1" == "" goto help

REM Probe whether sphinx-build exists; errorlevel 9009 means "command not found".
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
	echo.
	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
	echo.installed, then set the SPHINXBUILD environment variable to point
	echo.to the full path of the 'sphinx-build' executable. Alternatively you
	echo.may add the Sphinx directory to PATH.
	echo.
	echo.If you don't have Sphinx installed, grab it from
	echo.http://sphinx-doc.org/
	exit /b 1
)

REM Forward the requested target to sphinx-build's "make mode".
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end

:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%

:end
popd
36 |
--------------------------------------------------------------------------------
/jobclient/python/docs/source/conf.py:
--------------------------------------------------------------------------------
# Configuration file for the Sphinx documentation builder.
#
# This file only contains a selection of the most common options. For a full
# list see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html

# -- Path setup --------------------------------------------------------------

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import os
import sys
# Make the package root (two levels up) importable so autodoc can find
# cookclient without installing it first.
sys.path.insert(0, os.path.abspath('../..'))

# NOTE: this import relies on the sys.path manipulation above.
from cookclient import CLIENT_VERSION


# -- Project information -----------------------------------------------------

project = 'Cook Python Client API'
copyright = '2020, Two Sigma'
author = 'Two Sigma'

# The full version, including alpha/beta/rc tags
release = CLIENT_VERSION


# -- General configuration ---------------------------------------------------

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    'sphinx.ext.autodoc',
    'sphinx_rtd_theme'
]

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = []


# -- Options for HTML output -------------------------------------------------

# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = 'sphinx_rtd_theme'

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']

# Set the master_doc value, as readthedocs uses an older version of Sphinx
# which will default to `contents` instead of `index`.
master_doc = 'index'
64 |
--------------------------------------------------------------------------------
/jobclient/python/docs/source/index.rst:
--------------------------------------------------------------------------------
1 | .. Cook Python Client API documentation master file, created by
2 | sphinx-quickstart on Mon Jun 8 10:47:38 2020.
3 | You can adapt this file completely to your liking, but it should at least
4 | contain the root `toctree` directive.
5 |
6 | Cook Python Client API
7 | ======================
8 |
9 | This is the official Python client library for interacting with Cook Scheduler.
10 |
11 | Quickstart
12 | ----------
13 |
14 | The code below shows how to use the client API to connect to a Cook cluster
15 | listening on http://localhost:12321, submit a job to the cluster, and query its
16 | information.
17 |
18 | .. highlight:: python
19 |
20 | ::
21 |
22 | from cookclient import JobClient
23 |
24 | client = JobClient('localhost:12321')
25 |
26 | uuid = client.submit(command='ls')
27 | job = client.query(uuid)
28 | print(str(job))
29 |
30 |
31 |
32 | .. toctree::
33 | :maxdepth: 2
34 | :caption: Contents:
35 |
36 | usage
37 | api
38 |
39 |
40 |
41 | Indices and tables
42 | ==================
43 |
44 | * :ref:`genindex`
45 | * :ref:`modindex`
46 | * :ref:`search`
47 |
--------------------------------------------------------------------------------
/jobclient/python/requirements.txt:
--------------------------------------------------------------------------------
1 | requests
2 | sphinx ~= 3.0.4
3 | sphinx-rtd-theme ~= 0.4.3
4 |
--------------------------------------------------------------------------------
/jobclient/python/setup.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3

"""Packaging script for the Cook Scheduler Python client library."""

from setuptools import setup

from cookclient import CLIENT_VERSION

# Read the long description with an explicit encoding so the build does not
# depend on the platform default (e.g. cp1252 on Windows would fail on
# non-ASCII characters in the README).
with open('README.md', encoding='utf-8') as fd:
    readme = fd.read()

# Runtime dependencies; docs-only deps (sphinx, theme) live in requirements.txt.
requirements = [
    'requests'
]

setup(name='cook-client-api',
      version=CLIENT_VERSION,
      description="Cook Scheduler Client API for Python",
      long_description=readme,
      long_description_content_type='text/markdown',
      packages=['cookclient'],
      url='https://github.com/twosigma/Cook',
      install_requires=requirements,
      classifiers=[
          "Development Status :: 3 - Alpha",
          "Programming Language :: Python :: 3",
          "License :: OSI Approved :: Apache Software License",
          "Operating System :: OS Independent"
      ],
      python_requires='>=3.6')
29 |
--------------------------------------------------------------------------------
/scheduler/.dockerignore:
--------------------------------------------------------------------------------
1 | .idea
2 | .lein-failures
3 | .minimesos
4 | .nrepl-port
5 | bin
6 | classes
7 | datomic/datomic*/data
8 | datomic/datomic*/log
9 | datomic/datomic*/lib/cook*.jar
10 | docs
11 | gclog.*
12 | log
13 | simulator_files
14 | target
15 | test
16 | test-log
17 | test-resources
18 | virtualenv*
19 | venv*
20 |
--------------------------------------------------------------------------------
/scheduler/.gitignore:
--------------------------------------------------------------------------------
1 | .pytest_cache
2 | gclog*
3 | .calva/
4 |
--------------------------------------------------------------------------------
/scheduler/Dockerfile:
--------------------------------------------------------------------------------
# Build image for the Cook scheduler: Mesos base image plus OpenJDK 11,
# Leiningen, and a bundled Datomic Free transactor.
FROM mesosphere/mesos:1.3.0


# Removing docker.list because docker APT repo has been deleted:
# https://www.docker.com/blog/changes-dockerproject-org-apt-yum-repositories/
# NOTE(review): the `sudo` below looks redundant — docker builds run as root.
RUN rm /etc/apt/sources.list.d/docker.list && \
    apt-get -y update && apt-get -y install software-properties-common && \
    sudo apt-get install --reinstall ca-certificates && \
    add-apt-repository ppa:openjdk-r/ppa && apt-get -y update && \
    apt-get --no-install-recommends -y install \
    curl \
    openjdk-11-jdk \
    unzip && apt-get clean && rm -Rf /var/lib/apt/lists/*

# Env setup
ENV HOME "/root/"
ENV LEIN_ROOT true
ENV MESOS_NATIVE_JAVA_LIBRARY /usr/lib/libmesos.so
ENV JAVA_CMD=/usr/lib/jvm/java-11-openjdk-amd64/bin/java

# Generate a self-signed keystore for the scheduler's SSL port
RUN mkdir /opt/ssl
RUN keytool -genkeypair -keystore /opt/ssl/cook.p12 -storetype PKCS12 -storepass cookstore -dname "CN=cook, OU=Cook Developers, O=Two Sigma Investments, L=New York, ST=New York, C=US" -keyalg RSA -keysize 2048

# Lein setup
RUN mkdir $HOME/bin
ENV PATH $PATH:$HOME/bin
RUN curl -o $HOME/bin/lein https://raw.githubusercontent.com/technomancy/leiningen/stable/bin/lein && chmod a+x $HOME/bin/lein && lein

# Create and set the cook dir, copying project file
COPY project.clj /opt/cook/
WORKDIR /opt/cook

# Fetch dependencies
## Only copy the project.clj so that we can use the cached layer
## with fetched dependencies as long as project.clj isn't modified
RUN lein deps

# Datomic setup
COPY datomic /opt/cook/datomic
RUN unzip -uo /opt/cook/datomic/datomic-free-0.9.5561.56.zip

# Copy the whole scheduler into the container
COPY docker /opt/cook/docker
COPY resources /opt/cook/resources
COPY java /opt/cook/java
COPY src /opt/cook/src

# Build the uberjar and install it into Datomic's lib dir so the transactor can load it
RUN lein uberjar
RUN cp "target/cook-$(lein print :version | tr -d '"').jar" datomic-free-0.9.5561.56/lib/cook-$(lein print :version | tr -d '"').jar
COPY config* /opt/cook/

# Ugly hack. Our .cook_kubeconfig lookup assumes it can be found in ../scheduler/ so make a symlink
RUN ln -s /opt/cook /opt/scheduler
COPY .cook_kubeconfig_* /opt/cook/

# Run cook
# 4334 is the Datomic transactor port (see datomic_transactor.properties);
# 12321/12322 are the cook HTTP/SSL ports. 4335/4336 are presumably additional
# Datomic ports — TODO confirm.
EXPOSE \
    4334 \
    4335 \
    4336 \
    12321 \
    12322
ENTRYPOINT ["/opt/cook/docker/run-cook.sh"]
CMD ["config.edn"]
66 |
--------------------------------------------------------------------------------
/scheduler/api-only-config.edn:
--------------------------------------------------------------------------------
1 | {:api-only? true
2 | :authorization {:one-user #config/env "USER"}
3 | :authorization-config {;; These users have admin privileges when using configfile-admins-auth;
4 | ;; e.g., they can view and modify other users' jobs.
5 | :admins #{"admin" "root"}
6 | ;; What function should be used to perform user authorization?
7 | ;; See the docstring in cook.rest.authorization for details.
8 | :authorization-fn cook.rest.authorization/configfile-admins-auth-open-gets
9 | ;; users that are allowed to do things on behalf of others
10 | :impersonators #{"poser" "other-impersonator"}}
11 | :cors-origins ["https?://cors.example.com"]
12 | :database {:datomic-uri "datomic:mem://cook-jobs"}
13 | :hostname "cook-scheduler-12321"
14 | :log {:file "log/cook-12321.log"
15 | :levels {"datomic.db" :warn
16 | "datomic.kv-cluster" :warn
17 | "datomic.peer" :warn
18 | :default :info}}
19 | :metrics {:jmx true
20 | :user-metrics-interval-seconds 60}
21 | :nrepl {:enabled? true
22 | :port 8888}
23 | :pools {:default "mesos-gamma"}
24 | :port 12321
25 | :rate-limit {:user-limit-per-m 1000000}
26 | :unhandled-exceptions {:log-level :error}}
27 |
--------------------------------------------------------------------------------
/scheduler/bin/build-docker-image.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

# Usage: build-docker-image.sh
# Builds a docker image containing the cook scheduler.
# The cook executor is prepared into the scheduler's resources/public
# directory first so the resulting image bundles it.

set -e

SCHEDULER_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && cd .. && pwd )"
NAME=cook-scheduler

# Quote all path expansions so directories containing spaces work.
EXECUTOR_DIR="$(dirname "${SCHEDULER_DIR}")/executor"
EXECUTOR_NAME=cook-executor
COOK_EXECUTOR_FILE="${EXECUTOR_DIR}/dist/${EXECUTOR_NAME}"
SCHEDULER_EXECUTOR_DIR="${SCHEDULER_DIR}/resources/public"
SCHEDULER_EXECUTOR_FILE="${SCHEDULER_EXECUTOR_DIR}/${EXECUTOR_NAME}"

"${EXECUTOR_DIR}/bin/prepare-executor.sh" docker "${SCHEDULER_EXECUTOR_DIR}"

echo "Building docker images for ${NAME}"
docker build -t "${NAME}" "${SCHEDULER_DIR}"
--------------------------------------------------------------------------------
/scheduler/bin/help-delete-temporary-clusters:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

# Usage: ./bin/help-delete-temporary-clusters <project> <zone>
# Delete all temporary clusters within a zone. The sibling scripts here mark clusters they create.
# This is intended to be used by other scripts and not directly.
# <project> is a gcloud project.
# <zone> can be a zone. E.g., us-central1-a


set -e

PROJECT=$1
ZONE=$2
GKE_CLUSTER_OWNER=${GKE_CLUSTER_OWNER:-$USER}

# Build the base command as an array so the project argument survives
# word-splitting intact (an unquoted "$gcloud" string would not).
gcloud=(gcloud --project "$PROJECT")

# Nuke all existing temporary clusters; don't want to keep on making more idle clusters each time you invoke this.
echo "---- Deleting any existing temporary clusters with owner $GKE_CLUSTER_OWNER"
filter="resourceLabels.longevity=temporary AND resourceLabels.owner=$GKE_CLUSTER_OWNER"
"${gcloud[@]}" container clusters list --filter "$filter"
# Cluster names cannot contain whitespace, so word-splitting the list is safe here.
for i in $("${gcloud[@]}" container clusters list --filter "$filter" --format="value(name)")
do
    echo "Deleting $i"
    # Delete clusters in parallel; barrier on all of them below.
    "${gcloud[@]}" --quiet container clusters delete "$i" --zone "$ZONE" &
done
wait
--------------------------------------------------------------------------------
/scheduler/bin/make-gke-test-cluster:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

# Usage: ./bin/make-gke-test-cluster <project> [<zone>] [<cluster-name>]
# Configure a kubernetes cluster for running pool-based integration tests and running pools in general.
# NOTE: This script labels any clusters it creates and will DELETE old clusters it created.
# <project> is a gcloud project.
# <zone> can be a zone. E.g., us-central1-a
# <cluster-name> is the name of a cluster. E.g., 'test-cluster-1'

# Prerequisites:
# - Install gcloud (https://cloud.google.com/sdk/docs/quickstarts)
# - Log in: gcloud auth login
# - Install kubectl: gcloud components install kubectl

set -e

if [ $# -eq 0 ]
then
    echo "You must provide the GCP project to use!" >&2
    exit 1
fi

PROJECT=$1
ZONE=${2:-us-central1-a}
CLUSTERNAME=${3:-$USER-test-cluster-$(date '+%Y%m%d-%H%M%S')}

# Array form keeps the project argument as a single word even if it
# ever contains shell metacharacters.
gcloud=(gcloud --project "$PROJECT")

bin/help-delete-temporary-clusters "$PROJECT" "$ZONE"
bin/help-make-cluster "$PROJECT" "$ZONE" "$CLUSTERNAME" .cook_kubeconfig_1

echo "---- Showing all of the clusters we generated"
"${gcloud[@]}" container clusters list
34 |
--------------------------------------------------------------------------------
/scheduler/bin/make-gke-test-clusters:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

# Usage: ./bin/make-gke-test-clusters [<project>] [<zone>] [<cluster-name>]
# Configure two kubernetes clusters for running pool-based integration tests and running pools in general.
# NOTE: This script labels any clusters it creates and will DELETE old clusters it created.
# <project> is a gcloud project and defaults to $GCP_PROJECT_NAME.
# <zone> can be a zone. E.g., us-central1-a
# <cluster-name> is the name of a cluster. E.g., 'test-cluster-1'

# Prerequisites:
# - Install gcloud (https://cloud.google.com/sdk/docs/quickstarts)
# - Log in: gcloud auth login
# - Install kubectl: gcloud components install kubectl

set -e

GKE_CLUSTER_OWNER=${GKE_CLUSTER_OWNER:-$USER}
PROJECT=${1:-$GCP_PROJECT_NAME}
ZONE=${2:-us-central1-a}
CLUSTERNAME=${3:-$GKE_CLUSTER_OWNER-test-$(date '+%m%d-%H%M%S')}

# Array form keeps the project argument intact under word-splitting.
gcloud=(gcloud --project "$PROJECT")

bin/help-delete-temporary-clusters "$PROJECT" "$ZONE"
rm -f .cook_kubeconfig_1
rm -f .cook_kubeconfig_2

# Make 2 clusters in parallel. Wait on each pid explicitly: a bare `wait`
# would discard the background jobs' exit statuses, so a failed cluster
# creation would go unnoticed even under set -e.
bin/help-make-cluster "$PROJECT" "$ZONE" "${CLUSTERNAME}"-a .cook_kubeconfig_1 & pid_a=$!
bin/help-make-cluster "$PROJECT" "$ZONE" "${CLUSTERNAME}"-b .cook_kubeconfig_2 & pid_b=$!
wait "$pid_a"
wait "$pid_b"

echo "---- Showing all of the clusters we generated"
filter="resourceLabels.longevity=temporary AND resourceLabels.owner=$GKE_CLUSTER_OWNER"
"${gcloud[@]}" container clusters list --filter "$filter"
36 |
--------------------------------------------------------------------------------
/scheduler/bin/priority-class-cook-workload.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: scheduling.k8s.io/v1
2 | kind: PriorityClass
3 | metadata:
4 | name: cook-workload
5 | value: 1000
6 | globalDefault: false
7 | description: "This priority class should be used for Cook scheduled workloads."
8 |
--------------------------------------------------------------------------------
/scheduler/bin/priority-class-synthetic-pod.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: scheduling.k8s.io/v1
2 | kind: PriorityClass
3 | metadata:
4 | name: synthetic-pod
5 | value: 1
6 | globalDefault: false
7 | description: "This priority class should be used for Cook synthetic pods (trigger autoscaling)."
8 |
--------------------------------------------------------------------------------
/scheduler/bin/run-local-kubernetes.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

# Usage: ./bin/run-local-kubernetes.sh [port] [nrepl-port] [ssl-port]
# Runs the cook scheduler locally, pulling kubeconfigs for the owner's
# temporary GKE test clusters first.

set -e
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
SCHEDULER_DIR="$( dirname "${DIR}" )"

# Defaults (overridable via environment).
# Use ':=' consistently so an empty-but-set variable also receives the default
# (the previous COOK_DATOMIC_URI line used '=', which only fills in unset vars).
: "${COOK_DATOMIC_URI:="datomic:mem://cook-jobs"}"
: "${COOK_FRAMEWORK_ID:=cook-framework-$(date +%s)}"
: "${COOK_KEYSTORE_PATH:="${SCHEDULER_DIR}/cook.p12"}"
: "${COOK_NREPL_PORT:=${2:-8888}}"
: "${COOK_PORT:=${1:-12321}}"
: "${COOK_SSL_PORT:=${3:-12322}}"
: "${MASTER_IP:="127.0.0.2"}"
: "${ZOOKEEPER_IP:="127.0.0.1"}"
: "${MESOS_NATIVE_JAVA_LIBRARY:="/usr/local/lib/libmesos.dylib"}"


if [ "${COOK_ZOOKEEPER_LOCAL}" = false ] ; then
    COOK_ZOOKEEPER="${ZOOKEEPER_IP}:2181"
    echo "Cook ZooKeeper configured to ${COOK_ZOOKEEPER}"
else
    COOK_ZOOKEEPER=""
    COOK_ZOOKEEPER_LOCAL=true
    echo "Cook will use local ZooKeeper"
fi

# Generate a PKCS12 keystore for the SSL port on first run.
if [ ! -f "${COOK_KEYSTORE_PATH}" ];
then
    keytool -genkeypair -keystore "${COOK_KEYSTORE_PATH}" -storetype PKCS12 -storepass cookstore -dname "CN=cook, OU=Cook Developers, O=Two Sigma Investments, L=New York, ST=New York, C=US" -keyalg RSA -keysize 2048
fi

echo "Creating environment variables..."
export COOK_DATOMIC_URI="${COOK_DATOMIC_URI}"
export COOK_FRAMEWORK_ID="${COOK_FRAMEWORK_ID}"
export COOK_ONE_USER_AUTH="$(whoami)"
export COOK_HOSTNAME="cook-scheduler-${COOK_PORT}"
export COOK_LOG_FILE="log/cook-${COOK_PORT}.log"
export COOK_NREPL_PORT="${COOK_NREPL_PORT}"
export COOK_PORT="${COOK_PORT}"
export COOK_ZOOKEEPER="${COOK_ZOOKEEPER}"
export COOK_ZOOKEEPER_LOCAL="${COOK_ZOOKEEPER_LOCAL}"
export LIBPROCESS_IP="${MASTER_IP}"
export MESOS_MASTER="${MASTER_IP}:5050"
export MESOS_NATIVE_JAVA_LIBRARY="${MESOS_NATIVE_JAVA_LIBRARY}"
export COOK_SSL_PORT="${COOK_SSL_PORT}"
export COOK_KEYSTORE_PATH="${COOK_KEYSTORE_PATH}"

echo "Getting GKE credentials..."
filter="resourceLabels.longevity=temporary AND resourceLabels.owner=$GKE_CLUSTER_OWNER"
gcloud container clusters list --filter "$filter"
i=1
# The csv format emits one "name,zone" record per line; read line-by-line
# instead of word-splitting $(...), and split each record on the comma.
while IFS=, read -r cluster zone
do
    echo "Getting credentials for cluster $cluster in zone $zone ($i)"
    KUBECONFIG=.cook_kubeconfig_$i gcloud container clusters get-credentials "$cluster" --zone "$zone"
    i=$((i+1))  # not ((i++)): that returns non-zero when i is 0 and would trip set -e
done < <(gcloud container clusters list --filter "$filter" --format="csv(name,zone)" | tail -n +2)
KUBECONFIG=.cook_kubeconfig_1 kubectl get pods --namespace cook
KUBECONFIG=.cook_kubeconfig_2 kubectl get pods --namespace cook

echo "Starting cook..."
rm -f "$COOK_LOG_FILE"
lein run config-k8s.edn
70 |
--------------------------------------------------------------------------------
/scheduler/bin/sample_launch.sh:
--------------------------------------------------------------------------------
#!/bin/sh
# Submit a trivial test job to a locally-running Cook scheduler
# (vagrant:password are the default dev credentials, see dev-config.edn).
uuid=$(uuidgen)
# Quote the spliced ${uuid} so the payload stays one argument even if the
# value were ever subject to word-splitting or globbing.
curl -u vagrant:password -H "content-type: application/json" -XPOST http://localhost:12321/rawscheduler -d '{"jobs": [{"max_retries": 3, "max_runtime": 86400000, "mem": 1000, "cpus": 1.5, "uuid": "'"${uuid}"'", "command": "echo hello my friend", "name": "test", "priority": 0}]}'
printf "\n"
5 |
--------------------------------------------------------------------------------
/scheduler/bin/start-datomic.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Unpack the bundled Datomic Free distribution (if needed), install the cook
# uberjar into its lib directory (building it if needed), then start the
# Datomic transactor in the foreground.

set -euf -o pipefail

# Quote "$0" inside dirname so a script path containing spaces still works.
PROJECT_DIR="$(dirname "$0")/.."
DATOMIC_VERSION="0.9.5561.56"
DATOMIC_DIR="${PROJECT_DIR}/datomic/datomic-free-${DATOMIC_VERSION}"

if [ ! -d "${DATOMIC_DIR}" ];
then
    unzip "${PROJECT_DIR}/datomic/datomic-free-${DATOMIC_VERSION}.zip" -d "${PROJECT_DIR}/datomic"
fi

COOK_VERSION=$(lein print :version | tr -d '"')

if [ ! -f "${DATOMIC_DIR}/lib/cook-${COOK_VERSION}.jar" ];
then
    lein uberjar
    # `lein print :version` would not have worked if nothing was built, so need to
    # get version again after building
    COOK_VERSION=$(lein print :version | tr -d '"')
    cp "${PROJECT_DIR}/target/cook-${COOK_VERSION}.jar" "${DATOMIC_DIR}/lib/"
fi

# Quote the realpath result so the properties path is passed as one argument.
"${DATOMIC_DIR}/bin/transactor" "$(realpath "${PROJECT_DIR}/datomic/datomic_transactor.properties")"
26 |
27 |
28 |
--------------------------------------------------------------------------------
/scheduler/bin/submit-docker.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | UUID=$(uuidgen)
4 |
5 | curl -XPOST -H"Content-Type: application/json" http://localhost:12321/rawscheduler -d"{\"jobs\": [{\"uuid\": \"$UUID\", \"env\": {\"EXECUTOR_TEST_EXIT\": \"1\"}, \"executor\": \"cook\", \"mem\": 128, \"cpus\": 1, \"command\": \"echo progress: 50 test_progress && exit 0\", \"max_retries\": 1, \"container\": {\"type\": \"DOCKER\", \"docker\": {\"image\": \"python:3.5.9-stretch\", \"network\": \"HOST\", \"force-pull-image\": false}, \"volumes\": [{\"container-path\": \"/Users/paul/src/Cook/executor/dist\", \"host-path\": \"/Users/paul/src/Cook/executor/dist\"}]}}]}"
6 |
--------------------------------------------------------------------------------
/scheduler/datomic/data/seed_k8s_pools.clj:
--------------------------------------------------------------------------------
;; Seeds Kubernetes test pools (and per-pool quotas) into the Datomic database
;; whose URI is passed as a command-line argument. Invoked from the scheduler
;; Docker entry point via `lein exec -p` (see docker/run-cook.sh).
(ns data.seed-k8s-pools
  (:require [cook.datomic :as datomic]
            [cook.postgres :as pg]
            [cook.quota :as quota]
            [datomic.api :as d]))

;; With `lein exec -p <script> <uri>`, the script path is the first
;; command-line argument, so the Datomic URI is the second.
(def uri (second *command-line-args*))
(println "Datomic URI is" uri)

(defn create-pool
  ;; Transacts one pool entity with the given name and state
  ;; (e.g. :pool.state/active); blocks until the transaction completes.
  [conn name state]
  (println "Creating pool" name)
  @(d/transact conn [{:db/id (d/tempid :db.part/user)
                      :pool/name name
                      :pool/purpose "This is a pool for testing purposes"
                      :pool/state state
                      :pool/dru-mode :pool.dru-mode/default}]))

(defn pools
  ;; Returns all pool entities (touched, so attributes are realized) whose
  ;; state is either active or inactive.
  [db]
  (->> (d/q '[:find [?p ...]
              :in $ [?state ...]
              :where
              [?p :pool/state ?state]]
            db [:pool.state/active :pool.state/inactive])
       (map (partial d/entity db))
       (map d/touch)))

(try
  (let [conn (datomic/create-connection {:settings {:mesos-datomic-uri uri}})]
    ;; Quota storage uses Postgres: configure the saved connection dictionary
    ;; from env vars (COOK_DB_TEST_PG_*) before any quota calls below.
    (->> (System/getenv "COOK_DB_TEST_PG_SCHEMA")
         (pg/make-database-connection-dictionary-from-env-vars)
         (reset! pg/saved-pg-config-dictionary))
    (println "Connected to Datomic:" conn)
    (create-pool conn "k8s-alpha" :pool.state/active)
    (create-pool conn "k8s-beta" :pool.state/inactive)
    (create-pool conn "k8s-gamma" :pool.state/active)
    (create-pool conn "k8s-delta" :pool.state/inactive)
    (create-pool conn "k8s-quota" :pool.state/active)
    (quota/set-quota! conn "default" "k8s-alpha" "For quota-related testing." :cpus 8 :mem 1024)
    (quota/set-quota! conn "default" "k8s-gamma" "For quota-related testing." :cpus 9 :mem 2048)
    (println "Pools & Quotas:")
    (run! (fn [{:keys [pool/name] :as p}]
            (clojure.pprint/pprint p)
            (clojure.pprint/pprint (quota/get-quota (d/db conn) "default" name)))
          (pools (d/db conn)))
    ;; Exit explicitly so lein-exec does not linger on background threads.
    (System/exit 0))
  (catch Throwable t
    (println "Failed to seed pools:" t)
    (System/exit 1)))
51 |
--------------------------------------------------------------------------------
/scheduler/datomic/datomic-free-0.9.5561.56.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/twosigma/Cook/e43416aea1ff47b667101d275464ba45541f982e/scheduler/datomic/datomic-free-0.9.5561.56.zip
--------------------------------------------------------------------------------
/scheduler/datomic/datomic_transactor.properties:
--------------------------------------------------------------------------------
1 | protocol=free
2 | host=0.0.0.0
3 | port=4334
4 |
5 | memory-index-threshold=32m
6 | memory-index-max=256m
7 | object-cache-max=128m
8 |
--------------------------------------------------------------------------------
/scheduler/dev-config.edn:
--------------------------------------------------------------------------------
1 | {:port 12321
2 | :hostname "localhost"
3 | ;; We'll set the user to vagrant, since that's the default for many Vagrant-based Mesos setups
4 | :authorization {:one-user "vagrant"}
5 | :database {:datomic-uri "datomic:mem://cook-jobs"}
6 | :zookeeper {:local? true
7 | ;:local-port 3291 ; Uncomment to change the default port
8 | }
9 | :scheduler {:offer-incubate-ms 15000
10 | :mea-culpa-failure-limit {:default 5
11 | :mesos-master-disconnected 8
12 | ; -1 means no limit
13 | :preempted-by-rebalancer -1}
14 | :task-constraints {:timeout-hours 1
15 | :timeout-interval-minutes 1
16 | :memory-gb 48
17 | :retry-limit 15
18 | :cpus 6}}
19 | :rebalancer {:dru-scale 1}
20 | :mesos {:master "zk://localhost:2181/mesos" ; Assuming Mesos is configured to use Zookeeper and is running locally
21 | :failover-timeout-ms nil ; When we close the instance of Cook, all its tasks are killed by Mesos
22 | :leader-path "/cook-scheduler"}
23 | :unhandled-exceptions {:log-level :error}
24 | :metrics {:jmx true}
25 | :nrepl {:enabled? true
26 | :port 8888}
27 | :log {:file "log/cook.log"
28 | :levels {"datomic.db" :warn
29 | "datomic.peer" :warn
30 | "datomic.kv-cluster" :warn
31 | :default :info}}}
32 |
--------------------------------------------------------------------------------
/scheduler/docker/run-cook.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

# Docker entry point for the cook scheduler image: starts a local Datomic
# transactor, seeds test data, then runs cook with the config file named in $1.

DATOMIC_PROPERTIES_FILE=/opt/cook/datomic/datomic_transactor.properties

# Advertise the container's own IP to Datomic peers, then start the
# transactor in the background.
echo "alt-host=$(hostname -i | cut -d' ' -f2)" >> "${DATOMIC_PROPERTIES_FILE}"
/opt/cook/datomic-free-0.9.5561.56/bin/transactor "${DATOMIC_PROPERTIES_FILE}" &
echo "Seeding test data..."
# Needed because seeding pools uses codepaths that access the database.
export COOK_DB_TEST_PG_DB="cook_local"
export COOK_DB_TEST_PG_USER="cook_scheduler"
export COOK_DB_TEST_PG_SERVER="cook-postgres"
export COOK_DB_TEST_PG_SCHEMA="cook_local"
lein exec -p /opt/cook/datomic/data/seed_k8s_pools.clj "${COOK_DATOMIC_URI}"
lein exec -p /opt/cook/datomic/data/seed_running_jobs.clj "${COOK_DATOMIC_URI}"
# Quote "$1" so a config filename with spaces reaches lein as one argument.
lein with-profiles +docker run "$1"
16 |
--------------------------------------------------------------------------------
/scheduler/docs/clj-http-async-pool.md:
--------------------------------------------------------------------------------
1 | clj-http-async-pool
2 | ===================
3 |
4 | pooling middleware for async clj-http requests
5 |
6 | Usage
7 | -----
8 |
9 | (use '[clj-http-async-pool.router :as http-router])
10 | (def router (http-router/make-router {:hosts #{"www.random.org:80"}}))
11 | (use '[clj-http-async-pool.client :as http])
12 | (http/get router "https://www.random.org/sequences/?min=1&max=42&col=1&format=plain")
13 |
14 | © Two Sigma Open Source, LLC
15 |
--------------------------------------------------------------------------------
/scheduler/docs/dev-getting-started.md:
--------------------------------------------------------------------------------
1 | # Setting up your Cook dev environment
2 |
3 | This document tells you how to set up a Cook dev environment from
4 | scratch. We have to install Clojure itself, Datomic, Docker, and Mesos.
5 |
6 | Prerequisites
7 | =============
8 |
9 | Before beginning, you should already have working installations of Clojure and [Leiningen](https://leiningen.org/).
10 | Refer to those projects' getting started guides for information on how to set
11 | them up.
12 |
13 |
14 | Installing Cook-specific Infrastructure
15 | ========================================
16 |
17 |
18 | Docker
19 | -----
20 |
21 | Install docker by following the instructions on:
22 |
23 | https://docs.docker.com/engine/installation/linux/ubuntulinux/
24 |
There are installation docs for all common OSs.
26 |
27 | Minimesos
28 | -----
29 |
30 | Then, install minimesos by following the instructions at
31 | http://minimesos.readthedocs.io/en/latest/ .
32 |
33 |
34 | Once minimesos is installed, you can download and run minimesos itself:
35 |
36 |
37 | ```
38 | mkdir minimesos
39 | cd minimesos
40 |
41 | minimesos init
42 | minimesos up --num-agents 2
43 | ```
44 |
45 | Big Mesos
46 | ---------
47 |
48 | Even if you are using minimesos, you still have to build regular Mesos
49 | to get the `libmesos.so` library (called `libmesos.dylib` on Mac).
50 |
51 | You can either install it directly on your machine or use a docker container with
52 | mesos installed. Here we will only talk about using a docker container.
53 | If you instead want to install mesos on your machine, you can follow the docs here:
54 | http://mesos.apache.org/gettingstarted/
55 |
The following repo contains a Dockerfile that will set up Cook;
use it as a starting point to get set up:
58 | https://github.com/wyegelwel/cook-docker
59 |
60 |
61 | Command Line Usage
62 | ==================
63 |
64 | To build and run the project at the command line, copy
65 | `$COOK_DIR/scheduler/dev-config.edn` and edit the copy so that the Mesos master ZooKeeper URL matches
66 | the one returned by `minimesos info`.
67 |
68 | Then run the following, replacing `$MESOS_DIR` with the actual path to your local
69 | Mesos build:
70 |
71 |
72 | ```
73 | cd $COOK_DIR/scheduler
74 | lein uberjar
75 | MESOS_NATIVE_JAVA_LIBRARY=$MESOS_DIR/build/src/.libs/libmesos.so lein run ./local-dev-config.edn
76 | ```
77 |
78 | Test that the server is running properly with:
79 |
80 | ```
81 | curl http://localhost:12321/rawscheduler
82 | ```
83 |
84 | If you get a reply like `"must supply at least one job query param"`, that means Cook is running.
85 |
86 |
87 | Interactive development
88 | =======================
89 |
90 | The dev config will open a nrepl port on the running cook server.
91 | You can connect to this port and then develop, eval and test on the running server.
92 | We have found this greatly speeds up development and is just generally pleasant.
93 |
--------------------------------------------------------------------------------
/scheduler/docs/kubernetes-state.dot:
--------------------------------------------------------------------------------
1 | # A graph showing valid transitions from expected states to expected states. Edges are annotated with
2 | # which kubernetes states the system may be in when it makes the transition.
3 |
4 | digraph g {
5 | Starting -> Starting [label=":waiting\n:missing"]
6 | Starting -> Running [label=":running"]
7 | // (Starting, missing) -> Completed happens for some failed pod submissions
8 | Starting -> Completed [label=":succeeded\n:failed\n:unknown\n:deleting"]
9 |
10 | Running -> Running [label=":running"]
11 | Running -> Completed [label=":waiting\n:succeeded\n:failed\n:unknown\n:missing\n:deleting"]
12 |
13 | Completed -> Completed [label=":waiting\n:running\n:unknown\n:succeeded\n:failed"]
14 | Completed -> Missing [label=":missing\n:deleting"]
15 |
16 | Killed -> Killed [label=":waiting\n:running\n:unknown\n"]
17 | Killed -> Completed [label=":succeeded\n:failed\n:missing\n:deleting"]
18 |
19 | Missing [peripheries=2]
20 | Missing -> Missing [label=":waiting\n:running\n:succeeded\n:failed\n:unknown\n:missing\n:deleting"]
21 | }
22 |
--------------------------------------------------------------------------------
/scheduler/docs/make-kubernetes-namespace.json:
--------------------------------------------------------------------------------
1 | {
2 | "apiVersion": "v1",
3 | "kind": "Namespace",
4 | "metadata": {
5 | "name": "cook",
6 | "labels": {
7 | "name": "cook"
8 | }
9 | }
10 | }
11 |
--------------------------------------------------------------------------------
/scheduler/docs/metatransactions.md:
--------------------------------------------------------------------------------
1 | # metatransaction
2 |
3 | ## What is a metatransaction?
4 |
A metatransaction is a way to link datomic transactions in a single logical transaction. Metatransactions have simple semantics: link a datomic transaction to a metatransaction, then commit the metatransaction. The library also supplies a [db filter](https://support.cognitect.com/entries/25976096-Filtering-Databases) to remove transactions that have not had their metatransaction committed.
6 |
7 | ## Why use metatransactions?
8 |
9 | Metatransactions allow you to craft smaller transactions and handle streaming data while maintaining the semantics of a transaction.
10 |
11 | ## Usage
12 |
Simple example where a user can submit any number of jobs, one at a time, to the server; once all the jobs are sent, an end message is sent to the server. Here we can use metatransactions to logically transact our jobs and only acknowledge them once they are committed:
14 |
15 | ```Clojure
16 |
17 | (require '[datomic.api :as d]
18 | '[metatransaction.core :as mt])
19 |
20 | (def conn (d/connect uri))
21 |
22 | (mt/setup-metatransaction conn)
23 |
24 | (def job1 (d/squuid)) ; We suggest using squuids to improve indexing
25 |
26 | (d/transact conn [[:metatransaction/include-in job1]
27 | {:db/id (d/tempid :db.part/user)
28 | :job/id 1
29 | :job/uuid job1}])
30 | (d/transact conn [[:metatransaction/commit job1]])
31 |
32 | (def job2 (d/squuid))
33 |
34 | (d/transact conn [[:metatransaction/include-in job2]
35 | {:db/id (d/tempid :db.part/user)
36 | :job/id 1
37 | :job/uuid job2}])
38 | (d/transact conn [[:metatransaction/include-in job2]
39 | {:db/id (d/tempid :db.part/user)
40 | :job/id 2
41 | :job/uuid job2}])
42 |
43 | (d/q '[:find ?job-id ?job-uuid
44 | :where
45 | [?e :job/id ?job-id]
46 | [?e :job/uuid ?job-uuid]]
47 | (d/db conn))
48 | ;; Will print #{[1 job1] [1 job2] [2 job2]}
49 |
50 |
51 |
52 | (d/q '[:find ?job-id ?job-uuid
53 | :where
54 | [?e :job/id ?job-id]
55 | [?e :job/uuid ?job-uuid]]
56 | (mt/db conn))
57 |
58 | ; Will print #{[1 job1]}
59 | ```
60 |
61 | ## License
62 |
63 | © Two Sigma Open Source, LLC
64 |
--------------------------------------------------------------------------------
/scheduler/docs/optimizer.md:
--------------------------------------------------------------------------------
1 | Optimizer
2 | =========
3 |
4 | The optimizer is intended to provide a longer term, holistic plan for the cluster that other components in Cook can consume to inform their operation.
5 | Cook will provide a no-op implementation of an optimizer and allow for plugging in different implementations.
6 |
7 | The optimizer is provided with the current queue, the jobs that are running, the offers that are available and a pluggable feed of hosts that can be purchased.
8 | There are plans to support more plug-ins such as expected demand in the future.
9 | With these inputs, the optimizer produces a 'schedule' of suggestions of what hosts to purchase and matches of jobs and hosts at different time horizons.
10 |
11 | There are plans to have the schedule be fed to the matcher so that it may treat the suggestions of the optimizer as soft constraints.
12 |
13 | The specification of pluggable pieces can be found in [optimizer.clj](scheduler/src/cook/mesos/optimizer.clj).
14 |
--------------------------------------------------------------------------------
/scheduler/docs/reason-code:
--------------------------------------------------------------------------------
1 | 01xxx: Normal
2 | 01000: Normal exit
3 | 01001: Killed by user
4 | 01002: Preempted by rebalancer
5 | 01003: REASON_CONTAINER_PREEMPTED
6 | 01004: REASON_TASK_KILLED_DURING_LAUNCH
7 | 01005: Running
8 | 01006: Scheduling failed on host
9 | 01007: Container initialization timed out
10 | 01008: Killed externally
11 | 01009: Container readiness timed out
12 | 01010: Kubernetes pod submission API error
13 |
14 | 02xxx: Job Misconfiguration
15 | 02000: REASON_CONTAINER_LIMITATION
16 | 02001: REASON_CONTAINER_LIMITATION_DISK
17 | 02002: REASON_CONTAINER_LIMITATION_MEMORY
18 | 02003: Max runtime exceeded
19 | 02004: Task was a straggler
20 |
21 | 03xxx: Cook Error
22 | 03000: REASON_RECONCILIATION
23 | 03001: REASON_INVALID_FRAMEWORKID
24 | 03002: REASON_INVALID_OFFERS
25 | 03003: REASON_RESOURCES_UNKNOWN
26 | 03004: REASON_TASK_INVALID
27 | 03005: REASON_TASK_UNAUTHORIZED
28 | 03006: REASON_TASK_UNKNOWN
29 | 03007: REASON_SLAVE_UNKNOWN
30 | 03008: Could not reconstruct pod
31 |
32 | 04xxx: Mesos Slave Error
33 | 04000: REASON_SLAVE_REMOVED
34 | 04001: REASON_SLAVE_RESTARTED
35 | 04002: REASON_GC_ERROR
36 | 04003: REASON_CONTAINER_LAUNCH_FAILED
37 | 04004: REASON_CONTAINER_UPDATE_FAILED
38 | 04005: REASON_SLAVE_DISCONNECTED
39 | 04006: Cook heartbeat lost
40 |
41 | 05xxx: Mesos Master Error
42 | 05000: REASON_FRAMEWORK_REMOVED
43 | 05001: REASON_MASTER_DISCONNECTED
44 |
45 | 06xxx: Executor Error
46 | 06000: REASON_EXECUTOR_REGISTRATION_TIMEOUT
47 | 06001: REASON_EXECUTOR_REREGISTRATION_TIMEOUT
48 | 06002: REASON_EXECUTOR_UNREGISTERED
49 |
50 | 99xxx: General Error
51 | 99000: unknown reason
52 | 99001: unknown mesos reason
53 | 99002: REASON_EXECUTOR_TERMINATED
54 | 99003: Exited non-zero
55 |
--------------------------------------------------------------------------------
/scheduler/example-prod-config.edn:
--------------------------------------------------------------------------------
1 | {:port 12321
2 | :hostname "cook.example.com"
3 | :authorization {:http-basic true}
4 | :database {:datomic-uri "datomic:free://example.com:4334/cook-jobs"}
5 | :authorization-config {
6 | ;; What function should be used to perform user authorization?
7 | ;; See the docstring in cook.rest.authorization for details.
8 | :authorization-fn cook.rest.authorization/configfile-admins-auth
9 |
10 |
11 | ;; These users have admin privileges when using
12 | ;; configfile-admins-auth -- that is, they can view and modify other
13 | ;; users' jobs.
14 | :admins #{"admin" "other-admin"}}
15 | :zookeeper {:connection "zk1.example.com:2181,zk2.example.com:2181,zk3.example.com:2181/cook"}
16 | :scheduler {:offer-incubate-ms 15000
17 | :mea-culpa-failure-limit 5
18 | :task-constraints {:timeout-hours 24
19 | :timeout-interval-minutes 10
20 | :memory-gb 96
21 | :retry-limit 15
22 | :cpus 20}}
23 | :executor {:command "./cook-executor"
24 | :uri {:cache true
25 | :executable true
26 | :extract false
27 | :value "http://example.com:12321/resources/cook-executor"}}
28 | :mesos {:master "zk://zk1.example.com:2181,zk2.example.com:2181,zk3.example.com:2181/cook"
29 | :failover-timeout-ms 1209600000
30 | :leader-path "/cook-scheduler"}
31 | :unhandled-exceptions {:log-level :error
32 | :email {:to ["admin@example.com"]
33 | :from "cook@example.com"
34 | :subject "Unhandled exception in cook"}}
35 | :metrics {:jmx true}
36 | :nrepl {:enabled? true
37 | :port 8888}
38 | :log {:file "log/cook.log"
39 | :levels {"datomic.db" :warn
40 | "datomic.peer" :warn
41 | "datomic.kv-cluster" :warn
42 | :default :info}}}
43 |
--------------------------------------------------------------------------------
/scheduler/java/com/twosigma/cook/kubernetes/FinalizerHelper.java:
--------------------------------------------------------------------------------
1 | package com.twosigma.cook.kubernetes;
2 |
3 | import io.kubernetes.client.custom.V1Patch;
4 | import io.kubernetes.client.openapi.ApiClient;
5 | import io.kubernetes.client.openapi.ApiException;
6 | import io.kubernetes.client.openapi.apis.CoreV1Api;
7 | import io.kubernetes.client.openapi.models.V1Pod;
8 | import io.kubernetes.client.util.PatchUtils;
9 | import org.joda.time.DateTime;
10 |
11 | import java.util.List;
12 |
13 | public class FinalizerHelper {
14 | /** A finalizer that is attached to a pod to ensure that it is not GC'ed by K8s before cook
15 | * has had a chance to collect the completion result (success or failed) */
16 | static public final String collectResultsFinalizer = "cook/prevent-pod-gc";
17 | 
18 | /** Remove the collectResultsFinalizer from a pod if it exists on a pod and the pod is marked for
19 | * deletion. */
20 | static public void removeFinalizer(ApiClient apiClient, V1Pod pod) throws ApiException {
21 | CoreV1Api api = new CoreV1Api(apiClient);
22 | 
23 | DateTime deletionTimestamp = pod.getMetadata().getDeletionTimestamp();
24 | if (deletionTimestamp != null) {
25 | List finalizers = pod.getMetadata().getFinalizers();
26 | if (finalizers != null) {
27 | for (int ii = 0; ii < finalizers.size(); ii++) {
28 | if (collectResultsFinalizer.equals(finalizers.get(ii))) {
29 | String jsonPatchStr = "[{\"op\": \"remove\", \"path\": \"/metadata/finalizers/" + ii + "\"}]"; // JSON Patch removes the finalizer by its index in metadata.finalizers
30 | String podName = pod.getMetadata().getName();
31 | String namespaceName = pod.getMetadata().getNamespace();
32 | PatchUtils.patch(
33 | V1Pod.class,
34 | () ->
35 | api.patchNamespacedPodCall(
36 | podName,
37 | namespaceName,
38 | new V1Patch(jsonPatchStr),
39 | null,
40 | null,
41 | null, // field-manager is optional
42 | null,
43 | null),
44 | V1Patch.PATCH_FORMAT_JSON_PATCH,
45 | apiClient);
46 | return; // Early abort if we've found the finalizer.
47 | }
48 | }
49 | }
50 | }
51 | }
52 | }
53 |
54 |
--------------------------------------------------------------------------------
/scheduler/java/com/twosigma/cook/kubernetes/WatchHelper.java:
--------------------------------------------------------------------------------
1 | package com.twosigma.cook.kubernetes;
2 |
3 | import com.google.common.reflect.TypeToken;
4 | import io.kubernetes.client.openapi.ApiClient;
5 | import io.kubernetes.client.openapi.ApiException;
6 | import io.kubernetes.client.openapi.apis.CoreV1Api;
7 | import io.kubernetes.client.openapi.models.CoreV1Event;
8 | import io.kubernetes.client.openapi.models.V1Node;
9 | import io.kubernetes.client.openapi.models.V1Pod;
10 | import io.kubernetes.client.util.Watch;
11 |
12 | public class WatchHelper {
13 | 
14 |     /** Create a watch on pods across all namespaces, resuming from the given resourceVersion. */
15 |     public static Watch<V1Pod> createPodWatch(ApiClient apiClient, String resourceVersion) throws ApiException {
16 |         CoreV1Api api = new CoreV1Api(apiClient);
17 |         return Watch.createWatch(apiClient,
18 |                 api.listPodForAllNamespacesCall(null, null, null, null, null, null,
19 |                         resourceVersion, null, null, true, null),
20 |                 // Watch.createWatch needs the full Watch.Response<T> generic type so the
21 |                 // client can deserialize watch events; the bare TypeToken was a mangled form.
22 |                 new TypeToken<Watch.Response<V1Pod>>() {}.getType());
23 |     }
24 | 
25 |     /** Create a watch on cluster nodes, resuming from the given resourceVersion. */
26 |     public static Watch<V1Node> createNodeWatch(ApiClient apiClient, String resourceVersion) throws ApiException {
27 |         CoreV1Api api = new CoreV1Api(apiClient);
28 |         return Watch.createWatch(apiClient,
29 |                 api.listNodeCall(null, null, null, null, null, null, resourceVersion, null, null, true, null),
30 |                 new TypeToken<Watch.Response<V1Node>>() {}.getType());
31 |     }
32 | 
33 |     /** Create a watch on events across all namespaces, resuming from the given resourceVersion. */
34 |     public static Watch<CoreV1Event> createEventWatch(ApiClient apiClient, String resourceVersion) throws ApiException {
35 |         CoreV1Api api = new CoreV1Api(apiClient);
36 |         return Watch.createWatch(apiClient,
37 |                 api.listEventForAllNamespacesCall(null, null, null, null, null,
38 |                         null, resourceVersion, null, null, true, null),
39 |                 new TypeToken<Watch.Response<CoreV1Event>>() {}.getType());
40 |     }
41 | }
--------------------------------------------------------------------------------
/scheduler/liquibase/changelog/com/twosigma/cook/changelogs/setup.postgresql.sql:
--------------------------------------------------------------------------------
1 | --liquibase formatted sql
2 |
3 | -- Initialize a cook database from scratch --- creating the schemas and such.
4 | -- Assumes we already have an appropriately configured postgresql database and
5 | -- have psql connected to it.
6 |
7 | -- If you get a strange error like 'no schema has been selected to create in'
8 | -- when running the first CREATE TABLE, it can be caused by a capital letter in
9 | -- cook_schema: schema names are lowercased when created, but case-sensitive
10 | -- when resolved via the search path.
--------------------------------------------------------------------------------
/scheduler/postgresql/bin/make-launch-postgres-docker.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | ###
4 | ### Reset any existing postgres docker container and make and configure one afresh.
5 | ###
6 | ### Sets the password to $PGPASSWORD
7 | ###
8 |
9 | if [[ a"$PGPASSWORD" == a ]];
10 | then
11 | echo "Need to set PGPASSWORD."
12 | exit 1
13 | fi
14 |
15 |
16 | ## Copied from run-docker.sh
17 | echo "About to: Setup and check docker networking"
18 | if [ -z "$(docker network ls -q -f name=cook_nw)" ];
19 | then
20 | # Using a separate network allows us to access hosts by name (cook-scheduler-12321)
21 | # instead of IP address which simplifies configuration
22 | echo "Creating cook_nw network"
23 | docker network create -d bridge --subnet 172.25.0.0/16 cook_nw
24 | fi
25 |
26 |
27 | echo "#### Flushing existing docker containers `date`"
28 |
29 | # Flush any existing containers.
30 | docker kill cook-postgres || true
31 | docker container rm cook-postgres || true
32 |
33 | echo "#### Launching database `date`"
34 |
35 | # This launches the database. We give it a hostname of cook-postgres so that we can connect to
36 | # the container using psql -h ...., later.
37 | docker run --name cook-postgres --hostname cook-postgres --publish=5432:5432 --rm --network cook_nw -e POSTGRES_PASSWORD="${PGPASSWORD}" -d postgres:13
38 |
39 | echo "#### Pausing for the DB to restart before setting it up."
40 | sleep 4
41 |
42 | export COOK_SCHEMA=cook_local
43 |
44 | #
45 | # Finish postgres setup in the container.
46 | #
47 |
48 | # Create the initial cook account and database.
49 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd)"
50 | ${DIR}/setup-database.sh
51 |
52 | # See the README.txt to see how to access this interactively.
53 |
--------------------------------------------------------------------------------
/scheduler/postgresql/bin/setup-database.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | # Create the cook database account/database, the convenience schema, and the
4 | # seed rows used by the opensource integration tests.
5 | # Requires PGPASSWORD and COOK_SCHEMA in the environment (set by the caller).
6 | 
7 | set -euo pipefail
8 | 
9 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd)"
10 | 
11 | # Abort with a clear message rather than half-configuring the database.
12 | : "${PGPASSWORD:?PGPASSWORD must be set}"
13 | : "${COOK_SCHEMA:?COOK_SCHEMA must be set}"
14 | 
15 | # Create the initial cook account and database.
16 | echo "#### Initializing new account and database."
17 | psql --set=cook_user_password="$PGPASSWORD" -h 127.0.0.1 -U postgres -f "${DIR}/../sql/docker_init_new_database.sql"
18 | 
19 | echo "#### Running script to create convenience SQL schema cook_local"
20 | export COOK_DB_TEST_PG_DATABASE=cook_local
21 | export COOK_DB_TEST_PG_USER=cook_scheduler
22 | export COOK_DB_TEST_PG_SERVER=cook-postgres
23 | "${DIR}/setup-new-schema.sh" "${COOK_SCHEMA}"
24 | 
25 | echo "#### Setting up rows for opensource integration tests."
26 | psql --set=cook_schema="${COOK_SCHEMA}" -h 127.0.0.1 -U cook_scheduler -d cook_local -f "${DIR}/../sql/insert_rows_for_opensource_integration_tests.sql"
--------------------------------------------------------------------------------
/scheduler/postgresql/bin/setup-new-schema.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | # Create a new cook schema named by $1 and run the liquibase changelog into it
4 | # via dockerized liquibase. Requires PGPASSWORD in the environment.
5 | 
6 | set -euo pipefail
7 | 
8 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd)"
9 | 
10 | COOK_SCHEMA=${1:?Usage: setup-new-schema.sh <schema-name>}
11 | 
12 | echo "### Started script to create schema '${COOK_SCHEMA}' out of directory $DIR/../sql"
13 | 
14 | # Liquibase setup:
15 | echo "## Running PSQL to create schema"
16 | psql --set=cook_schema="${COOK_SCHEMA}" -h 127.0.0.1 -U cook_scheduler -d cook_local -f "${DIR}/../sql/init_cook_database.sql"
17 | 
18 | echo "## Liquibase setup."
19 | LIQUIBASE="${DIR}/../../liquibase"
20 | 
21 | export COOK_DB_TEST_PG_DATABASE=cook_local
22 | export COOK_DB_TEST_PG_USER=cook_scheduler
23 | export COOK_DB_TEST_PG_SERVER=cook-postgres
24 | 
25 | # NB: the parameter is '&currentSchema'; it was previously mangled to '¤tSchema'
26 | # ('&curren' is the HTML entity for the currency sign), which broke schema selection.
27 | PG_JDBC_URL="jdbc:postgresql://${COOK_DB_TEST_PG_SERVER}/${COOK_DB_TEST_PG_DATABASE}?user=${COOK_DB_TEST_PG_USER}&password=${PGPASSWORD}&currentSchema=${COOK_SCHEMA}"
28 | 
29 | # Note that --changeLogFile is relative to /liquibase in the container, so comes from the -v volume mountpoint, and MUST be a relative path.
30 | docker run --network cook_nw --rm -v "${LIQUIBASE}/changelog:/liquibase/changelog" liquibase/liquibase:4.6 --changeLogFile=./changelog/com/twosigma/cook/changelogs/setup.postgresql.sql --url "${PG_JDBC_URL}" --liquibase-schema-name="${COOK_SCHEMA}" update
31 | 
32 | echo "### Finished script creating schema ${COOK_SCHEMA}"
--------------------------------------------------------------------------------
/scheduler/postgresql/bin/vagrant-setup-database.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | # Vagrant variant of setup-database.sh: creates the cook account/database, the
4 | # convenience schema, and the integration-test seed rows.
5 | # Requires PGPASSWORD and COOK_SCHEMA in the environment.
6 | 
7 | set -euo pipefail
8 | 
9 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd)"
10 | 
11 | : "${PGPASSWORD:?PGPASSWORD must be set}"
12 | : "${COOK_SCHEMA:?COOK_SCHEMA must be set}"
13 | 
14 | # Create the initial cook account and database.
15 | echo "#### Initializing new account and database."
16 | sudo -u postgres psql --set=cook_user_password="$PGPASSWORD" -f "${DIR}/../sql/docker_init_new_database.sql"
17 | 
18 | echo "#### Running script to create convenience SQL schema cook_local"
19 | "${DIR}/vagrant-setup-new-schema.sh" "${COOK_SCHEMA}"
20 | 
21 | echo "#### Setting up rows for opensource integration tests."
22 | psql --set=cook_schema="${COOK_SCHEMA}" -h 127.0.0.1 -U cook_scheduler -d cook_local -f "${DIR}/../sql/insert_rows_for_opensource_integration_tests.sql"
--------------------------------------------------------------------------------
/scheduler/postgresql/bin/vagrant-setup-new-schema.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | # Create a new cook schema named by $1 and run the liquibase changelog into it
4 | # using a locally installed liquibase (vagrant flavor).
5 | 
6 | set -euo pipefail
7 | 
8 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd)"
9 | 
10 | COOK_SCHEMA=${1:?Usage: vagrant-setup-new-schema.sh <schema-name>}
11 | 
12 | echo "### Started script to create schema '${COOK_SCHEMA}' out of directory $DIR/../sql"
13 | 
14 | # Liquibase setup:
15 | echo "## Running PSQL to create schema"
16 | psql --set=cook_schema="${COOK_SCHEMA}" -h 127.0.0.1 -U cook_scheduler -d cook_local -f "${DIR}/../sql/init_cook_database.sql"
17 | 
18 | echo "## Liquibase setup."
19 | 
20 | # NB: the parameter is '&currentSchema'; it was previously mangled to '¤tSchema'
21 | # ('&curren' is the HTML entity for the currency sign), which broke schema selection.
22 | PG_JDBC_URL="jdbc:postgresql://${COOK_DB_TEST_PG_SERVER}/${COOK_DB_TEST_PG_DATABASE}?user=${COOK_DB_TEST_PG_USER}&password=${PGPASSWORD}&currentSchema=${COOK_SCHEMA}"
23 | 
24 | # Note that liquibase must run from scheduler/liquibase and --changeLogFile is relative to scheduler/liquibase and MUST be a relative path.
25 | cd "${DIR}/../../liquibase"
26 | liquibase --classpath=/usr/share/java/postgresql.jar --changeLogFile=changelog/com/twosigma/cook/changelogs/setup.postgresql.sql --url "${PG_JDBC_URL}" --liquibaseSchemaName="${COOK_SCHEMA}" update
27 | 
28 | echo "### Finished script creating schema ${COOK_SCHEMA}"
--------------------------------------------------------------------------------
/scheduler/postgresql/sql/docker_init_new_database.sql:
--------------------------------------------------------------------------------
1 | -- Create a cook scheduler database from a brand new database, including
2 | -- creating the initial user. Intended to be run for docker setup in opensource only.
3 | 
4 | -- When in docker-land, we use the database username cook_scheduler
5 | DROP DATABASE IF EXISTS cook_local;
6 | ALTER DEFAULT PRIVILEGES REVOKE ALL ON TABLES FROM cook_scheduler; -- revoke before DROP ROLE: a role still referenced by default ACLs cannot be dropped
7 | ALTER DEFAULT PRIVILEGES REVOKE ALL ON SCHEMAS FROM cook_scheduler;
8 | DROP ROLE IF EXISTS cook_scheduler;
9 | CREATE ROLE cook_scheduler with password :'cook_user_password' LOGIN; -- :'cook_user_password' is a psql variable supplied via --set
10 | CREATE DATABASE cook_local WITH owner cook_scheduler;
11 | 
12 | -- Ensure that all schemas on this database are writeable by cook_scheduler user.
13 | ALTER DEFAULT PRIVILEGES GRANT ALL ON SCHEMAS TO cook_scheduler;
14 | ALTER DEFAULT PRIVILEGES GRANT ALL ON TABLES TO cook_scheduler;
--------------------------------------------------------------------------------
/scheduler/postgresql/sql/init_cook_database.sql:
--------------------------------------------------------------------------------
1 | -- Initialize a cook database from scratch --- creating the schemas and such.
2 | -- Assumes we already have an appropriately configured postgresql database and
3 | -- have psql connected to it.
4 | 
5 | BEGIN TRANSACTION;
6 | -- Always run this in the transaction so that if the set schema fails for any reason, we abort instead of possibly writing to the wrong schema's tables.
7 | CREATE SCHEMA :cook_schema;
8 | SET SCHEMA :'cook_schema';
9 | 
10 | -- The semicolon matters: without it COMMIT stays in psql's query buffer, and the
11 | -- \dt meta-command below (executed immediately by psql) runs before the commit.
12 | COMMIT;
13 | 
14 | -- Just show the tables at the end.
15 | \dt :'cook_schema'.
--------------------------------------------------------------------------------
/scheduler/postgresql/sql/insert_rows_for_opensource_integration_tests.sql:
--------------------------------------------------------------------------------
1 | -- Insert some rows for development and running tests in open source, including initial quotas and pools
2 | -- for the integration tests.
3 | 
4 | --- DO NOT RUN IN PRODUCTION.
5 | begin transaction;
6 | -- Always run this in the transaction so that if the set schema fails for any reason, we abort instead of possibly writing to the wrong schema's tables.
7 | SET SCHEMA :'cook_schema';
8 | 
9 | 
10 | insert into pools VALUES ('k8s-alpha',true,''); -- columns presumably (name, accepts-submissions?, description) — verify against the pools table definition
11 | insert into pools VALUES ('k8s-beta',false,'');
12 | insert into pools VALUES ('k8s-gamma',true,'');
13 | insert into pools VALUES ('k8s-delta',false,'');
14 | 
15 | insert into resource_limits VALUES ('quota','k8s-alpha','default','mem',1000000, ''); -- columns presumably (kind, pool, user, resource, amount, reason) — verify against schema
16 | insert into resource_limits VALUES ('quota','k8s-alpha','default','cpus',1000000, '');
17 | insert into resource_limits VALUES ('quota','k8s-beta','default','mem',1000000, '');
18 | insert into resource_limits VALUES ('quota','k8s-beta','default','cpus',1000000, '');
19 | end transaction;
20 | 
--------------------------------------------------------------------------------
/scheduler/postgresql/sql/reset_cook_database.sql:
--------------------------------------------------------------------------------
1 | -- WIPE THE DATABASE!
2 | -- Assumes we already have an appropriately configured postgresql database and
3 | -- have psql connected to it.
4 | 
5 | -- Drops the schema of a cook database so it can be recreated.
6 | DROP SCHEMA IF EXISTS :cook_schema CASCADE; -- CASCADE also drops every object contained in the schema (tables, indexes, ...)
--------------------------------------------------------------------------------
/scheduler/postgresql/sql/reset_init_cook_database.sql:
--------------------------------------------------------------------------------
1 | -- WIPES THE DATABASE
2 | -- Reinitialize a cook database, wiping all of the contents first.
3 | -- Assumes we already have an appropriately configured postgresql database and
4 | -- have psql connected to it.
5 | -- \ir includes a file relative to this script's own directory, so this works from any cwd.
6 | \ir reset_cook_database.sql
7 | \ir init_cook_database.sql
8 | 
--------------------------------------------------------------------------------
/scheduler/simulator_files/analysis/.gitignore:
--------------------------------------------------------------------------------
1 | *.egg*
2 | *.egg-info
3 | *.ipynb_checkpoints*
4 | *pyc
5 | __pycache__
6 | venv/
7 |
--------------------------------------------------------------------------------
/scheduler/simulator_files/analysis/README.md:
--------------------------------------------------------------------------------
1 | # Cook analysis
2 |
3 | Tools to analyze a trace of tasks run in Cook.
4 |
5 | The python notebook included here provides samples for how to use the functions.
6 |
7 |
8 | ## Credits
9 |
10 | This package was created with [Cookiecutter](https://github.com/audreyr/cookiecutter) and the [audreyr/cookiecutter-pypackage](https://github.com/audreyr/cookiecutter-pypackage) project template.
11 |
--------------------------------------------------------------------------------
/scheduler/simulator_files/analysis/requirements_dev.txt:
--------------------------------------------------------------------------------
1 | pip==8.1.2
2 | bumpversion==0.5.3
3 | wheel==0.29.0
4 | watchdog==0.8.3
5 | flake8==2.6.0
6 | coverage==4.1
7 | Sphinx==1.4.8
8 | cryptography==1.7
9 | PyYAML==5.1
10 | pandas>=0.19.2
11 | matplotlib==2.0.0
12 | numpy==1.12.1
13 |
--------------------------------------------------------------------------------
/scheduler/simulator_files/analysis/setup.cfg:
--------------------------------------------------------------------------------
1 | [bumpversion]
2 | current_version = 0.1.0
3 | commit = True
4 | tag = True
5 |
6 | [bumpversion:file:setup.py]
7 | search = version='{current_version}'
8 | replace = version='{new_version}'
9 |
10 | [bumpversion:file:cook_integration/__init__.py]
11 | search = __version__ = '{current_version}'
12 | replace = __version__ = '{new_version}'
13 |
14 | [bdist_wheel]
15 | universal = 1
16 |
17 | [flake8]
18 | exclude = docs
19 |
20 | [nosetests]
21 | processes=10
22 | process-timeout=900
--------------------------------------------------------------------------------
/scheduler/simulator_files/analysis/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | from setuptools import setup
5 |
6 | requirements = [
7 | 'pandas',
8 | 'matplotlib',
9 | 'numpy'
10 | ]
11 |
12 | test_requirements = []
13 |
14 | setup(
15 | name='cook_analysis',
16 | version='0.1.0',
17 | description="Functions to analyze trace output from cook scheduler",
18 | url='https://github.com/twosigma/Cook',
19 | include_package_data=True,
20 | install_requires=requirements,
21 | license="Apache Software License 2.0",
22 | zip_safe=False,
23 | keywords='cook_analysis',
24 | classifiers=[
25 | 'Development Status :: 2 - Pre-Alpha',
26 | 'Intended Audience :: Developers',
27 | 'License :: OSI Approved :: Apache Software License',
28 | 'Natural Language :: English',
29 | "Programming Language :: Python :: 2",
30 | 'Programming Language :: Python :: 2.6',
31 | 'Programming Language :: Python :: 2.7',
32 | 'Programming Language :: Python :: 3',
33 | 'Programming Language :: Python :: 3.3',
34 | 'Programming Language :: Python :: 3.4',
35 | 'Programming Language :: Python :: 3.5',
36 | ],
37 | test_suite='tests',
38 | tests_require=test_requirements,
39 | setup_requires=[]
40 | )
41 |
--------------------------------------------------------------------------------
/scheduler/simulator_files/analysis/tests/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
--------------------------------------------------------------------------------
/scheduler/simulator_files/analysis/tests/analysis/__init__.py:
--------------------------------------------------------------------------------
1 | import logging
2 | 
3 | logging.basicConfig(format='%(asctime)s [%(levelname)s] %(message)s', level=logging.DEBUG)  # debug-level, timestamped logging for every test in this package
4 |
--------------------------------------------------------------------------------
/scheduler/simulator_files/example-config.edn:
--------------------------------------------------------------------------------
1 | {:shares [{:user "default" :mem 60000.0 :cpus 600.0 :gpus 1.0}]
2 | :cycle-step-ms 30000
3 | :scheduler-config {:rebalancer-config {:max-preemption 10.0}
4 | :fenzo-config {:fenzo-max-jobs-considered 200}}}
5 |
--------------------------------------------------------------------------------
/scheduler/simulator_files/example-hosts.json:
--------------------------------------------------------------------------------
1 | [ {
2 | "hostname" : "0",
3 | "attributes" : { },
4 | "resources" : {
5 | "cpus" : {
6 | "*" : 10
7 | },
8 | "mem" : {
9 | "*" : 10000
10 | },
11 | "ports" : {
12 | "*" : [ {
13 | "begin" : 1,
14 | "end" : 100
15 | } ]
16 | }
17 | },
18 | "slave-id" : "a05a4ac2-7eb2-40a1-8259-111b961874c1"
19 | }, {
20 | "hostname" : "1",
21 | "attributes" : { },
22 | "resources" : {
23 | "cpus" : {
24 | "*" : 10
25 | },
26 | "mem" : {
27 | "*" : 10000
28 | },
29 | "ports" : {
30 | "*" : [ {
31 | "begin" : 1,
32 | "end" : 100
33 | } ]
34 | }
35 | },
36 | "slave-id" : "eba83456-8596-44c2-9de5-fb29cfaf1647"
37 | }, {
38 | "hostname" : "2",
39 | "attributes" : { },
40 | "resources" : {
41 | "cpus" : {
42 | "*" : 10
43 | },
44 | "mem" : {
45 | "*" : 10000
46 | },
47 | "ports" : {
48 | "*" : [ {
49 | "begin" : 1,
50 | "end" : 100
51 | } ]
52 | }
53 | },
54 | "slave-id" : "4696a9c8-fc9f-46b4-8a83-23b384f3f616"
55 | }, {
56 | "hostname" : "3",
57 | "attributes" : { },
58 | "resources" : {
59 | "cpus" : {
60 | "*" : 10
61 | },
62 | "mem" : {
63 | "*" : 10000
64 | },
65 | "ports" : {
66 | "*" : [ {
67 | "begin" : 1,
68 | "end" : 100
69 | } ]
70 | }
71 | },
72 | "slave-id" : "a45be252-35f7-434a-8ec3-1e85265213ac"
73 | }, {
74 | "hostname" : "4",
75 | "attributes" : { },
76 | "resources" : {
77 | "cpus" : {
78 | "*" : 10
79 | },
80 | "mem" : {
81 | "*" : 10000
82 | },
83 | "ports" : {
84 | "*" : [ {
85 | "begin" : 1,
86 | "end" : 100
87 | } ]
88 | }
89 | },
90 | "slave-id" : "67194c23-61d6-4901-87a8-38d84af57f95"
91 | } ]
--------------------------------------------------------------------------------
/scheduler/src/cook/cached_queries.clj:
--------------------------------------------------------------------------------
1 | (ns cook.cached-queries
2 | (:require [cook.cache :as ccache]
3 | [cook.caches :as caches]
4 | [cook.config :as config]
5 | [cook.datomic :as datomic]
6 | [datomic.api :as d :refer [q]]))
7 |
8 | (defn job->pool-name
9 |   "Return the pool name of the job. Guaranteed non nil."
10 |   [job]
11 |   ;; Fall back to the cluster-wide default pool, then the "no-pool" sentinel.
12 |   (letfn [(miss [{:keys [job/pool]}]
13 |             (or (:pool/name pool)
14 |                 (config/default-pool)
15 |                 "no-pool"))]
16 |     (caches/lookup-cache-datomic-entity! caches/job-ent->pool-cache miss job)))
17 | 
18 | (defn job-ent->user
19 |   "Given a job entity, return the user the job runs as."
20 |   [job-ent]
21 |   ;; :job/user doubles as the cache-miss function (keyword lookup on the entity).
22 |   (caches/lookup-cache-datomic-entity! caches/job-ent->user-cache :job/user job-ent))
23 | 
24 | (defn instance-uuid->job-uuid-datomic-query
25 |   "Queries for the job uuid from an instance uuid.
26 |   Returns nil if the instance uuid doesn't correspond
27 |   to a job"
28 |   [db instance-uuid]
29 |   (-> (d/entity db [:instance/task-id (str instance-uuid)])
30 |       :job/_instance
31 |       :job/uuid))
32 | 
33 | (defn instance-uuid->job-uuid-cache-lookup
34 |   "Get job-uuid from cache if it is present, else search datomic for it"
35 |   [instance-uuid]
36 |   (letfn [(miss [uuid]
37 |             (str (instance-uuid->job-uuid-datomic-query (d/db datomic/conn) uuid)))]
38 |     (ccache/lookup-cache! caches/instance-uuid->job-uuid identity miss instance-uuid)))
39 | 
40 | (defn job-uuid->job-map-cache-lookup
41 |   "Get job-map from cache if it is present, else search datomic for it"
42 |   [job-uuid]
43 |   (ccache/lookup-cache! caches/job-uuid->job-map
44 |                         identity
45 |                         (fn [uuid] (d/entity (d/db datomic/conn) [:job/uuid uuid]))
46 |                         job-uuid))
47 |
--------------------------------------------------------------------------------
/scheduler/src/cook/caches.clj:
--------------------------------------------------------------------------------
1 | (ns cook.caches
2 | (:require [chime]
3 | [cook.cache :as ccache]
4 | [cook.config :as config]
5 | [mount.core :as mount])
6 | (:import (com.google.common.cache Cache CacheBuilder)
7 | (java.util.concurrent TimeUnit)))
8 |
9 | (defn new-cache
10 |   ;; The docstring must come BEFORE the argument vector; in the original it came
11 |   ;; after, which makes it a discarded body expression rather than a docstring.
12 |   "Build a new cache"
13 |   [config]
14 |   (-> (CacheBuilder/newBuilder)
15 |       (.maximumSize (get-in config [:settings :cache-working-set-size]))
16 |       ;; If it's not been accessed in 2 hours, whatever is going on, it's not being visited by the
17 |       ;; scheduler loop anymore. E.g., it's probably failed/done and won't be needed. So,
18 |       ;; let's kick it out to keep the cache small.
19 |       (.expireAfterAccess 2 TimeUnit/HOURS)
20 |       (.build)))
18 |
21 | (defn passport-cache
22 |   ;; Docstring moved before the argument vector (it was a no-op body string before).
23 |   "Build a new passport-related cache"
24 |   [config]
25 |   (-> (CacheBuilder/newBuilder)
26 |       (.maximumSize (get-in config [:settings :passport :job-cache-set-size]))
27 |       (.expireAfterAccess (get-in config [:settings :passport :job-cache-expiry-time-hours]) TimeUnit/HOURS)
28 |       (.build)))
25 |
26 | (defn lookup-cache-datomic-entity!
27 |   "Specialized function for caching where datomic entities are the key.
28 |   Extracts :db/id so that we don't keep the entity alive in the cache."
29 |   [cache miss-fn entity]
30 |   (ccache/lookup-cache! cache :db/id miss-fn entity))
31 | ;; Process-wide caches: one mount state per cache, each built from config/config when the system starts.
32 | (mount/defstate ^Cache job-ent->resources-cache :start (new-cache config/config))
33 | (mount/defstate ^Cache job-ent->pool-cache :start (new-cache config/config))
34 | (mount/defstate ^Cache task-ent->user-cache :start (new-cache config/config))
35 | (mount/defstate ^Cache job-ent->user-cache :start (new-cache config/config))
36 | (mount/defstate ^Cache task->feature-vector-cache :start (new-cache config/config))
37 | (mount/defstate ^Cache user->group-ids-cache :start (new-cache config/config))
38 | (mount/defstate ^Cache recent-synthetic-pod-job-uuids :start
39 | (-> (CacheBuilder/newBuilder)
40 | (.maximumSize (:synthetic-pod-recency-size (config/kubernetes)))
41 | ; We blocklist a given job from being autoscaled soon after a prior autoscaling.
42 | (.expireAfterWrite (:synthetic-pod-recency-seconds (config/kubernetes)) TimeUnit/SECONDS)
43 | (.build)))
44 | (mount/defstate ^Cache pool-name->exists?-cache :start (new-cache config/config))
45 | (mount/defstate ^Cache pool-name->accepts-submissions?-cache :start (new-cache config/config))
46 | (mount/defstate ^Cache pool-name->db-id-cache :start (new-cache config/config))
47 | (mount/defstate ^Cache user-and-pool-name->quota :start (new-cache config/config))
48 | (mount/defstate ^Cache instance-uuid->job-uuid :start (passport-cache config/config))
49 | (mount/defstate ^Cache job-uuid->job-map :start (passport-cache config/config))
--------------------------------------------------------------------------------
/scheduler/src/cook/compute_cluster/metrics.clj:
--------------------------------------------------------------------------------
1 | (ns cook.compute-cluster.metrics
2 | (:require [metrics.timers :as timers]))
3 |
4 | (defn calculate-name
5 |   "Given a metric name and compute cluster name, come up with the metric path to use."
6 |   [metric-name compute-cluster-name]
7 |   ;; Path layout: cook.<metric>.compute-cluster-<cluster>
8 |   (vector "cook" metric-name (str "compute-cluster-" compute-cluster-name)))
10 |
11 | (defn timer
12 |   "Given a metric name and a compute cluster name, returns a timer metric."
13 |   [metric-name compute-cluster-name]
14 |   (-> (calculate-name metric-name compute-cluster-name)
15 |       (timers/timer)))
15 |
--------------------------------------------------------------------------------
/scheduler/src/cook/curator.clj:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/twosigma/Cook/e43416aea1ff47b667101d275464ba45541f982e/scheduler/src/cook/curator.clj
--------------------------------------------------------------------------------
/scheduler/src/cook/kubernetes/metrics.clj:
--------------------------------------------------------------------------------
1 | (ns cook.kubernetes.metrics
2 | (:require [metrics.core :as core]
3 | [metrics.counters :as counters]
4 | [metrics.meters :as meters]
5 | [metrics.timers :as timers])
6 | (:import (com.codahale.metrics Histogram MetricRegistry MetricRegistry$MetricSupplier SlidingTimeWindowArrayReservoir)
7 | (java.util.concurrent TimeUnit)))
8 |
9 | (defn calculate-name
10 | "Given a metric name and compute cluster name, come up with the metric path to use."
11 | [metric-name compute-cluster-name]
12 | ["cook-k8s"
13 | metric-name
14 | (str "compute-cluster-" compute-cluster-name)])
15 |
16 | (defn counter
17 | "Given a metric name and a compute cluster name, returns a counter metric."
18 | [metric-name compute-cluster-name]
19 | (counters/counter (calculate-name metric-name compute-cluster-name)))
20 |
21 | (defn meter
22 | "Given a metric name and a compute cluster name, returns a meter metric."
23 | [metric-name compute-cluster-name]
24 | (meters/meter (calculate-name metric-name compute-cluster-name)))
25 |
26 | (defn timer
27 | "Given a metric name and a compute cluster name, returns a timer metric."
28 | [metric-name compute-cluster-name]
29 | (timers/timer (calculate-name metric-name compute-cluster-name)))
30 |
31 | ; MetricSupplier used by `histogram` below so that every histogram created
32 | ; through this namespace is backed by a 5-minute sliding-time-window
33 | ; reservoir rather than the dropwizard default.
34 | (def histogram-supplier
35 |   (reify
36 |     MetricRegistry$MetricSupplier
37 |     (newMetric [_]
38 |       (Histogram.
39 |         ; The default implementation of `Reservoir` in dropwizard metrics is
40 |         ; `ExponentiallyDecayingReservoir`, which stores data samples for some
41 |         ; time. When new samples stop arriving, it uses the historical data and
42 |         ; returns the same characteristics for the data distribution again and
43 |         ; again, simply because the data distribution doesn’t change. Here we
44 |         ; switch from the default `ExponentiallyDecayingReservoir` to a sliding
45 |         ; time window reservoir, which gives zeros when there is no data. See
46 |         ; https://engineering.salesforce.com/be-careful-with-reservoirs-708884018daf
47 |         ; for more information.
48 |         (SlidingTimeWindowArrayReservoir. 300 TimeUnit/SECONDS)))))
46 |
47 | (defn histogram
48 |   "Returns (registering on first use) the histogram metric for this metric
49 |   and compute cluster, backed by histogram-supplier's sliding-window
50 |   reservoir."
51 |   [metric-name compute-cluster-name]
52 |   (let [^MetricRegistry registry core/default-registry
53 |         path (calculate-name metric-name compute-cluster-name)]
54 |     (.histogram registry (core/metric-name path) histogram-supplier)))
55 |
--------------------------------------------------------------------------------
/scheduler/src/cook/mesos/reason.clj:
--------------------------------------------------------------------------------
1 | ;;
2 | ;; Copyright (c) Two Sigma Open Source, LLC
3 | ;;
4 | ;; Licensed under the Apache License, Version 2.0 (the "License");
5 | ;; you may not use this file except in compliance with the License.
6 | ;; You may obtain a copy of the License at
7 | ;;
8 | ;; http://www.apache.org/licenses/LICENSE-2.0
9 | ;;
10 | ;; Unless required by applicable law or agreed to in writing, software
11 | ;; distributed under the License is distributed on an "AS IS" BASIS,
12 | ;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | ;; See the License for the specific language governing permissions and
14 | ;; limitations under the License.
15 | ;;
16 | (ns cook.mesos.reason
17 | (:require [clojure.tools.logging :as log]
18 | [datomic.api :as d]))
19 |
20 | (defn reason-code->reason-entity
21 |   "Looks up the reason entity whose :reason/code equals reason-code."
22 |   [db reason-code]
23 |   (let [lookup-ref [:reason/code reason-code]]
24 |     (d/entity db lookup-ref)))
23 |
24 | (defn reason-code->reason-string
25 |   "Returns the :reason/string of the reason entity for reason-code."
26 |   [db reason-code]
27 |   (-> (reason-code->reason-entity db reason-code)
28 |       :reason/string))
27 |
28 | (defn mesos-reason->cook-reason-entity-id
29 |   "Maps a Mesos task failure reason to the :db/id of the matching Cook reason
30 |   entity. Unknown reasons are logged and fall back to the :mesos-unknown
31 |   reason entity's id."
32 |   [db task-id mesos-reason]
33 |   (let [known-id (:db/id (d/entity db [:reason/mesos-reason mesos-reason]))]
34 |     (if known-id
35 |       known-id
36 |       (do
37 |         (log/warn "Unknown mesos reason:" mesos-reason "for task" task-id)
38 |         (:db/id (d/entity db [:reason/name :mesos-unknown]))))))
35 |
36 | (defn instance-entity->reason-entity
37 |   "Returns the reason entity of an instance: the directly-linked
38 |   :instance/reason when present, otherwise the entity looked up from the
39 |   instance's :instance/reason-code."
40 |   [db instance]
41 |   (if-let [direct-reason (:instance/reason instance)]
42 |     direct-reason
43 |     (reason-code->reason-entity db (:instance/reason-code instance))))
40 |
41 | (defn all-known-reasons
42 |   "Returns the Datomic entities for every currently defined failure reason,
43 |   i.e. every entity that has a :reason/code attribute."
44 |   [db]
45 |   (let [reason-eids (d/q '[:find [?e ...]
46 |                            :in $
47 |                            :where
48 |                            [?e :reason/code]]
49 |                          db)]
50 |     (map #(d/entity db %) reason-eids)))
51 |
52 | (defn default-failure-limit
53 |   "Reads the mea-culpa failure limit from the :scheduler/config entity."
54 |   [db]
55 |   (-> (d/entity db :scheduler/config)
56 |       :scheduler.config/mea-culpa-failure-limit))
55 |
--------------------------------------------------------------------------------
/scheduler/src/cook/passport.clj:
--------------------------------------------------------------------------------
1 | ;;
2 | ;; Copyright (c) Two Sigma Open Source, LLC
3 | ;;
4 | ;; Licensed under the Apache License, Version 2.0 (the "License");
5 | ;; you may not use this file except in compliance with the License.
6 | ;; You may obtain a copy of the License at
7 | ;;
8 | ;; http://www.apache.org/licenses/LICENSE-2.0
9 | ;;
10 | ;; Unless required by applicable law or agreed to in writing, software
11 | ;; distributed under the License is distributed on an "AS IS" BASIS,
12 | ;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | ;; See the License for the specific language governing permissions and
14 | ;; limitations under the License.
15 | ;;
16 | (ns cook.passport
17 | (:require [clojure.data.json :as json]
18 | [clojure.tools.logging :as log]
19 | [cook.config :as config]))
20 |
21 | (defn log-event
22 |   "Logs a passport event as a single JSON line to the cook-passport log file.
23 |   No-ops unless passport logging is enabled in the configuration. The
24 |   event-type keyword is rendered as \"cook-scheduler/<name>\"."
25 |   [{:keys [event-type] :as log-data}]
26 |   (when (:enabled? (config/passport))
27 |     (let [payload (assoc log-data
28 |                          :source :cook-scheduler
29 |                          :event-type (str "cook-scheduler/" (name event-type)))]
30 |       (log/log config/passport-logger-ns :info nil (json/write-str payload)))))
30 |
31 | ; Passport event-type keywords. Callers pass one of these to log-event,
32 | ; which prefixes it with "cook-scheduler/" in the emitted JSON.
33 | (def checkpoint-volume-mounts-key-selected :checkpoint-volume-mounts-key-selected)
34 | (def default-image-selected :default-image-selected)
35 | (def init-container-image-selected :init-container-image-selected)
36 | (def job-created :job-created)
37 | (def job-submitted :job-submitted)
38 | (def pod-completed :pod-completed)
39 | (def pod-submission-succeeded :pod-submission-succeeded)
40 | (def sidecar-image-selected :sidecar-image-selected)
41 | (def synthetic-pod-submission-succeeded :synthetic-pod-submission-succeeded)
42 | (def pod-submission-failed :pod-submission-failed)
43 | (def synthetic-pod-submission-failed :synthetic-pod-submission-failed)
42 |
--------------------------------------------------------------------------------
/scheduler/src/cook/plugins/adjustment.clj:
--------------------------------------------------------------------------------
1 | ;;
2 | ;; Copyright (c) Two Sigma Open Source, LLC
3 | ;;
4 | ;; Licensed under the Apache License, Version 2.0 (the "License");
5 | ;; you may not use this file except in compliance with the License.
6 | ;; You may obtain a copy of the License at
7 | ;;
8 | ;; http://www.apache.org/licenses/LICENSE-2.0
9 | ;;
10 | ;; Unless required by applicable law or agreed to in writing, software
11 | ;; distributed under the License is distributed on an "AS IS" BASIS,
12 | ;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | ;; See the License for the specific language governing permissions and
14 | ;; limitations under the License.
15 | ;;
16 | (ns cook.plugins.adjustment
17 | (:require [clojure.tools.logging :as log]
18 | [cook.config :as config]
19 | [cook.plugins.definitions :refer [JobAdjuster]]
20 | [cook.plugins.util]
21 | [mount.core :as mount]))
22 |
23 | ; JobAdjuster that returns the job map unmodified; used when no
24 | ; :job-adjuster plugin factory is configured.
25 | (def no-op
26 |   (reify JobAdjuster
27 |     (adjust-job [_ job-map _] job-map)))
26 |
27 | (defn create-default-plugin-object
28 |   "Builds the JobAdjuster for this deployment: resolves and invokes
29 |   [:settings :plugins :job-adjuster :factory-fn] with the config when
30 |   configured, otherwise returns the no-op adjuster. Throws ex-info if a
31 |   configured factory symbol cannot be resolved."
32 |   [config]
33 |   (if-let [factory-fn (get-in config [:settings :plugins :job-adjuster :factory-fn])]
34 |     (do
35 |       (log/info "Creating job adjuster plugin with" factory-fn)
36 |       (if-let [resolved-fn (cook.plugins.util/resolve-symbol (symbol factory-fn))]
37 |         (resolved-fn config)
38 |         (throw (ex-info (str "Unable to resolve factory fn " factory-fn) {}))))
39 |     no-op))
40 | 
41 | ; Mount state holding the active JobAdjuster plugin instance.
42 | (mount/defstate plugin
43 |   :start (create-default-plugin-object config/config))
41 |
--------------------------------------------------------------------------------
/scheduler/src/cook/plugins/completion.clj:
--------------------------------------------------------------------------------
1 | ;;
2 | ;; Copyright (c) Two Sigma Open Source, LLC
3 | ;;
4 | ;; Licensed under the Apache License, Version 2.0 (the "License");
5 | ;; you may not use this file except in compliance with the License.
6 | ;; You may obtain a copy of the License at
7 | ;;
8 | ;; http://www.apache.org/licenses/LICENSE-2.0
9 | ;;
10 | ;; Unless required by applicable law or agreed to in writing, software
11 | ;; distributed under the License is distributed on an "AS IS" BASIS,
12 | ;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | ;; See the License for the specific language governing permissions and
14 | ;; limitations under the License.
15 | ;;
16 | (ns cook.plugins.completion
17 | (:require [clojure.tools.logging :as log]
18 | [cook.config :as config]
19 | [cook.plugins.definitions :refer [InstanceCompletionHandler]]
20 | [cook.plugins.util]
21 | [mount.core :as mount]))
22 |
23 | ; InstanceCompletionHandler that does nothing; used when no
24 | ; :instance-completion plugin factory is configured.
25 | (def no-op
26 |   (reify InstanceCompletionHandler
27 |     (on-instance-completion [_ _ _])))
26 |
27 | (defn create-default-plugin-object
28 |   "Builds the InstanceCompletionHandler for this deployment: resolves and
29 |   invokes [:settings :plugins :instance-completion :factory-fn] with the
30 |   config when configured, otherwise returns the no-op handler. Throws
31 |   ex-info if a configured factory symbol cannot be resolved."
32 |   [config]
33 |   (let [factory-fn (get-in config [:settings :plugins :instance-completion :factory-fn])]
34 |     (if factory-fn
35 |       (do
36 |         (log/info "Creating instance completion plugin with" factory-fn)
37 |         (if-let [resolved-fn (cook.plugins.util/resolve-symbol (symbol factory-fn))]
38 |           (resolved-fn config)
39 |           ; Bug fix: ex-info requires a data map as its second argument; the
40 |           ; previous 1-arity call threw clojure.lang.ArityException instead of
41 |           ; this informative exception (compare cook.plugins.adjustment).
42 |           (throw (ex-info (str "Unable to resolve factory fn " factory-fn) {}))))
43 |       no-op)))
44 | 
45 | ; Mount state holding the active InstanceCompletionHandler instance.
46 | (mount/defstate plugin
47 |   :start (create-default-plugin-object config/config))
41 |
--------------------------------------------------------------------------------
/scheduler/src/cook/plugins/demo_plugin.clj:
--------------------------------------------------------------------------------
1 | ;;
2 | ;; Copyright (c) Two Sigma Open Source, LLC
3 | ;;
4 | ;; Licensed under the Apache License, Version 2.0 (the "License");
5 | ;; you may not use this file except in compliance with the License.
6 | ;; You may obtain a copy of the License at
7 | ;;
8 | ;; http://www.apache.org/licenses/LICENSE-2.0
9 | ;;
10 | ;; Unless required by applicable law or agreed to in writing, software
11 | ;; distributed under the License is distributed on an "AS IS" BASIS,
12 | ;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | ;; See the License for the specific language governing permissions and
14 | ;; limitations under the License.
15 | ;;
16 | (ns cook.plugins.demo-plugin
17 | (:require [clj-time.core :as t]
18 | [clojure.string :as str]
19 | [cook.plugins.definitions :as chd]))
20 |
21 | (def uuid-seen-counts (atom {}))
22 | 
23 | (defn- generate-result
24 |   "Builds a plugin result map whose cache entry expires one second from now."
25 |   [status message]
26 |   {:status status
27 |    :message message
28 |    :cache-expires-at (t/from-now (t/seconds 1))})
26 |
27 | ; Demo submission validator, designed to match with the integration tests:
28 | ; rejects jobs named "plugin_test.submit_fail*", accepts everything else.
29 | (defrecord DemoValidateSubmission []
30 |   chd/JobSubmissionValidator
31 |   (chd/check-job-submission [_ job-map _]
32 |     (let [job-name (:name job-map)]
33 |       (if (and job-name (str/starts-with? job-name "plugin_test.submit_fail"))
34 |         (generate-result :rejected "Message1- Fail to submit")
35 |         (generate-result :accepted "Message2")))))
35 |
36 | ; Second demo submission validator: rejects "plugin_test.submit_fail2*".
37 | (defrecord DemoValidateSubmission2 []
38 |   chd/JobSubmissionValidator
39 |   (chd/check-job-submission [_ job-map _]
40 |     (let [job-name (:name job-map)]
41 |       (if (and job-name (str/starts-with? job-name "plugin_test.submit_fail2"))
42 |         (generate-result :rejected "Message5- Plugin2 failed")
43 |         (generate-result :accepted "Message6")))))
42 |
43 | ; Demo launch filter: defers the first three launch checks of any job whose
44 | ; name starts with "plugin_test.launch_defer", then accepts it.
45 | (defrecord DemoFilterLaunch []
46 |   chd/JobLaunchFilter
47 |   (chd/check-job-launch [_ {:keys [:job/name :job/uuid]}]
48 |     ; Count how many times this job uuid has been checked so far.
49 |     (let [times-seen (-> (swap! uuid-seen-counts update-in [uuid] (fnil inc 0))
50 |                          (get uuid))
51 |           defer? (and name
52 |                       (str/starts-with? name "plugin_test.launch_defer")
53 |                       (<= times-seen 3))]
54 |       (if defer?
55 |         (generate-result :deferred "Message3")
56 |         (generate-result :accepted "Message4")))))
54 |
55 | (defn launch-factory
56 |   "config.edn factory for the demo launch-filter plugin."
57 |   []
58 |   (DemoFilterLaunch.))
59 |
60 | (defn submission-factory
61 |   "config.edn factory for the demo submission-validator plugin."
62 |   []
63 |   (DemoValidateSubmission.))
64 |
65 | (defn submission-factory2
66 |   "config.edn factory for the second demo submission-validator plugin."
67 |   []
68 |   (DemoValidateSubmission2.))
69 |
--------------------------------------------------------------------------------
/scheduler/src/cook/plugins/file.clj:
--------------------------------------------------------------------------------
1 | ;;
2 | ;; Copyright (c) Two Sigma Open Source, LLC
3 | ;;
4 | ;; Licensed under the Apache License, Version 2.0 (the "License");
5 | ;; you may not use this file except in compliance with the License.
6 | ;; You may obtain a copy of the License at
7 | ;;
8 | ;; http://www.apache.org/licenses/LICENSE-2.0
9 | ;;
10 | ;; Unless required by applicable law or agreed to in writing, software
11 | ;; distributed under the License is distributed on an "AS IS" BASIS,
12 | ;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | ;; See the License for the specific language governing permissions and
14 | ;; limitations under the License.
15 | ;;
16 | (ns cook.plugins.file
17 | (:require [clojure.tools.logging :as log]
18 | [cook.config :as config]
19 | [cook.plugins.definitions :refer [FileUrlGenerator]]
20 | [cook.plugins.util]
21 | [mount.core :as mount]))
22 |
23 | ; FileUrlGenerator that produces no URL for any instance; used when no
24 | ; :file-url plugin factory is configured.
25 | (defrecord NilFileUrlPlugin []
26 |   FileUrlGenerator
27 |   (file-url [this instance]
28 |     nil))
27 |
28 | (defn create-plugin-object
29 |   "Builds the FileUrlGenerator for this deployment: resolves and invokes
30 |   [:settings :plugins :file-url :factory-fn] with the config when configured,
31 |   otherwise returns a NilFileUrlPlugin. Throws ex-info if a configured
32 |   factory symbol cannot be resolved."
33 |   [config]
34 |   (if-let [factory-fn (get-in config [:settings :plugins :file-url :factory-fn])]
35 |     (do
36 |       (log/info "Creating file url plugin with" factory-fn)
37 |       (if-let [resolved-fn (cook.plugins.util/resolve-symbol (symbol factory-fn))]
38 |         (resolved-fn config)
39 |         (throw (ex-info (str "Unable to resolve factory fn " factory-fn) {}))))
40 |     (NilFileUrlPlugin.)))
41 | 
42 | ; Mount state holding the active FileUrlGenerator instance.
43 | (mount/defstate plugin
44 |   :start (create-plugin-object config/config))
43 |
--------------------------------------------------------------------------------
/scheduler/src/cook/plugins/job_submission_modifier.clj:
--------------------------------------------------------------------------------
1 | ;;
2 | ;; Copyright (c) Two Sigma Open Source, LLC
3 | ;;
4 | ;; Licensed under the Apache License, Version 2.0 (the "License");
5 | ;; you may not use this file except in compliance with the License.
6 | ;; You may obtain a copy of the License at
7 | ;;
8 | ;; http://www.apache.org/licenses/LICENSE-2.0
9 | ;;
10 | ;; Unless required by applicable law or agreed to in writing, software
11 | ;; distributed under the License is distributed on an "AS IS" BASIS,
12 | ;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | ;; See the License for the specific language governing permissions and
14 | ;; limitations under the License.
15 | ;;
16 |
17 | (ns cook.plugins.job-submission-modifier
18 | (:require [clojure.tools.logging :as log]
19 | [cook.config :as config]
20 | [cook.plugins.definitions :refer [choose-pool-for-job modify-job JobRouter JobSubmissionModifier]]
21 | [cook.plugins.util]
22 | [mount.core :as mount]))
23 |
24 | (defn pool-name->effective-pool-name
25 |   "Resolves the pool a submitted job should land in. When the submitted pool
26 |   name denotes a job-routing pool, the configured JobRouter chooses the pool;
27 |   otherwise the submitted name is used, falling back to the default pool."
28 |   [pool-name-from-submission job]
29 |   (let [job-router (config/job-routing-pool-name? pool-name-from-submission)]
30 |     (if job-router
31 |       (choose-pool-for-job job-router job)
32 |       (or pool-name-from-submission (config/default-pool)))))
30 |
31 | ; JobSubmissionModifier that leaves the submitted job untouched except for
32 | ; attaching the effective pool computed from the submission.
33 | (defrecord IdentityJobSubmissionModifier []
34 |   JobSubmissionModifier
35 |   (modify-job [_ job pool-name]
36 |     (assoc job :pool (pool-name->effective-pool-name pool-name job))))
38 |
39 | (defn create-plugin-object
40 |   "Builds the JobSubmissionModifier for this deployment: resolves and invokes
41 |   [:settings :plugins :job-submission-modifier :factory-fn] with the config
42 |   when configured, otherwise returns an IdentityJobSubmissionModifier. Throws
43 |   ex-info if a configured factory symbol cannot be resolved."
44 |   [config]
45 |   (if-let [factory-fn (get-in config [:settings :plugins :job-submission-modifier :factory-fn])]
46 |     (do
47 |       (log/info "Creating job submission modifier plugin with" factory-fn)
48 |       (if-let [resolved-fn (cook.plugins.util/resolve-symbol (symbol factory-fn))]
49 |         (resolved-fn config)
50 |         (throw (ex-info (str "Unable to resolve factory fn " factory-fn) {}))))
51 |     (IdentityJobSubmissionModifier.)))
52 | 
53 | ; Mount state holding the active JobSubmissionModifier instance.
54 | (mount/defstate plugin
55 |   :start (create-plugin-object config/config))
53 |
54 | (defn apply-job-submission-modifier-plugins
55 |   "Modify a user-submitted job before passing it further down the submission
56 |   pipeline. Delegates to the mounted JobSubmissionModifier plugin state."
57 |   [raw-job pool-name]
58 |   (modify-job plugin raw-job pool-name))
--------------------------------------------------------------------------------
/scheduler/src/cook/plugins/pool.clj:
--------------------------------------------------------------------------------
1 | (ns cook.plugins.pool
2 | (:require [clojure.tools.logging :as log]
3 | [cook.config :as config]
4 | [cook.plugins.definitions :refer [PoolSelector]]
5 | [cook.plugins.util]
6 | [mount.core :as mount]))
7 |
8 | ; PoolSelector that reads the :text of the offer attribute named
9 | ; attribute-name, falling back to default-pool when the attribute is absent.
10 | (defrecord AttributePoolSelector [attribute-name default-pool]
11 |   PoolSelector
12 |   (select-pool [_ offer]
13 |     (let [attribute-text (->> (:attributes offer)
14 |                               (filter #(= attribute-name (:name %)))
15 |                               first
16 |                               :text)]
17 |       (or attribute-text default-pool))))
13 |
14 | (defn create-plugin-object
15 |   "Builds the PoolSelector for this deployment: resolves and invokes
16 |   [:settings :plugins :pool-selection :factory-fn] with the config when
17 |   configured, otherwise builds an AttributePoolSelector from the
18 |   pool-selection settings. Throws ex-info if a configured factory symbol
19 |   cannot be resolved."
20 |   [config]
21 |   (let [{:keys [attribute-name default-pool factory-fn]}
22 |         (get-in config [:settings :plugins :pool-selection])]
23 |     (if factory-fn
24 |       (do
25 |         (log/info "Creating pool selection plugin with" factory-fn)
26 |         (if-let [resolved-fn (cook.plugins.util/resolve-symbol (symbol factory-fn))]
27 |           (resolved-fn config)
28 |           (throw (ex-info (str "Unable to resolve factory fn " factory-fn) {}))))
29 |       (AttributePoolSelector. attribute-name default-pool))))
30 | 
31 | ; Mount state holding the active PoolSelector instance.
32 | (mount/defstate plugin
33 |   :start (create-plugin-object config/config))
30 |
--------------------------------------------------------------------------------
/scheduler/src/cook/plugins/pool_mover.clj:
--------------------------------------------------------------------------------
1 | (ns cook.plugins.pool-mover
2 | (:require [clojure.tools.logging :as log]
3 | [cook.cached-queries :as cached-queries]
4 | [cook.config :as config]
5 | [cook.plugins.definitions :as chd]
6 | [cook.prometheus-metrics :as prom]
7 | [datomic.api :as d]
8 | [metrics.counters :as counters]))
9 |
10 | ; Counter of jobs moved to a new pool by the PoolMoverJobAdjuster.
11 | (counters/defcounter [cook-mesos plugins pool-mover jobs-migrated])
12 | 
13 | ; JobAdjuster that migrates a configured portion of a user's jobs from a
14 | ; source pool to a destination pool. pool-mover-config maps a source pool
15 | ; name to {:users {<user> {:portion <fraction>}} :destination-pool <name>}.
16 | ; NOTE(review): config shape inferred from the destructuring below — confirm
17 | ; against the :pool-mover settings documentation.
18 | (defrecord PoolMoverJobAdjuster [pool-mover-config]
19 |   chd/JobAdjuster
20 |   (adjust-job [_ {:keys [job/uuid job/pool] :as job-txn} db]
21 |     ; Pool the job was submitted to; falls back to the default pool when the
22 |     ; job's pool entity has no name.
23 |     (let [submission-pool (-> db (d/entity pool) :pool/name (or (config/default-pool)))]
24 |       (if-let [{:keys [users destination-pool]} (get pool-mover-config submission-pool)]
25 |         (let [user (cached-queries/job-ent->user job-txn)]
26 |           (if-let [{:keys [portion]} (get users user)]
27 |             ; Deterministically sample `portion` of the user's jobs by
28 |             ; hashing the job uuid into the range [0, 100).
29 |             (if (and (number? portion)
30 |                      (> (* portion 100) (-> uuid hash (mod 100))))
31 |               (try
32 |                 (log/info "Moving job" uuid "(" user ") from" submission-pool "pool to"
33 |                           destination-pool "pool due to pool-mover configuration")
34 |                 (prom/inc prom/pool-mover-jobs-updated)
35 |                 (counters/inc! jobs-migrated)
36 |                 (assoc job-txn :job/pool (-> db (d/entity [:pool/name destination-pool]) :db/id))
37 |                 ; Never fail the job transaction because of a pool move: log
38 |                 ; the error and keep the original pool instead.
39 |                 (catch Throwable t
40 |                   (log/error t "Error when moving pool to" destination-pool)
41 |                   job-txn))
42 |               job-txn)
43 |             job-txn))
44 |         job-txn))))
33 |
34 | (defn make-pool-mover-job-adjuster
35 |   "Builds a PoolMoverJobAdjuster from the [:settings :plugins :pool-mover]
36 |   configuration."
37 |   [config]
38 |   (let [pool-mover-config (get-in config [:settings :plugins :pool-mover])]
39 |     (log/info "Configuring PoolMoverJobAdjuster" pool-mover-config)
40 |     (PoolMoverJobAdjuster. pool-mover-config)))
39 |
--------------------------------------------------------------------------------
/scheduler/src/cook/plugins/util.clj:
--------------------------------------------------------------------------------
1 | ;;
2 | ;; Copyright (c) Two Sigma Open Source, LLC
3 | ;;
4 | ;; Licensed under the Apache License, Version 2.0 (the "License");
5 | ;; you may not use this file except in compliance with the License.
6 | ;; You may obtain a copy of the License at
7 | ;;
8 | ;; http://www.apache.org/licenses/LICENSE-2.0
9 | ;;
10 | ;; Unless required by applicable law or agreed to in writing, software
11 | ;; distributed under the License is distributed on an "AS IS" BASIS,
12 | ;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | ;; See the License for the specific language governing permissions and
14 | ;; limitations under the License.
15 | ;;
16 | (ns cook.plugins.util
17 | (:require [clj-time.core :as t]))
18 |
19 | (defn resolve-symbol
20 |   "Resolve the given symbol to the corresponding Var, first requiring the
21 |   symbol's namespace (when it has one) so that resolution can succeed."
22 |   [sym]
23 |   (when-let [ns-sym (some-> sym namespace symbol)]
24 |     (require ns-sym))
25 |   (resolve sym))
23 |
24 | ; Sentinel date far in the future (2999-12-31).
25 | ; NOTE(review): presumably used to mean "effectively never expires" — confirm
26 | ; at call sites.
27 | (def positive-infinity-date
28 |   (t/date-time 2999 12 31))
26 |
--------------------------------------------------------------------------------
/scheduler/src/cook/rate_limit.clj:
--------------------------------------------------------------------------------
1 | ;;
2 | ;; Copyright (c) Two Sigma Open Source, LLC
3 | ;;
4 | ;; Licensed under the Apache License, Version 2.0 (the "License");
5 | ;; you may not use this file except in compliance with the License.
6 | ;; You may obtain a copy of the License at
7 | ;;
8 | ;; http://www.apache.org/licenses/LICENSE-2.0
9 | ;;
10 | ;; Unless required by applicable law or agreed to in writing, software
11 | ;; distributed under the License is distributed on an "AS IS" BASIS,
12 | ;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | ;; See the License for the specific language governing permissions and
14 | ;; limitations under the License.
15 | ;;
16 | (ns cook.rate-limit
17 | (:require [clojure.tools.logging :as log]
18 | [cook.config :refer [config]]
19 | [cook.rate-limit.generic :as rtg]
20 | [mount.core :as mount]))
21 |
22 | ; Re-export the relevant cook.rate-limit.generic functions so callers can
23 | ; depend on this namespace alone.
24 | (def spend! rtg/spend!)
25 | (def time-until-out-of-debt-millis! rtg/time-until-out-of-debt-millis!)
26 | (def get-token-count! rtg/get-token-count!)
27 | (def enforce? rtg/enforce?)
28 | (def flush! rtg/flush!)
29 | ; Rate limiter that always allows requests (no limiting applied).
30 | (def AllowAllRateLimiter rtg/AllowAllRateLimiter)
29 |
30 | (defn create-job-submission-rate-limiter
31 |   "From the configuration map, extract the keys that set up the
32 |   job-submission rate limiter and return the constructed object. Returns
33 |   AllowAllRateLimiter when no :job-submission rate-limit configuration is
34 |   present."
35 |   [config]
36 |   (let [job-submission (get-in config [:settings :rate-limit :job-submission])
37 |         expire-minutes (get-in config [:settings :rate-limit :expire-minutes])]
38 |     (if (seq job-submission)
39 |       (rtg/make-tbf-rate-limiter (assoc job-submission :expire-minutes expire-minutes))
40 |       AllowAllRateLimiter)))
41 | 
42 | ; Mount state holding the job-submission rate limiter.
43 | (mount/defstate job-submission-rate-limiter
44 |   :start (create-job-submission-rate-limiter config))
43 |
44 | (defn create-compute-cluster-launch-rate-limiter
45 |   "Builds the global launch rate limiter for a compute cluster from its
46 |   rate-limit configuration. Returns AllowAllRateLimiter when no configuration
47 |   is set."
48 |   [compute-cluster-name compute-cluster-launch-rate-limits]
49 |   (if-not (seq compute-cluster-launch-rate-limits)
50 |     (do
51 |       (log/info "For compute cluster" compute-cluster-name "not configuring global rate limit because no configuration set")
52 |       AllowAllRateLimiter)
53 |     (do
54 |       (log/info "For compute cluster" compute-cluster-name "configuring global rate limit config" compute-cluster-launch-rate-limits)
55 |       (rtg/make-tbf-rate-limiter compute-cluster-launch-rate-limits))))
55 |
56 | ; NOTE(review): appears to be the bucket key for the cluster-wide (global)
57 | ; launch rate limit — confirm against rate-limiter call sites.
58 | (def compute-cluster-launch-rate-limiter-key "*DEF*")
57 |
58 |
--------------------------------------------------------------------------------
/scheduler/src/cook/regexp_tools.clj:
--------------------------------------------------------------------------------
1 | ;;
2 | ;; Copyright (c) Two Sigma Open Source, LLC
3 | ;;
4 | ;; Licensed under the Apache License, Version 2.0 (the "License");
5 | ;; you may not use this file except in compliance with the License.
6 | ;; You may obtain a copy of the License at
7 | ;;
8 | ;; http://www.apache.org/licenses/LICENSE-2.0
9 | ;;
10 | ;; Unless required by applicable law or agreed to in writing, software
11 | ;; distributed under the License is distributed on an "AS IS" BASIS,
12 | ;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | ;; See the License for the specific language governing permissions and
14 | ;; limitations under the License.
15 | ;;
16 | (ns cook.regexp-tools
17 | (:require [clojure.tools.logging :as log]))
18 |
19 | (defn match-based-on-regexp
20 |   "Scans match-list (a sequence of maps) for the first map whose regexp
21 |   string, stored under the regexp-name key, matches `key` via re-find, and
22 |   returns that map's value under field-name. Returns nil when no entry
23 |   matches. Any error (e.g. a missing or invalid regexp) is rethrown as an
24 |   ex-info carrying all of the inputs."
25 |   [regexp-name field-name match-list key]
26 |   (try
27 |     (let [matches? (fn [entry] (re-find (re-pattern (get entry regexp-name)) key))
28 |           matching-entry (first (filter matches? match-list))]
29 |       (get matching-entry field-name))
30 |     (catch Exception e
31 |       (throw (ex-info "Failed matching key" {:regexp-name regexp-name :field-name field-name :match-list match-list :key key} e)))))
33 |
34 | (defn match-based-on-pool-name
35 |   "Given match-list, a sequence of maps of the form
36 |   {:pool-regex <regex-string> <field> <value> ...}, an effective pool name,
37 |   and a field keyword, returns the <field> value of the first entry whose
38 |   :pool-regex matches the pool name. Returns default-value (nil unless
39 |   overridden) when no entry matches or the matched value is nil."
40 |   [match-list effective-pool-name field & {:keys [default-value] :or {default-value nil}}]
41 |   (let [value (match-based-on-regexp
42 |                 :pool-regex
43 |                 field
44 |                 match-list
45 |                 effective-pool-name)]
46 |     (if (some? value)
47 |       value
48 |       default-value)))
46 |
--------------------------------------------------------------------------------
/scheduler/src/cook/reporter.clj:
--------------------------------------------------------------------------------
1 | ;;
2 | ;; Copyright (c) Two Sigma Open Source, LLC
3 | ;;
4 | ;; Licensed under the Apache License, Version 2.0 (the "License");
5 | ;; you may not use this file except in compliance with the License.
6 | ;; You may obtain a copy of the License at
7 | ;;
8 | ;; http://www.apache.org/licenses/LICENSE-2.0
9 | ;;
10 | ;; Unless required by applicable law or agreed to in writing, software
11 | ;; distributed under the License is distributed on an "AS IS" BASIS,
12 | ;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | ;; See the License for the specific language governing permissions and
14 | ;; limitations under the License.
15 | ;;
16 | (ns cook.reporter
17 | (:require [clojure.tools.logging :as log]
18 | [datomic.api :refer [q]]
19 | [metatransaction.core :refer [db]]
20 | [metrics.core :as metrics])
21 | (:import (com.codahale.metrics ConsoleReporter MetricFilter)
22 | (com.codahale.metrics.graphite Graphite GraphiteReporter PickledGraphite)
23 | (java.net InetSocketAddress)
24 | (java.util.concurrent TimeUnit)))
25 |
26 | ;; The default dropwizard metric registry, shared by all reporters below.
27 | (def registry metrics/default-registry)
28 |
29 | (defn jmx-reporter
30 |   "Creates and starts a JmxReporter that exposes the default metric registry
31 |   over JMX."
32 |   []
33 |   (-> (com.codahale.metrics.jmx.JmxReporter/forRegistry metrics/default-registry)
34 |       (.build)
35 |       (.start)))
34 |
35 | (defn graphite-reporter
36 |   "Creates and starts a GraphiteReporter that ships the default metric
37 |   registry to host:port every 30 seconds, using the pickled (batch) protocol
38 |   when pickled? is true and plaintext otherwise. Returns the reporter."
39 |   [{:keys [prefix host port pickled?]}]
40 |   (log/info "Starting graphite reporter")
41 |   (let [address (InetSocketAddress. host port)
42 |         sender (if pickled?
43 |                  (PickledGraphite. address)
44 |                  (Graphite. address))
45 |         reporter (.. (GraphiteReporter/forRegistry metrics/default-registry)
46 |                      (prefixedWith prefix)
47 |                      (filter MetricFilter/ALL)
48 |                      (convertRatesTo TimeUnit/SECONDS)
49 |                      (convertDurationsTo TimeUnit/MILLISECONDS)
50 |                      (build sender))]
51 |     (.start reporter 30 TimeUnit/SECONDS)
52 |     reporter))
49 |
50 | (defn console-reporter
51 |   "Creates and starts a ConsoleReporter that prints the default metric
52 |   registry to stdout every 30 seconds. Returns the reporter."
53 |   []
54 |   (let [reporter (.. (ConsoleReporter/forRegistry metrics/default-registry)
55 |                      (convertRatesTo TimeUnit/SECONDS)
56 |                      (convertDurationsTo TimeUnit/MILLISECONDS)
57 |                      (build))]
58 |     (.start reporter 30 TimeUnit/SECONDS)
59 |     reporter))
58 |
--------------------------------------------------------------------------------
/scheduler/src/cook/rest/cors.clj:
--------------------------------------------------------------------------------
1 | (ns cook.rest.cors)
2 |
3 | (defn preflight?
4 |   "True when the request is a CORS preflight, i.e. an OPTIONS request."
5 |   [req]
6 |   (-> req :request-method (= :options)))
6 |
7 | (defn same-origin?
8 |   "Returns true when the request's origin header matches the request's own
9 |   scheme (or x-forwarded-proto) and host header; nil when any required value
10 |   is missing."
11 |   [{:keys [headers scheme]}]
12 |   (let [host (get headers "host")
13 |         origin (get headers "origin")
14 |         proto (or (get headers "x-forwarded-proto")
15 |                   (some-> scheme name))]
16 |     (when (and host origin proto)
17 |       (= origin (str proto "://" host)))))
15 |
16 | (defn request-allowed?
17 |   "Returns truthy when the request is either same-origin or its origin header
18 |   matches one of the cors-origins regex patterns. The request should have a
19 |   non-nil origin header."
20 |   [req cors-origins]
21 |   (if (same-origin? req)
22 |     true
23 |     (let [origin (get-in req [:headers "origin"])]
24 |       (some #(re-matches % origin) cors-origins))))
23 |
24 | (defn wrap-preflight
25 |   "Middleware that answers CORS preflight requests (OPTIONS with an origin
26 |   header): allowed origins receive a 200 with the CORS response headers,
27 |   others a 403. Every other request passes through to handler."
28 |   [handler cors-origins]
29 |   (fn preflight-handler [{:keys [headers] :as req}]
30 |     (let [origin (get headers "origin")]
31 |       (cond
32 |         (not (and (preflight? req) origin))
33 |         (handler req)
34 | 
35 |         (request-allowed? req cors-origins)
36 |         {:status 200
37 |          :headers {"Access-Control-Allow-Credentials" "true"
38 |                    "Access-Control-Allow-Headers" (get headers "access-control-request-headers")
39 |                    "Access-Control-Allow-Methods" "PUT, GET, OPTIONS, DELETE"
40 |                    "Access-Control-Allow-Origin" origin
41 |                    "Access-Control-Max-Age" "86400"}} ; 1 day
42 | 
43 |         :else
44 |         {:status 403
45 |          :body (str "Origin " origin " not allowed")}))))
41 |
42 | (defn wrap-cors
43 |   "Middleware enforcing CORS on requests that carry an origin header: allowed
44 |   origins get the handler's response with CORS headers added, others a 403.
45 |   Requests without an origin header pass through untouched."
46 |   [handler cors-origins]
47 |   (fn cors-handler [{:keys [headers] :as req}]
48 |     (let [origin (get headers "origin")]
49 |       (cond
50 |         (nil? origin)
51 |         (handler req)
52 | 
53 |         (request-allowed? req cors-origins)
54 |         (update-in (handler req) [:headers] assoc
55 |                    "Access-Control-Allow-Credentials" "true"
56 |                    "Access-Control-Allow-Origin" origin)
57 | 
58 |         :else
59 |         {:status 403
60 |          :body (str "Cross origin request denied from " origin)}))))
56 |
57 | (defn cors-middleware
58 |   "Wraps handler with wrap-cors (inner) and wrap-preflight (outer) so that
59 |   preflight requests are answered before CORS enforcement runs."
60 |   [handler cors-origins]
61 |   (wrap-preflight (wrap-cors handler cors-origins) cors-origins))
63 |
--------------------------------------------------------------------------------
/scheduler/src/cook/rest/secret.clj:
--------------------------------------------------------------------------------
1 | ;;
2 | ;; Copyright (c) Two Sigma Open Source, LLC
3 | ;;
4 | ;; Licensed under the Apache License, Version 2.0 (the "License");
5 | ;; you may not use this file except in compliance with the License.
6 | ;; You may obtain a copy of the License at
7 | ;;
8 | ;; http://www.apache.org/licenses/LICENSE-2.0
9 | ;;
10 | ;; Unless required by applicable law or agreed to in writing, software
11 | ;; distributed under the License is distributed on an "AS IS" BASIS,
12 | ;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | ;; See the License for the specific language governing permissions and
14 | ;; limitations under the License.
15 | ;;
16 | (ns cook.rest.secret
17 | "This namespace is for development. It uses a fake secret."
18 | (:require [ring.middleware.params]))
19 |
20 | (defn wrap-terribly-insecure-auth
21 |   "Development-only middleware: trusts the \"user\" query parameter as the
22 |   authenticated user and attaches it to the request as :authorization/user."
23 |   [handler]
24 |   (fn [req]
25 |     (let [user (get-in req [:params "user"])]
26 |       (handler (assoc req :authorization/user user)))))
24 |
25 | (defn authorization-middleware
26 |   "Wraps the given handler with the development fake-secret auth middleware."
27 |   [auth]
28 |   (wrap-terribly-insecure-auth auth))
29 |
--------------------------------------------------------------------------------
/scheduler/src/fork/metrics_clojure/LICENSE.markdown:
--------------------------------------------------------------------------------
1 | MIT/X11 License
2 | ===============
3 |
4 | Copyright (c) 2011-2017 Steve Losh and contributors
5 |
6 | Permission is hereby granted, free of charge, to any person obtaining a copy of
7 | this software and associated documentation files (the "Software"), to deal in
8 | the Software without restriction, including without limitation the rights to
9 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
10 | the Software, and to permit persons to whom the Software is furnished to do so,
11 | subject to the following conditions:
12 |
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
18 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
19 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
20 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22 |
23 |
--------------------------------------------------------------------------------
/scheduler/src/fork/metrics_clojure/README.txt:
--------------------------------------------------------------------------------
1 | This code is copied from https://github.com/metrics-clojure/metrics-clojure
2 | and git hash a1dbacc748a1f8165f0094e2229c84f228efe29b
3 |
We need the unreleased 3.0.0x branch for support of JDK versions newer than 8. We modified the file's package name.
5 |
--------------------------------------------------------------------------------
/scheduler/src/fork/metrics_clojure/metrics/jvm/core.clj:
--------------------------------------------------------------------------------
1 | (ns fork.metrics-clojure.metrics.jvm.core
2 | (:import (com.codahale.metrics MetricRegistry)
3 | (com.codahale.metrics.jvm ThreadStatesGaugeSet GarbageCollectorMetricSet FileDescriptorRatioGauge
4 | MemoryUsageGaugeSet JvmAttributeGaugeSet))
5 | (:require [metrics.core :refer [add-metric default-registry]]))
6 |
(defn register-jvm-attribute-gauge-set
  "Registers a JvmAttributeGaugeSet on reg, titled [\"jvm\" \"attribute\"] by default."
  ([^MetricRegistry reg]
   (register-jvm-attribute-gauge-set reg ["jvm" "attribute"]))
  ([^MetricRegistry reg title]
   (add-metric reg title (JvmAttributeGaugeSet.))))
12 |
(defn register-memory-usage-gauge-set
  "Registers a MemoryUsageGaugeSet on reg, titled [\"jvm\" \"memory\"] by default."
  ([^MetricRegistry reg]
   (register-memory-usage-gauge-set reg ["jvm" "memory"]))
  ([^MetricRegistry reg title]
   (add-metric reg title (MemoryUsageGaugeSet.))))
18 |
(defn register-file-descriptor-ratio-gauge-set
  "Registers a FileDescriptorRatioGauge on reg, titled [\"jvm\" \"file\"] by default."
  ([^MetricRegistry reg]
   (register-file-descriptor-ratio-gauge-set reg ["jvm" "file"]))
  ([^MetricRegistry reg title]
   (add-metric reg title (FileDescriptorRatioGauge.))))
24 |
(defn register-garbage-collector-metric-set
  "Registers a GarbageCollectorMetricSet on reg, titled [\"jvm\" \"gc\"] by default."
  ([^MetricRegistry reg]
   (register-garbage-collector-metric-set reg ["jvm" "gc"]))
  ([^MetricRegistry reg title]
   (add-metric reg title (GarbageCollectorMetricSet.))))
30 |
(defn register-thread-state-gauge-set
  "Registers a ThreadStatesGaugeSet on reg, titled [\"jvm\" \"thread\"] by default."
  ([^MetricRegistry reg]
   (register-thread-state-gauge-set reg ["jvm" "thread"]))
  ([^MetricRegistry reg title]
   (add-metric reg title (ThreadStatesGaugeSet.))))
36 |
(defn instrument-jvm
  "Registers every JVM metric set (attributes, memory, file descriptors, GC,
  threads) on reg, defaulting to the default registry."
  ([]
   (instrument-jvm default-registry))
  ([^MetricRegistry reg]
   (run! (fn [register!] (register! reg))
         [register-jvm-attribute-gauge-set
          register-memory-usage-gauge-set
          register-file-descriptor-ratio-gauge-set
          register-garbage-collector-metric-set
          register-thread-state-gauge-set])))
47 |
--------------------------------------------------------------------------------
/scheduler/test-resources/log4j.properties:
--------------------------------------------------------------------------------
1 | log4j.rootLogger=INFO, A1
2 | log4j.appender.A1=org.apache.log4j.RollingFileAppender
3 | log4j.appender.A1.File=test-log/app.log
4 | log4j.appender.A1.MaxFileSize=500MB
5 | log4j.appender.A1.MaxBackupIndex=2
6 | log4j.appender.A1.layout=org.apache.log4j.PatternLayout
7 | log4j.appender.A1.layout.ConversionPattern=%d [%t] %-5p%c - %m%n
--------------------------------------------------------------------------------
/scheduler/test/cook/test/components.clj:
--------------------------------------------------------------------------------
1 | ;;
2 | ;; Copyright (c) Two Sigma Open Source, LLC
3 | ;;
4 | ;; Licensed under the Apache License, Version 2.0 (the "License");
5 | ;; you may not use this file except in compliance with the License.
6 | ;; You may obtain a copy of the License at
7 | ;;
8 | ;; http://www.apache.org/licenses/LICENSE-2.0
9 | ;;
10 | ;; Unless required by applicable law or agreed to in writing, software
11 | ;; distributed under the License is distributed on an "AS IS" BASIS,
12 | ;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | ;; See the License for the specific language governing permissions and
14 | ;; limitations under the License.
15 | ;;
16 | (ns cook.test.components
17 | (:require [clojure.test :refer :all]
18 | [cook.components :as components]))
19 |
;; Verifies components/health-check-middleware: /debug requests always get a
;; status, non-/debug requests pass through to the wrapped handler untouched.
(deftest test-health-check-middleware
  (let [handler (fn [_] "Called handler!")
        debug-request {:uri "/debug"
                       :request-method :get}]

    (testing "always returns 200 if leader-reports-unhealthy is false"
      (let [leadership-atom (atom false)
            middleware (components/health-check-middleware handler
                                                           leadership-atom
                                                           false)]
        (is (= 200 (:status (middleware debug-request))))
        ;; becoming the leader must not change the response when the flag is off
        (swap! leadership-atom (constantly true))
        (is (= 200 (:status (middleware debug-request))))))

    (testing "returns 503 when leader"
      (let [leadership-atom (atom false)
            middleware (components/health-check-middleware handler
                                                           leadership-atom
                                                           true)]
        (is (= 200 (:status (middleware debug-request))))
        ;; with leader-reports-unhealthy true, the leader reports 503
        (swap! leadership-atom (constantly true))
        (is (= 503 (:status (middleware debug-request))))))

    (testing "passes other requests to handler"
      (let [leadership-atom (atom false)
            middleware (components/health-check-middleware handler
                                                           leadership-atom
                                                           true)]
        (is (= "Called handler!" (middleware {:uri "/real-request"})))
        (swap! leadership-atom (constantly true))
        (is (= "Called handler!" (middleware {:uri "/real-request"})))))))
51 |
52 |
--------------------------------------------------------------------------------
/scheduler/test/cook/test/log_structured.clj:
--------------------------------------------------------------------------------
1 | ;;
2 | ;; Copyright (c) Two Sigma Open Source, LLC
3 | ;;
4 | ;; Licensed under the Apache License, Version 2.0 (the "License");
5 | ;; you may not use this file except in compliance with the License.
6 | ;; You may obtain a copy of the License at
7 | ;;
8 | ;; http://www.apache.org/licenses/LICENSE-2.0
9 | ;;
10 | ;; Unless required by applicable law or agreed to in writing, software
11 | ;; distributed under the License is distributed on an "AS IS" BASIS,
12 | ;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | ;; See the License for the specific language governing permissions and
14 | ;; limitations under the License.
15 | ;;
16 | (ns cook.test.log_structured
17 | (:require [clojure.test :refer :all]
18 | [cook.log-structured :as log-structured])
19 | (:import (java.util UUID)))
20 |
;; NOTE(review): deftest does not support docstrings -- the string below is
;; evaluated and discarded. Kept as-is since it documents intent harmlessly.
(deftest test-level-disabled
  "Tests that functions are not evaluated at a disabled log level."
  (let [fn-was-called (atom false)
        shouldnt-be-called (fn [] (reset! fn-was-called true))]
    ;; debug is expected to be disabled here, so neither the message
    ;; expression nor the data map should ever be evaluated
    (log-structured/debug (str (shouldnt-be-called)) {:test (shouldnt-be-called)})
    (is (= @fn-was-called false))))
27 |
;; NOTE(review): the string below is not a docstring (deftest has none); it is
;; evaluated and discarded.
(deftest test-level-enabled
  "Tests that functions are evaluated as expected at an enabled log level."
  (let [fn-was-called (atom false)
        should-be-called (fn [] (reset! fn-was-called true))]
    ;; info is enabled, so both argument expressions must be evaluated
    (log-structured/info (str (should-be-called)) {:test (should-be-called)})
    (is (= @fn-was-called true))))
34 |
;; Smoke test: logging a value with no native JSON representation (a UUID)
;; must not throw. The assertion is simply that this call completes.
(deftest test-not-json-compatible
  "Tests that passing a value that cannot be converted to json works as expected."
  (log-structured/info "some message" {:uuid (UUID/randomUUID)}))
38 |
--------------------------------------------------------------------------------
/scheduler/test/cook/test/mesos/reason.clj:
--------------------------------------------------------------------------------
1 | ;;
2 | ;; Copyright (c) Two Sigma Open Source, LLC
3 | ;;
4 | ;; Licensed under the Apache License, Version 2.0 (the "License");
5 | ;; you may not use this file except in compliance with the License.
6 | ;; You may obtain a copy of the License at
7 | ;;
8 | ;; http://www.apache.org/licenses/LICENSE-2.0
9 | ;;
10 | ;; Unless required by applicable law or agreed to in writing, software
11 | ;; distributed under the License is distributed on an "AS IS" BASIS,
12 | ;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | ;; See the License for the specific language governing permissions and
14 | ;; limitations under the License.
15 | ;;
16 | (ns cook.test.mesos.reason
17 | (:use clojure.test)
18 | (:require [cook.mesos.reason :as r]
19 | [cook.test.postgres]
20 | [cook.test.testutil :refer (restore-fresh-database!)]
21 | [datomic.api :as d :refer (q db)]))
22 |
;; PostgreSQL fixture required by the shared test utilities.
(use-fixtures :once cook.test.postgres/with-pg-db)

;; Exercises the reasons API against a fresh in-memory Datomic database with a
;; configured mea-culpa failure limit.
(deftest reasons-api
  (let [conn (restore-fresh-database! "datomic:mem://mesos-api-test")
        default-limit 10
        ;; set default failure limit.
        _ @(d/transact conn [{:db/id :scheduler/config
                              :scheduler.config/mea-culpa-failure-limit
                              default-limit}])
        db (d/db conn)]

    (testing "all-known-failure-reasons"
      (let [reasons (r/all-known-reasons db)]
        (doseq [reason reasons]
          ;; testing in any more detail is overkill; it would amount
          ;; to proving that the schema is the schema
          (is (instance? datomic.Entity reason)))))

    (testing "default-failure-limit"
      (is (= default-limit (r/default-failure-limit db))))))
43 |
--------------------------------------------------------------------------------
/scheduler/test/cook/test/plugins/job_submission_modifier.clj:
--------------------------------------------------------------------------------
1 | (ns cook.test.plugins.job-submission-modifier
2 | (:require [clojure.test :refer :all]
3 | [cook.plugins.definitions :as plugins]
4 | [cook.plugins.job-submission-modifier :as job-mod]))
5 |
;; The identity modifier should pass the job through unchanged, attaching only
;; the requested pool under :pool.
(deftest test-identity-add-pool
  (let [mod-plugin (job-mod/->IdentityJobSubmissionModifier)
        job {}
        pool-name "my-pool"]
    (is (= "my-pool" (get (plugins/modify-job mod-plugin job pool-name) :pool) ))))
11 |
;; Plugin stub whose modify-job always throws; used to exercise error handling.
(defrecord TestJobModifier []
  plugins/JobSubmissionModifier
  (modify-job [this job pool-name]
    (throw (IllegalArgumentException. "TestJobModifier always throws"))))
16 |
;; Sanity check that the throwing stub actually throws through the protocol.
(deftest test-raise-exception
  ; On its own, this test has little value. We are more interested in the overall
  ; behavior when a real job is submitted and the plugin raises an exception.
  (let [mod-plugin (TestJobModifier.)]
    (is (thrown? IllegalArgumentException (plugins/modify-job mod-plugin nil nil)))))
--------------------------------------------------------------------------------
/scheduler/test/cook/test/plugins/pool.clj:
--------------------------------------------------------------------------------
1 | (ns cook.test.plugins.pool
2 | (:require [clojure.test :refer :all]
3 | [cook.plugins.definitions :as plugins]
4 | [cook.plugins.pool :as pool]))
5 |
;; AttributePoolSelector should return the configured default pool unless an
;; attribute with the configured name is present, in which case that
;; attribute's :text value wins.
(deftest test-attribute-pool-selector
  (let [selector (pool/->AttributePoolSelector "test-attribute" "my-pool")]
    (is (= "my-pool" (plugins/select-pool selector {})))
    ;; "cook-pool" is not the configured attribute name, so the default applies
    (is (= "my-pool" (plugins/select-pool selector {:attributes [{:name "cook-pool"
                                                                  :text "a-pool"}]})))
    (is (= "a-pool" (plugins/select-pool selector {:attributes [{:name "test-attribute"
                                                                 :text "a-pool"}]})))
    (is (= "b-pool" (plugins/select-pool selector {:attributes [{:name "test-attribute"
                                                                 :text "b-pool"}]})))))
15 |
--------------------------------------------------------------------------------
/scheduler/test/cook/test/pool.clj:
--------------------------------------------------------------------------------
1 | (ns cook.test.pool
2 | (:require [clojure.test :refer :all]
3 | [cook.config :as config]
4 | [cook.pool :as pool]
5 | [cook.test.postgres])
6 | (:import (clojure.lang ExceptionInfo)))
7 |
;; PostgreSQL fixture required by the shared test utilities.
(use-fixtures :once cook.test.postgres/with-pg-db)

;; guard-invalid-default-pool should be a no-op when the pool list and the
;; configured default pool agree, and throw when pools exist with no default
;; or the default names a pool that does not exist.
(deftest test-guard-invalid-default-pool
  (with-redefs [pool/all-pools (constantly [{:pool/name "foo"}])
                config/default-pool (constantly "foo")]
    (is (nil? (pool/guard-invalid-default-pool nil))))
  (with-redefs [pool/all-pools (constantly [])
                config/default-pool (constantly nil)]
    (is (nil? (pool/guard-invalid-default-pool nil))))
  (with-redefs [pool/all-pools (constantly [{}])
                config/default-pool (constantly nil)]
    (is (thrown-with-msg? ExceptionInfo
                          #"There are pools in the database, but no default pool is configured"
                          (pool/guard-invalid-default-pool nil))))
  (with-redefs [pool/all-pools (constantly [])
                config/default-pool (constantly "foo")]
    (is (thrown-with-msg? ExceptionInfo
                          #"There is no pool in the database matching the configured default pool"
                          (pool/guard-invalid-default-pool nil))))
  (with-redefs [pool/all-pools (constantly [{:pool/name "bar"}])
                config/default-pool (constantly "foo")]
    (is (thrown-with-msg? ExceptionInfo
                          #"There is no pool in the database matching the configured default pool"
                          (pool/guard-invalid-default-pool nil)))))
--------------------------------------------------------------------------------
/scheduler/test/cook/test/rest/basic_auth.clj:
--------------------------------------------------------------------------------
1 | ;;
2 | ;; Copyright (c) Two Sigma Open Source, LLC
3 | ;;
4 | ;; Licensed under the Apache License, Version 2.0 (the "License");
5 | ;; you may not use this file except in compliance with the License.
6 | ;; You may obtain a copy of the License at
7 | ;;
8 | ;; http://www.apache.org/licenses/LICENSE-2.0
9 | ;;
10 | ;; Unless required by applicable law or agreed to in writing, software
11 | ;; distributed under the License is distributed on an "AS IS" BASIS,
12 | ;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | ;; See the License for the specific language governing permissions and
14 | ;; limitations under the License.
15 | ;;
16 | (ns cook.test.rest.basic-auth
17 | (:require [clojure.test :refer :all]
18 | [cook.rest.basic-auth :as basic-auth]))
19 |
;; make-user-password-valid? returns a predicate: the :none kind accepts any
;; arguments at any arity, while :config-file accepts only [user password]
;; pairs listed in :valid-logins.
(deftest make-user-password-valid?-test
  (testing "none"
    (let [user-password-valid? (basic-auth/make-user-password-valid? :none true)]
      (is (= true (user-password-valid?)))
      (is (= true (user-password-valid? "lol")))
      (is (= true (user-password-valid? "lol" :banana)))))
  (testing "config-file"
    (let [user-password-valid? (basic-auth/make-user-password-valid? :config-file
                                                                     {:valid-logins #{["abc" "123"]
                                                                                      ["wyegelwe" "lol"]}})]
      (is (= true (user-password-valid? "abc" "123")))
      (is (= true (user-password-valid? "wyegelwe" "lol")))
      (is (= false (user-password-valid? "anything" "else"))))))
33 |
--------------------------------------------------------------------------------
/scheduler/test/cook/test/scheduler/optimizer.clj:
--------------------------------------------------------------------------------
1 | (ns cook.test.scheduler.optimizer
2 | (:require [clojure.test :refer :all]
3 | [cook.scheduler.optimizer :as optimizer]))
4 |
5 | ;; Tests to make sure data flows and validates properly
(deftest test-optimizer-cycle
  (let [host {:count 1
              :instance-type "small"
              :cpus 1
              :mem 1000}
        ;; host feed yielding a single host
        host-feed (reify optimizer/HostFeed
                    (get-available-host-info [this]
                      [host]))
        ;; trivial optimizer: match every queued job onto the first host
        optimizer (reify optimizer/Optimizer
                    (produce-schedule [this queue running available [host-info & host-infos]]
                      {0 {:suggested-matches {host-info (map :job/uuid queue)}}}))
        queue [{:job/uuid (java.util.UUID/randomUUID)} {:job/uuid (java.util.UUID/randomUUID)}]
        schedule (optimizer/optimizer-cycle! (fn get-queue [] queue)
                                             (fn get-running [] [])
                                             (fn get-offers [] [])
                                             host-feed
                                             optimizer)]
    (is (= (count schedule) 1))
    ;; the returned schedule must key its matches by the host-info that was fed in
    (is (= (first (keys (get-in schedule [0 :suggested-matches])))
           host))))
26 |
--------------------------------------------------------------------------------
/scheduler/test/cook/test/util.clj:
--------------------------------------------------------------------------------
1 | ;;
2 | ;; Copyright (c) Two Sigma Open Source, LLC
3 | ;;
4 | ;; Licensed under the Apache License, Version 2.0 (the "License");
5 | ;; you may not use this file except in compliance with the License.
6 | ;; You may obtain a copy of the License at
7 | ;;
8 | ;; http://www.apache.org/licenses/LICENSE-2.0
9 | ;;
10 | ;; Unless required by applicable law or agreed to in writing, software
11 | ;; distributed under the License is distributed on an "AS IS" BASIS,
12 | ;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | ;; See the License for the specific language governing permissions and
14 | ;; limitations under the License.
15 | ;;
16 | (ns cook.test.util
17 | (:require [clojure.test :refer :all]
18 | [cook.util :refer :all])
19 | (:import (java.util UUID)))
20 |
21 |
;; diff-map-keys appears to return [keys-only-in-left keys-only-in-right
;; keys-present-in-both] (value equality irrelevant for the third set).
;; NOTE(review): inferred from the assertions below; confirm against cook.util.
(deftest test-diff-map-keys
  (is (= [#{:b} #{:c} #{:a :d}]
         (diff-map-keys {:a {:a :a}
                         :b {:b :b}
                         :d {:d :d}}
                        {:a {:a :a}
                         :c {:c :c}
                         :d {:d :e}})))
  (is (= [nil #{:c} #{:a :d}]
         (diff-map-keys {:a {:a :a}
                         :d {:d :d}}
                        {:a {:a :a}
                         :c {:c :c}
                         :d {:d :e}})))
  (is (= [#{:b} nil #{:a :d}]
         (diff-map-keys {:a {:a :a}
                         :b {:b :b}
                         :d {:d :d}}
                        {:a {:a :a}
                         :d {:d :e}}))))
42 |
;; deep-merge-with recursively merges nested maps, combining colliding leaf
;; values with f; it propagates NullPointerException when f is applied to nil.
(deftest test-deep-merge-with
  (is (= {:a {:b {:z 3, :c 3, :d {:z 9, :x 1, :y 2}}, :e 103}, :f 4}
         (deep-merge-with +
                          {:a {:b {:c 1 :d {:x 1 :y 2}} :e 3} :f 4}
                          {:a {:b {:c 2 :d {:z 9} :z 3} :e 100}})))
  (is (= {"foo" 2}
         (deep-merge-with - {"foo" 3} {"foo" 1})))
  ;; nil leaves are not special-cased: the combining fn sees nil and throws
  (is (thrown? NullPointerException
               (deep-merge-with - {"foo" nil} {"foo" 1})))
  (is (thrown? NullPointerException
               (deep-merge-with - {"foo" 1} {"foo" nil}))))
54 |
55 |
;; set-atom! resets the atom to the new value and returns the previous value.
(deftest test-set-atom!
  (let [state (atom {})]
    (is (= @state {}))
    (is (= (set-atom! state "a") {}))
    (is (= (set-atom! state {:a :b}) "a"))
    (is (= @state {:a :b}))))
62 |
;; NOTE(review): the string below is not a docstring (deftest has none); it is
;; evaluated and discarded. UUIDs are stringified; nested maps stay nested.
(deftest test-format-map-for-structured-logging
  "Tests that the format-map-for-structured logging preserves nested maps."
  (let [uuid (UUID/randomUUID)
        map {:integer 2 :float 1.2 :string "foo" :uuid uuid :nested-map {:nested-string "bar" :nested-int 3}}
        formatted-map (format-map-for-structured-logging map)]
    (is (= {:integer 2 :float 1.2 :string "foo" :uuid (str uuid) :nested-map {:nested-string "bar" :nested-int 3}} formatted-map))))
69 |
--------------------------------------------------------------------------------
/scheduler/travis/setup.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

# CI setup for the scheduler: installs the current jobclient into the local
# Maven repository, then resolves Leiningen dependencies for the test profile.
# Requires GITHUB_WORKSPACE to point at the repository checkout.

set -euo pipefail

# Install the current version of the jobclient.
# ${GITHUB_WORKSPACE:?} aborts loudly if the variable is unset or empty,
# instead of silently pushd-ing to a wrong (relative) path.
pushd "${GITHUB_WORKSPACE:?GITHUB_WORKSPACE must be set}/jobclient/java"
mvn install
popd

# Install lein dependencies
lein with-profiles +test deps
13 |
--------------------------------------------------------------------------------
/sidecar/.dockerignore:
--------------------------------------------------------------------------------
1 | build
2 | dist
3 | *.egg-info
4 | *.egg/
5 | *.pyc
6 | *.swp
7 | __pycache__
8 |
--------------------------------------------------------------------------------
/sidecar/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__
2 | *.egg-info
3 | dist/
4 | build/
5 |
--------------------------------------------------------------------------------
/sidecar/Dockerfile:
--------------------------------------------------------------------------------
# Cook sidecar image: installs the cook-sidecar package and serves sandbox
# files over HTTP.
# NOTE(review): python:3.7-alpine is end-of-life -- consider a supported base.
FROM python:3.7-alpine

# Copy only what `pip install -e .` needs.
COPY cook /app/cook
COPY Dockerfile /app
COPY README.md /app
COPY setup.py /app
WORKDIR /app
RUN pip3 install -e .
# Launch the sidecar file server on port 8080.
CMD ["cook-sidecar", "--file-server-port", "8080"]
10 |
--------------------------------------------------------------------------------
/sidecar/README.md:
--------------------------------------------------------------------------------
1 | A python file server that replicates part of the Mesos `files` endpoint API for backwards compatibility.
2 |
3 | See http://mesos.apache.org/documentation/latest/endpoints/files/download/
4 | http://mesos.apache.org/documentation/latest/endpoints/files/read/ and
5 | http://mesos.apache.org/documentation/latest/endpoints/files/browse/
6 |
7 | ## Building
8 |
9 | pip install dependencies:
10 |
11 | ```bash
12 | $ pip3 install -e .
13 | ```
14 |
15 | ## Running
16 |
17 | Usage:
18 |
19 | The `COOK_WORKDIR` environment variable must be set. Only files with `COOK_WORKDIR` as the root will be served.
20 |
21 | ```
22 | cook-sidecar --file-server-port PORT
23 | ```
24 |
25 | Run `cook-sidecar --help` for full usage documentation.
26 |
27 | Examples:
28 |
29 | ```bash
30 | $ cook-sidecar --file-server-port 8000
31 | ```
32 |
--------------------------------------------------------------------------------
/sidecar/cook/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/twosigma/Cook/e43416aea1ff47b667101d275464ba45541f982e/sidecar/cook/__init__.py
--------------------------------------------------------------------------------
/sidecar/cook/sidecar/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/twosigma/Cook/e43416aea1ff47b667101d275464ba45541f982e/sidecar/cook/sidecar/__init__.py
--------------------------------------------------------------------------------
/sidecar/cook/sidecar/exit_sentinel.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | #
3 | # Copyright (c) 2020 Two Sigma Open Source, LLC
4 | #
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to
7 | # deal in the Software without restriction, including without limitation the
8 | # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
9 | # sell copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 | #
12 | # The above copyright notice and this permission notice shall be included in
13 | # all copies or substantial portions of the Software.
14 | #
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 | # IN THE SOFTWARE.
22 | #
23 | """Cook sidecar exit sentinel file watcher thread logic."""
24 |
25 | import logging
26 | import os
27 | import signal
28 | import threading
29 | import time
30 |
def watch_for_file(sentinel_file_path, started_event, poll_interval_secs=0.1):
    """Start a daemon thread that terminates this process when a sentinel file appears.

    The thread first waits for ``started_event`` (set once the other sidecar
    components have started), then polls until ``sentinel_file_path`` exists,
    and finally sends SIGTERM to this process so its normal termination
    handler runs.

    Args:
        sentinel_file_path: path whose existence triggers termination.
        started_event: threading.Event signaling component startup completion.
        poll_interval_secs: seconds between existence checks (default 0.1).

    Returns:
        None. The watcher runs on a daemon thread and never blocks interpreter exit.
    """
    def daemon_routine():
        # Wait for other components to finish starting before watching.
        # Lazy %s formatting defers string work to the logging framework
        # (the originals were f-strings, one with no placeholder at all).
        logging.info('Waiting for all components to start...')
        started_event.wait()
        # Poll for the sentinel file to appear.
        logging.info('Watching for sentinel file: %s', sentinel_file_path)
        while not os.path.exists(sentinel_file_path):
            time.sleep(poll_interval_secs)
        # Trigger this process's termination handler.
        logging.info('Sidecar termination triggered by sentinel file: %s', sentinel_file_path)
        os.kill(os.getpid(), signal.SIGTERM)

    threading.Thread(target=daemon_routine, daemon=True).start()
44 |
--------------------------------------------------------------------------------
/sidecar/cook/sidecar/util.py:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (c) 2020 Two Sigma Open Source, LLC
3 | #
4 | # Permission is hereby granted, free of charge, to any person obtaining a copy
5 | # of this software and associated documentation files (the "Software"), to
6 | # deal in the Software without restriction, including without limitation the
7 | # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
8 | # sell copies of the Software, and to permit persons to whom the Software is
9 | # furnished to do so, subject to the following conditions:
10 | #
11 | # The above copyright notice and this permission notice shall be included in
12 | # all copies or substantial portions of the Software.
13 | #
14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
20 | # IN THE SOFTWARE.
21 | #
22 |
23 | import logging
24 | import os
25 | import sys
26 |
def init_logging():
    """Configure root logging to stderr with a timestamped format.

    The level comes from the EXECUTOR_LOG_LEVEL environment variable,
    defaulting to 'INFO'.  NOTE(review): the variable is named EXECUTOR_*
    although this lives in the sidecar package -- presumably shared with the
    executor; confirm before renaming.
    """
    level = os.environ.get('EXECUTOR_LOG_LEVEL', 'INFO')
    logging.basicConfig(
        level=level,
        stream=sys.stderr,
        format='%(asctime)s %(levelname)s %(message)s')
32 |
--------------------------------------------------------------------------------
/sidecar/cook/sidecar/version.py:
--------------------------------------------------------------------------------
1 | VERSION = '1.2.3'
2 |
--------------------------------------------------------------------------------
/sidecar/setup.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# Packaging script for the Cook sidecar (sandbox file access + progress reporting).

from setuptools import setup

from cook.sidecar import version

# Runtime dependencies of the sidecar.
requirements = [
    'flask~=1.1.0',
    'gunicorn~=20.1.0',
    'requests~=2.27.0',
]

# No extra test-only dependencies at present.
test_requirements = [
]

setup(
    name='cook_sidecar',
    version=version.VERSION,  # single-sourced from cook/sidecar/version.py
    description="Two Sigma's Cook Sidecar",
    long_description="The Cook Sidecar provides sandbox file access and progress reporting.",
    packages=['cook.sidecar'],
    # Installs the `cook-sidecar` console script.
    entry_points={'console_scripts': ['cook-sidecar = cook.sidecar.__main__:main']},
    install_requires=requirements,
    tests_require=test_requirements
)
26 |
--------------------------------------------------------------------------------
/simulator/config/settings.edn:
--------------------------------------------------------------------------------
1 | {:sim-db-uri "datomic:free://localhost:4334/cook-sim"
2 | :cook-db-uri "datomic:free://localhost:4334/cook-jobs"
3 | :cook-api-uri "http://localhost:12321"
4 | :process-count 10
5 | :sim-model {:label "Ten second test"
6 | :duration-seconds 10
7 | :user-profiles
8 | [{:description "Profile 1"
9 | :usernames ["testuser1" "testuser2"]
10 | :docker-tendency 0.5
11 | :group-tendency 0.5
12 | :group-size {:mean 3
13 | :std-dev 1
14 | :floor 2
15 | :ceiling 6}
16 | :seconds-between-jobs {:mean 3
17 | :std-dev 1
18 | :floor 1
19 | :ceiling 120}
20 | :job-duration {:mean 10
21 | :std-dev 2
22 | :floor 1
23 | :ceiling 120}
24 | :job-memory {:mean 512
25 | :std-dev 200
26 | :floor 1
27 | :ceiling 2048}
28 | :job-cpu {:mean 2.0
29 | :std-dev 1.0
30 | :floor 1.0
31 | :ceiling 4.0}}]}}
32 |
--------------------------------------------------------------------------------
/simulator/project.clj:
--------------------------------------------------------------------------------
1 | (defproject cook/sim "0.1.0-SNAPSHOT"
2 | :description "Simulation tests for Cook"
3 | :dependencies [[org.clojure/clojure "1.8.0"]
4 | [clj-time "0.9.0"]
5 | [cheshire "5.5.0"]
6 | [com.datomic/datomic-free "0.9.5344"
7 | :exclusions [org.clojure/clojure joda-time]]
8 | [com.datomic/simulant "0.1.8"]
9 | [org.clojure/math.numeric-tower "0.0.4"]
10 | [com.stuartsierra/component "0.3.1"]
11 | [org.clojure/data.generators "0.1.2"]
12 | [org.clojure/tools.cli "0.3.3"]
13 | [org.clojure/algo.generic "0.1.2"]
14 | ;; [reloaded.repl "0.2.1"]
15 | [clj-http "2.0.1"]
16 | [prismatic/schema "1.1.3"]
17 | [robert/bruce "0.8.0"]
18 | [incanter "1.5.7"]]
19 | :resource-paths ["resources"]
20 | :main cook.sim.cli
21 | :source-paths ["src/main"]
22 | :profiles {:dev {:source-paths ["src/dev"]
23 | :repl-options {:init-ns cook.sim.repl}
24 | :dependencies [[reloaded.repl "0.2.1"]]}})
25 |
--------------------------------------------------------------------------------
/simulator/src/main/cook/sim/database.clj:
--------------------------------------------------------------------------------
1 | (ns cook.sim.database
2 | (:require [clojure.java.io :as io]
3 | [datomic.api :as d]))
4 |
(defn recreate-database!
  "Destructively resets the Datomic database at uri: any existing database is
  deleted, then a fresh empty one is created. Returns create-database's result."
  [uri]
  (d/delete-database uri)
  (d/create-database uri))
11 |
(defn load-schema
  "Reads the EDN map at resource (an IO resource name, e.g. a filename on the
  classpath) and transacts each transaction found in its values against conn,
  in order."
  [conn resource]
  (let [schema-map (read-string (slurp (io/resource resource)))]
    (doseq [tx (mapcat identity (vals schema-map))]
      @(d/transact conn tx))))
20 |
(defn setup-database!
  "Recreates the simulation database at :sim-db-uri and loads both the
  Simulant schema and the Cook Simulator extensions into it."
  [settings]
  (prn "setting up the schema...")
  (let [uri (:sim-db-uri settings)]
    (recreate-database! uri)
    (let [conn (d/connect uri)]
      (load-schema conn "simulant/schema.edn")
      (load-schema conn "job_schedule.edn"))))
30 |
--------------------------------------------------------------------------------
/simulator/src/main/cook/sim/system.clj:
--------------------------------------------------------------------------------
1 | (ns cook.sim.system
2 | (:require [clojure.edn :as edn]
3 | [com.stuartsierra.component :as component]
4 | [datomic.api :as d]
5 | ))
6 |
;; Lifecycle component holding parsed application settings. On start, the EDN
;; file at :path is slurped into :settings; on stop, :settings is cleared.
(defrecord Config [path settings]
  component/Lifecycle

  (start [component]
    (println "Loading settings from " path)
    (assoc component :settings (-> path slurp edn/read-string)))

  (stop [component]
    (assoc component :settings nil)))
16 |
(defn new-config
  "Base system component; encapsulates application configuration loaded from
  the edn file at path. :settings stays nil until the component is started."
  [path]
  (->Config path nil))
22 |
23 |
;; Lifecycle component for the simulator's own Datomic database. On start a
;; connection is opened to the :sim-db-uri from config; on stop it is dropped.
(defrecord SimDb [config conn]
  component/Lifecycle

  (start [component]
    (println "Connecting to simulation database...")
    (assoc component :conn (-> config :settings :sim-db-uri d/connect)))

  (stop [component]
    (assoc component :conn nil)))
33 |
(defn new-sim-db
  "SimDb is a Datomic database that stores everything the Simulator wants to remember
  about simulations - workload descriptors, the users therein, the jobs those users
  will request during a simulation, etc."
  ([]
   (new-sim-db nil))
  ([config]
   (map->SimDb {:config config})))
40 |
41 |
42 | (defrecord CookDb [config conn]
43 | component/Lifecycle
44 |
45 | (start [component]
46 | (println "Connecting to Cook database...")
47 | (assoc component :conn (-> config :settings :cook-db-uri d/connect)))
48 |
49 | (stop [component]
50 | (assoc component :conn nil)))
51 |
(defn new-cook-db
  "CookDb is a reference to Cook Scheduler's own Datomic database. Many
  functions of the Simulator depend on having a connection available to this
  database. For example, the Cook database is queried to figure out what
  happened to various jobs in a Simulation in order to analyze how the
  Scheduler performed."
  ([]
   (new-cook-db nil))
  ([config]
   (map->CookDb {:config config})))
59 |
60 |
(defn system
  "Top level access point for all of the system components. Both database
  components depend on the shared Config component."
  [config-path]
  (let [config-component (new-config config-path)]
    (component/system-map
     :config config-component
     :sim-db (component/using (new-sim-db) [:config])
     :cook-db (component/using (new-cook-db) [:config]))))
68 |
--------------------------------------------------------------------------------
/simulator/src/main/cook/sim/util.clj:
--------------------------------------------------------------------------------
1 | (ns cook.sim.util
2 | (:require [datomic.api :as d]))
3 |
(defn transaction-times
  "Given a Datomic db snapshot and an entity id, returns a sorted seq of the
  :db/txInstant times of every transaction that has touched the entity."
  [db eid]
  (let [tuples (d/q '[:find ?instant
                      :in $ ?e
                      :where
                      [?e _ _ ?tx]
                      [?tx :db/txInstant ?instant]]
                    (d/history db) eid)]
    (sort (map first tuples))))
16 |
(defn created-at
  "Given a Datomic db snapshot and an entity id, returns the time when the
  entity was first created (its earliest transaction)."
  [db eid]
  (-> (transaction-times db eid) first))
22 |
(defn updated-at
  "Given a Datomic db snapshot and an entity id, returns the time when the
  entity was last updated (its most recent transaction)."
  [db eid]
  (-> (transaction-times db eid) last))
28 |
(defn seconds
  "Converts a number of milliseconds into a printable float count of
  fractional seconds."
  [millis]
  (-> millis (/ 1000) float))
33 |
--------------------------------------------------------------------------------
/simulator/travis/prepare_simulation.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Prepares the environment for a simulation run: fetches the simulator's
# Clojure dependencies, runs the shared CI prepare script, and pre-pulls the
# python:3 image used during the simulation.
set -ev

# $(...) over backticks; export separately so a failure of pwd isn't masked.
PROJECT_DIR=$(pwd)
export PROJECT_DIR

lein deps

# NOTE(review): relies on being invoked from the simulator/ directory so that
# ../travis/prepare.sh resolves -- confirm against the CI workflow definition.
../travis/prepare.sh

docker pull python:3
11 |
--------------------------------------------------------------------------------
/simulator/travis/run_simulation.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Starts the Cook scheduler, sets up the simulation database, runs the
# simulation, and dumps executor logs if the simulation fails.
set -ev

# $(...) over backticks; export separately so a failure of pwd isn't masked.
PROJECT_DIR=$(pwd)
export PROJECT_DIR
"${GITHUB_WORKSPACE}/travis/start_scheduler.sh"

cd "${PROJECT_DIR}" || exit 1
lein run -c config/settings.edn setup-database -c travis/simulator_config.edn

# Disable exit-on-error so we can capture the simulation's exit code and
# still show the executor logs on failure before propagating it.
set +e
lein run -c config/settings.edn travis -c travis/simulator_config.edn
SIM_EXIT_CODE=$?

if [ "${SIM_EXIT_CODE}" -ne 0 ]; then
  echo "Displaying executor logs"
  "${GITHUB_WORKSPACE}/travis/show_executor_logs.sh"
fi

exit "${SIM_EXIT_CODE}"
20 |
--------------------------------------------------------------------------------
/simulator/travis/scheduler_config.edn:
--------------------------------------------------------------------------------
;; Cook Scheduler configuration used for the CI simulation run.
{:port 12321
 :hostname "172.17.0.1"
 :authorization {:http-basic true}
 :database {:datomic-uri "datomic:free://localhost:4334/cook-jobs"}
 :zookeeper {:local? true}
 :scheduler {:offer-incubate-ms 15000
             :task-constraints {:timeout-hours 1
                                :timeout-interval-minutes 1
                                :memory-gb 48
                                :retry-limit 200
                                :cpus 6}}
 :rebalancer {:interval-seconds 20
              :safe-dru-threshold 0.0
              ;; virtually ANY improvement in DRU balance should provoke preemption:
              :min-dru-diff 1E-309
              :max-preemption 64.0
              :dru-scale 1}

 :mesos {:master "zk://172.17.0.3:2181/mesos" ; minimesos zookeeper
         :failover-timeout-ms nil
         :leader-path "/cook-scheduler"
         :role "cook"
         :framework-id "cook-framework"}
 ;; NOTE(review): the compute-cluster config repeats the :mesos master and
 ;; framework-id above -- keep the two in sync when editing.
 :compute-clusters [{:factory-fn cook.mesos.mesos-compute-cluster/factory-fn
                     :config {:compute-cluster-name "default-compute-cluster-from-config-defaulting"
                              :framework-id "cook-framework"
                              :master "zk://172.17.0.3:2181/mesos"
                              :failover-timeout nil
                              :principal nil
                              :role "cook"
                              :framework-name nil}}]
 :unhandled-exceptions {:log-level :error}
 :metrics {:jmx true}
 :nrepl {:enabled? true
         :port 8888}
 :log {:file "log/cook.log"
       :levels {"datomic.db" :warn
                "datomic.peer" :warn
                "datomic.kv-cluster" :warn
                "cook.mesos.rebalancer" :debug
                :default :info}}}
42 |
--------------------------------------------------------------------------------
/travis/build_cook_executor.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Builds the Cook executor distribution and unpacks the resulting tarball into
# the travis directory for use by the scheduler run.

set -ev

cd "${GITHUB_WORKSPACE}/executor" || exit 1
pip install -r requirements.txt
./bin/prepare-executor.sh local "${GITHUB_WORKSPACE}/scheduler/resources/public"
tar -C "${GITHUB_WORKSPACE}/travis" -xzf ./dist/cook-executor-local.tar.gz
9 |
--------------------------------------------------------------------------------
/travis/gdrive_upload:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
"""Uploads an xz-compressed log tarball to our Google Drive stash.

Usage: gdrive_upload JOB-ID XZ-FILE
Requires the GDRIVE_LOG_POST_URL environment variable to point at the
upload web application.
"""

import base64
import os
import requests
import sys
import warnings

if len(sys.argv) != 3:
    # BUG FIX: the format string and sys.argv[0] were previously passed as two
    # separate arguments to print(), so the '{}' placeholder was never filled.
    print('USAGE: {} JOB-ID XZ-FILE'.format(sys.argv[0]))
    print('Upload an xz-compressed file to our Google Drive stash')
    sys.exit(1)

tarball_path = sys.argv[2]

# upload to google drive
app_url = os.environ.get('GDRIVE_LOG_POST_URL')

if not app_url:
    print('Missing application url. Please set GDRIVE_LOG_POST_URL in the environment.')
    sys.exit(1)

# The tarball is base64-encoded into the POST body alongside the job id.
with open(tarball_path, 'rb') as tarball:
    post_data = {
        'job_id': sys.argv[1],
        'tarball': base64.b64encode(tarball.read())
    }

# Suppress warnings (e.g. TLS-related) emitted by requests during the upload.
with warnings.catch_warnings():
    warnings.simplefilter('ignore')
    response = requests.post(app_url, data=post_data, timeout=10)

print()
print('==============================')
print('== UPLOAD RESPONSE:')
print('==============================')
print(response.text)
print('==============================')
print()

# The web app replies with a message ending in 'successfully' on success;
# anything else is treated as a failed upload.
if not response.text.strip().endswith('successfully'):
    print('UPLOAD FAILED!')
    sys.exit(1)
44 |
--------------------------------------------------------------------------------
/travis/install_mesos.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Installs the Mesos native library via apt, caching the downloaded .deb
# packages under $HOME/.apt-cache to speed up subsequent CI runs.

PACKAGE_CACHE_DIR="$HOME/.apt-cache"
DISTRO=$(lsb_release -is | tr '[:upper:]' '[:lower:]')
CODENAME=$(lsb_release -cs)

if [ -d "$PACKAGE_CACHE_DIR" ] && [ -n "$(find "$PACKAGE_CACHE_DIR" -name 'mesos_*.deb')" ]; then
  echo 'Using cached Mesos library...'
  cp -f "$PACKAGE_CACHE_DIR"/*.deb /var/cache/apt/archives/
else
  echo 'Downloading Mesos library...'
  # NOTE(review): only the tee below runs under sudo; the apt-key/apt-get
  # calls assume the script itself runs as root -- confirm in CI config.
  apt-key adv --keyserver keyserver.ubuntu.com --recv E56151BF
  echo "deb http://repos.mesosphere.io/${DISTRO} ${CODENAME} main" | sudo tee /etc/apt/sources.list.d/mesosphere.list
  apt-get update -qq
  apt-get install mesos -y --download-only
  mkdir -p "$PACKAGE_CACHE_DIR/"
  cp -f /var/cache/apt/archives/*.deb "$PACKAGE_CACHE_DIR/"
fi

set -x

apt-get install --allow-downgrades --fix-broken --no-download --yes "$PACKAGE_CACHE_DIR"/*.deb
APT_EXIT_CODE=$?

# Fail if apt errored, or if the native library the scheduler expects (the
# path in MESOS_NATIVE_JAVA_LIBRARY) did not appear after installation.
if [ "$APT_EXIT_CODE" -ne 0 ] || ! [ -f "$MESOS_NATIVE_JAVA_LIBRARY" ]; then
  echo 'Mesos installation error!'
  exit "$APT_EXIT_CODE"
fi
29 |
--------------------------------------------------------------------------------
/travis/minimesos:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Wrapper that runs the minimesos CLI inside its Docker image, mounting the
# current directory and the host's Docker socket into the container.

set -e

MINIMESOS_TAG="0.14.20180710"
# BUG FIX: keep the CLI arguments as an array instead of a flattened string,
# so parameters that contain whitespace survive intact.
PARAMS=(--debug "$@")
MINIMESOS_CLI_IMAGE="twosigma/minimesos-cli"

command_exists() {
  command -v "$@" > /dev/null 2>&1
}

DOCKER_VERSION=$(docker version --format "{{.Server.Version}}")
SMALLEST_VERSION=$(printf "%s\n1.11.0\n" "$DOCKER_VERSION" | sort -t '.' -k 1,1 -k 2,2 -k 3,3 -k 4,4 -g | head -n 1)

if ! command_exists docker || [ "$SMALLEST_VERSION" != "1.11.0" ]; then
  echo "Minimesos requires Docker 1.11.0 or higher"
  exit 1
fi

# Work out the IP that containers should use to reach the Docker host.
if [ "$DOCKER_HOST" != "" ] && [[ $DOCKER_HOST == tcp* ]]; then
  DOCKER_HOST_IP=$(echo "$DOCKER_HOST" | grep -o '[0-9]\+[.][0-9]\+[.][0-9]\+[.][0-9]\+')
elif command_exists docker-machine && [ "$DOCKER_MACHINE_NAME" != "" ]; then
  DOCKER_HOST_IP=$(docker-machine ip "${DOCKER_MACHINE_NAME}")
elif [ "$(uname)" != "Darwin" ]; then
  DOCKER_HOST_IP=$(ip addr show dev docker0 | grep inet | sed -r "s/.*inet\s([0-9\.]+)\/.*/\1/" | head -n 1)
else
  DOCKER_HOST_IP=""
fi

# Pulls image $1 at tag $2 unless it is already present locally.
pullImage() {
  if [ "$(docker images "$1" | grep "$2" 2> /dev/null)" = "" ]; then
    echo "Pulling $1:$2"
    docker pull "$1:$2"
  fi
}

# '-a' inside [ ] is deprecated and ambiguous; use two tests joined by &&.
if [ "$#" -gt 0 ] && [ "$1" = up ]; then
  pullImage "${MINIMESOS_CLI_IMAGE}" "${MINIMESOS_TAG}"
fi

if [ "$(uname)" = "Darwin" ]; then
  MINIMESOS_OS="Mac OS X"
else
  MINIMESOS_OS="Linux"
fi

MINIMESOS_HOST_DIR="$(pwd)"
MINIMESOS_DIR="$(pwd)/.minimesos"
if [ ! -d "${MINIMESOS_DIR}" ]; then
  mkdir -p "${MINIMESOS_DIR}"
  echo "# Created minimesos directory at ${MINIMESOS_DIR}."
fi

docker run --rm -v "${MINIMESOS_HOST_DIR}":"${MINIMESOS_HOST_DIR}" \
  -v /var/run/docker.sock:/var/run/docker.sock \
  -v /sys/fs/cgroup:/sys/fs/cgroup \
  -i \
  --env DOCKER_HOST_IP="${DOCKER_HOST_IP}" \
  --env MINIMESOS_OS="${MINIMESOS_OS}" \
  --entrypoint java \
  "${MINIMESOS_CLI_IMAGE}:${MINIMESOS_TAG}" \
  -Dminimesos.host.dir="${MINIMESOS_HOST_DIR}" \
  -jar /usr/local/share/minimesos/minimesos-cli.jar "${PARAMS[@]}"
65 |
--------------------------------------------------------------------------------
/travis/prepare.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Builds the scheduler uberjar and stages it, together with Datomic Free,
# under the travis directory.
set -ev

cd "${GITHUB_WORKSPACE}/scheduler" || exit 1
lein deps
lein uberjar
VERSION=$(lein print :version | tr -d '"')

cd "${GITHUB_WORKSPACE}/travis" || exit 1
unzip "${GITHUB_WORKSPACE}/scheduler/datomic/datomic-free-0.9.5394.zip"
cp "${GITHUB_WORKSPACE}/scheduler/target/cook-${VERSION}.jar" datomic-free-0.9.5394/lib/
12 |
--------------------------------------------------------------------------------
/travis/show_executor_logs.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Dumps the stdout/stderr/executor.log files of every minimesos task, then
# delegates to the scheduler-log dump script.
set -v

echo "Printing out all executor logs..."
# BUG FIX: the previous here-string loop ran its body once with an empty path
# when find matched nothing, and split on whitespace. NUL-delimited
# find/read is safe on both counts. The \( \) grouping is required so that
# -print0 applies to all three -name alternatives, not just the last.
while IFS= read -r -d '' path; do
  echo "Contents of ${path}"
  cat "${path}"
  echo "------------------------------------"
done < <(find "${GITHUB_WORKSPACE}/travis/.minimesos" \( -name 'stdout' -o -name 'stderr' -o -name 'executor.log' \) -print0)

"${GITHUB_WORKSPACE}/travis/show_scheduler_logs.sh"
12 |
--------------------------------------------------------------------------------
/travis/show_scheduler_logs.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Dumps every Cook scheduler log file to stdout.

# BUG FIX: without nullglob an unmatched pattern is passed through literally,
# making `cat` fail on a nonexistent path; with it the loop simply no-ops.
shopt -s nullglob

for log in "${GITHUB_WORKSPACE}"/scheduler/log/cook*.log; do
  echo "Contents of ${log}"
  cat "${log}"
  echo "------------------------------------"
done
9 |
--------------------------------------------------------------------------------
/travis/start_scheduler.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Builds the executor, starts the Datomic transactor and minimesos, then
# launches the Cook scheduler in the background.
set -ev

cd "${GITHUB_WORKSPACE}/travis" || exit 1

./build_cook_executor.sh
./datomic-free-0.9.5394/bin/transactor "${GITHUB_WORKSPACE}/scheduler/datomic/datomic_transactor.properties" &
./minimesos up

cd "${GITHUB_WORKSPACE}/scheduler" || exit 1
# on travis, ports on 172.17.0.1 are bindable from the host OS, and are also
# available for processes inside minimesos containers to connect to
# NOTE(review): PROJECT_DIR is not set in this script; it must be exported by
# the caller (e.g. run_simulation.sh) -- confirm before running standalone.
LIBPROCESS_IP=172.17.0.1 lein run "${PROJECT_DIR}/travis/scheduler_config.edn" &
14 |
--------------------------------------------------------------------------------
/travis/upload_logs.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Collects scheduler, minimesos, and Mesos master/agent logs into a tarball
# and uploads it to our Google Drive stash via gdrive_upload.

set -e

cd "${GITHUB_WORKSPACE}" || exit 1

# Create dump name
repo=${GITHUB_REPOSITORY}
pr_number=$(jq -r ".pull_request.number" "$GITHUB_EVENT_PATH")
dump_name="${repo//\//-}-PR${pr_number}-${GITHUB_WORKFLOW// /-}-$GITHUB_RUN_ID"

# List the last 10 containers
docker ps --all --last 10

# Grab the Mesos master logs
mkdir -p ./mesos/master-logs
mesos_master_container=$(docker ps --all --latest --filter "name=minimesos-master-" --format "{{.ID}}")
docker cp --follow-link "$mesos_master_container":/var/log/mesos-master.INFO ./mesos/master-logs/
docker cp --follow-link "$mesos_master_container":/var/log/mesos-master.WARNING ./mesos/master-logs/

# Grab the Mesos agent logs
mesos_agent_containers=$(docker ps --all --last 6 --filter "name=minimesos-agent-" --format "{{.ID}}")
for container in $mesos_agent_containers; do
  destination=./mesos/agent-logs/$container
  mkdir -p "$destination"
  docker cp --follow-link "$container":/var/log/mesos-slave.INFO "$destination"
  docker cp --follow-link "$container":/var/log/mesos-slave.WARNING "$destination"
  docker cp --follow-link "$container":/var/log/mesos-slave.ERROR "$destination"
  # The fetcher log only exists for tasks that fetched artifacts; best-effort.
  docker cp --follow-link "$container":/var/log/mesos-fetcher.INFO "$destination" || echo "Container $container does not have mesos-fetcher.INFO"
done

tarball=./dump.txz
# BUG FIX: exitcode was left unset when tar succeeded; initialize it so the
# check below always compares against a defined value.
exitcode=0
tar -cJf "$tarball" --transform="s|\./[^/]*/\.*|${dump_name}/|" --warning=no-file-changed ./scheduler/log ./travis/.minimesos ./mesos/master-logs ./mesos/agent-logs || exitcode=$?
# GNU tar always exits with 0, 1 or 2 (https://www.gnu.org/software/tar/manual/html_section/tar_19.html)
# 0 = Successful termination
# 1 = Some files differ (we're OK with this)
# 2 = Fatal error
if [ "$exitcode" == "2" ]; then
  echo "The tar command exited with exit code $exitcode, exiting..."
  exit "$exitcode"
fi
./travis/gdrive_upload "travis-${dump_name}" "$tarball"
44 |
--------------------------------------------------------------------------------