├── .gitignore
├── README.md
├── examples
│   ├── hello_world
│   │   ├── default.nix
│   │   └── files
│   │       └── hello_world.py
│   └── prometheus
│       ├── default.nix
│       └── files
│           └── prometheus.yml
└── lib
    └── aurora.nix

/.gitignore:
--------------------------------------------------------------------------------
result
.DS_Store
.closure
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# nix-aurora

This repo is a proof-of-concept for integrating [Apache Aurora](http://aurora.apache.org/) with the [Nix](http://nixos.org/nix/) expression language, package manager, and build system.

## What's the idea?

The idea is to define Aurora job configurations using Nix expressions. Each Nix expression defines a single Aurora Job (using the typical Task/Process/Constraint/Resources/etc. schema), and the build result is a JSON configuration file that can be passed to the Aurora CLI. Since the job is defined and built using Nix, all referenced packages and dependencies are built transparently, so you can avoid defining extra processes for versioning and initialization.

If your process depends on a software package, simply reference the package in `cmdline` and Nix will build the package and substitute a reference to it in the Nix store. For example, if you want to run an Nginx process, you can create a process with `cmdline = "${pkgs.nginx}/bin/nginx ..."`. When the job is built, Nix builds Nginx and interpolates its store path into the configuration. Further, since a Nix store path embeds a cryptographic hash of the package and its dependencies, updating a package automatically updates the configuration, so you can avoid versioning "tricks": any time a dependency changes, the config changes.
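For instance, inside a job expression like the ones under `examples/` below, such an Nginx process might look like this (a minimal sketch using this repo's `aurora.Process` helper; the flags and `./files/nginx.conf` are illustrative, not part of this repo):

```nix
# Sketch only: pkgs.nginx is built by Nix and its store path is
# interpolated into cmdline. The -c config file and -g flag are
# illustrative assumptions, not something this repo ships.
nginxProcess = aurora.Process {
  name = "nginx";
  cmdline = "${pkgs.nginx}/bin/nginx -c ${./files/nginx.conf} -g 'daemon off;'";
};
```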
## How does this work?

An Aurora Job is composed of a Task, which consists of one or more Processes. In addition to building the Job as a Nix derivation, every Process is also built as a Nix derivation (the output is a bash script with `cmdline` as its contents). The JSON configuration creates an "init" process for each Process specified in the Job, which ensures that this bash script exists in the Nix store. Since referenced packages are dependencies of the Process derivation, those packages then also exist in the Nix store. The configuration then adds a Constraint for each Process that ensures its init process completes before the corresponding Process is launched.
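Concretely, for the `hello_world` process from the example below, the generated init process and ordering constraint look roughly like this (paraphrased from `mkInitProcess` in `lib/aurora.nix`, with the store hash abbreviated):

```nix
# Paraphrase of what lib/aurora.nix generates: the init process realises
# the Process derivation into the Nix store and roots it at .gc/hello_world
# so it survives garbage collection; the constraint orders init first.
initProcess = {
  name = "nix_init_hello_world";
  cmdline = ''
    . /home/vagrant/.nix-profile/etc/profile.d/nix.sh
    nix-store --add-root .gc/hello_world --indirect -r /nix/store/...-aurora-process-devcluster-vagrant-devel-hello_world-hello_world
  '';
};
orderConstraint = { order = [ "nix_init_hello_world" "hello_world" ]; };
```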
## Steps to run

1. Start the Aurora Vagrant VM. Directions [here](http://aurora.apache.org/documentation/latest/vagrant/).
2. Install Nix using `curl https://nixos.org/nix/install | sh` and source `/home/vagrant/.nix-profile/etc/profile.d/nix.sh`.
3. `git clone https://github.com/rafikk/nix-aurora.git`
4. Run `aurora job create --read-json devcluster/vagrant/devel/hello_world $(nix-build --no-out-link examples/hello_world)` to start the job.

If you would just like to see the JSON configuration, install Nix and run `cat $(nix-build ./nix-aurora/examples/hello_world --no-out-link) | python -m json.tool`. Here's the output running on my Mac laptop:

```json
{
    "cluster": "devcluster",
    "environment": "devel",
    "name": "hello_world",
    "role": "vagrant",
    "service": true,
    "task": {
        "constraints": [
            {
                "order": [
                    "nix_init_hello_world",
                    "hello_world"
                ]
            }
        ],
        "name": "hello_world",
        "processes": [
            {
                "cmdline": ". /home/vagrant/.nix-profile/etc/profile.d/nix.sh\nnix-store --add-root .gc/hello_world --indirect -r /nix/store/4c6z04825b9a49vlsp2mk0rg19ybyd5f-aurora-process-devcluster-vagrant-devel-hello_world-hello_world\n",
                "name": "nix_init_hello_world"
            },
            {
                "cmdline": "/nix/store/q41nkp3p684xyjlnv02f8hnid234z4n8-python-2.7.10/bin/python2.7 /nix/store/53fxpfg3nkkjvzfvh54r8ccms2b3l7iz-hello_world.py/hello_world.py",
                "name": "hello_world"
            }
        ],
        "resources": {
            "cpu": 1,
            "disk": 8388608,
            "ram": 1048576
        }
    }
}
```

## Limitations

1. Currently this only works on a single-node Vagrant cluster. To run it on a multi-node production cluster, Nix must run in multi-user daemon mode, and a binary cache (or another distribution mechanism) would be required; one possible stopgap is sketched below.
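For example, a distribution mechanism could be as simple as pushing each job's closure to the agent hosts with `nix-copy-closure` (a sketch only; `agent1.example.com` is a placeholder host, and nothing in this repo automates this):

```sh
# Sketch: copy the built job config plus its full closure (every store
# path it references) to an agent host. The hostname is a placeholder.
job=$(nix-build --no-out-link examples/hello_world)
nix-copy-closure --to vagrant@agent1.example.com "$job"
```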
--------------------------------------------------------------------------------
/examples/hello_world/default.nix:
--------------------------------------------------------------------------------
{ nixpkgs ? <nixpkgs> }:

let

  pkgs = import nixpkgs {};

  aurora = import ../../lib/aurora.nix { inherit pkgs; };

  helloWorldProcess = aurora.Process {
    name = "hello_world";
    cmdline = "${pkgs.python}/bin/${pkgs.python.executable} ${./files/hello_world.py}";
  };

  helloWorldTask = aurora.Task {
    processes = [ helloWorldProcess ];
    resources = {
      cpu = 1;
      ram = 1 * aurora.utils.MB;
      disk = 8 * aurora.utils.MB;
    };
  };

in aurora.Service {
  cluster = "devcluster";
  environment = "devel";
  role = "vagrant";
  task = helloWorldTask;
}
--------------------------------------------------------------------------------
/examples/hello_world/files/hello_world.py:
--------------------------------------------------------------------------------
import sys
import time

def main(argv):
  SLEEP_DELAY = 10
  # Python ninjas - ignore this blatant bug.
  for i in xrange(100):
    print("Hello world! The time is now: %s. Sleeping for %d secs" % (
      time.asctime(), SLEEP_DELAY))
    sys.stdout.flush()
    time.sleep(SLEEP_DELAY)

if __name__ == "__main__":
  main(sys.argv)
--------------------------------------------------------------------------------
/examples/prometheus/default.nix:
--------------------------------------------------------------------------------
{ nixpkgs ? <nixpkgs> }:

let

  pkgs = import nixpkgs {};

  aurora = import ../../lib/aurora.nix { inherit pkgs; };

  prometheusProcess = aurora.Process {
    name = "prometheus";
    cmdline = ''
      ${pkgs.prometheus}/bin/prometheus \
        -config.file=${aurora.utils.files.copiedExpandedFile ./files/prometheus.yml} \
        -web.listen-address=0.0.0.0:{{thermos.ports[http]}} \
        -log.level=debug
    '';
  };

  prometheusTask = aurora.Task {
    processes = [
      prometheusProcess
    ];
    resources = {
      cpu = 1;
      ram = 8 * aurora.utils.MB;
      disk = 1 * aurora.utils.GB;
    };
  };

in aurora.Service {
  cluster = "devcluster";
  environment = "devel";
  role = "vagrant";
  task = prometheusTask;
}
--------------------------------------------------------------------------------
/examples/prometheus/files/prometheus.yml:
--------------------------------------------------------------------------------
# my global config
global:
  scrape_interval: 15s     # By default, scrape targets every 15 seconds.
  evaluation_interval: 15s # By default, evaluate rules every 15 seconds.
  # scrape_timeout is set to the global default (10s).

  # Attach these extra labels to all timeseries collected by this Prometheus instance.
  labels:
    monitor: 'codelab-monitor'

# Load and evaluate rules in this file every 'evaluation_interval' seconds.
rule_files:
  # - "first.rules"
  # - "second.rules"

# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  - job_name: '{{name}}'

    # Override the global default and scrape targets from this job every 5 seconds.
    scrape_interval: 5s
    scrape_timeout: 10s

    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.

    target_groups:
      - targets: ['localhost:{{thermos.ports[http]}}']
--------------------------------------------------------------------------------
/lib/aurora.nix:
--------------------------------------------------------------------------------
{ pkgs }:

rec {

  Job =
    { name ? task.name
    , role
    , contact ? null
    , cluster
    , environment
    , instances ? 1
    , task
    , announce ? null
    , cron_schedule ? null
    , cron_collision_policy ? "KILL_EXISTING"
    , constraints ? null
    , service ? false
    , update_config ? UpdateConfig {}
    , max_task_failures ? 1
    , production ? false
    , priority ? 0
    , health_check_config ? HealthCheckConfig {}
    , enable_hooks ? false
    } @ attrs:

    let

      # Each Process is built as its own derivation: an executable text
      # file whose contents are the process's cmdline.
      mkProcessDerivation = process: pkgs.writeTextFile {
        name = "aurora-process-${cluster}-${role}-${environment}-${name}-${process.name}";
        text = process.cmdline;
        executable = true;
      };

      # For each Process, generate an init process that realises the
      # Process derivation into the Nix store, rooted against garbage
      # collection under .gc/ in the sandbox.
      mkInitProcess = process: {
        name = "nix_init_${process.name}";
        cmdline = ''
          . /home/vagrant/.nix-profile/etc/profile.d/nix.sh
          nix-store --add-root .gc/${process.name} --indirect -r ${mkProcessDerivation process}
        '';
      };

      initProcesses = map mkInitProcess task.processes;

      processes = initProcesses ++ task.processes;

      # Order each init process before its corresponding Process.
      constraints = (map
        (p: { order = [ "nix_init_${p.name}" p.name ]; }) task.processes)
        ++ task.constraints;

      wrappedTask = task // { inherit processes constraints; };

    in pkgs.writeTextFile {
      name = with attrs; "aurora-job-${cluster}-${role}-${environment}-${name}";
      text = builtins.toJSON (attrs // { inherit name; task = wrappedTask; });
    };

  Service = attrs: Job (attrs // { service = true; });

  Task =
    { name ? (builtins.head processes).name
    , processes
    , constraints ? []
    , resources ? null
    , max_failures ? 1
    , max_concurrency ? 0
    , finalization_wait ? 30
    } @ attrs:
    { inherit name constraints; } // attrs;

  Process =
    { cmdline
    , name
    , max_failures ? 1
    , daemon ? false
    , ephemeral ? false
    , final ? false
    , min_duration ? 5
    } @ attrs: attrs;

  Resources =
    { cpu
    , ram
    , disk
    } @ attrs: attrs;

  UpdateConfig =
    { batch_size ? 1
    , restart_threshold ? 60
    , watch_secs ? 45
    , max_per_shard_failures ? 0
    , max_total_failures ? 0
    , rollback_on_failure ? true
    , wait_for_batch_completion ? false
    , pulse_interval_secs ? null
    } @ attrs: attrs;

  HealthCheckConfig =
    { initial_interval_secs ? 15
    , interval_secs ? 10
    , timeout_secs ? 1
    , max_consecutive_failures ? 0
    , endpoint ? "/health"
    , expected_response ? "ok"
    , expected_response_code ? 0
    } @ attrs: attrs;

  Announcer =
    { primary_port ? "http"
    , port_map ? { aurora = "{{primary_port}}"; }
    } @ attrs: attrs;

  utils = rec {
    B = 1;
    KB = 1024 * B;
    MB = 1024 * KB;
    GB = 1024 * MB;

    files = {

      # Build time: collect every {{binding}} in filePath into a JSON map
      # of the form { "name": "{{name}}" }. Because that JSON is inlined
      # into the process cmdline below, Aurora's template engine fills in
      # the real values when the config is loaded. At runtime, a $( ... )
      # command substitution writes an expanded copy of the file into the
      # sandbox and echoes its path, which becomes the interpolated
      # argument in cmdline.
      copiedExpandedFile = filePath:

        let
          bindingsFile = pkgs.stdenv.mkDerivation {
            name = pkgs.lib.last (pkgs.lib.strings.splitString "/" (builtins.toString filePath));
            buildCommand = ''
              mkdir -p $(dirname $out)
              ${pkgs.python}/bin/${pkgs.python.executable} -c '\
              import sys, re, json; \
              pattern = re.compile(r"""{{&?([^{}]+?)\1?}}"""); \
              matches = set(pattern.findall(sys.stdin.read())); \
              output = json.dumps(dict((k, """{{%s}}""" % k) for k in matches)); \
              sys.stdout.write(output)' \
              < ${filePath} > $out
            '';
          };

          outputPath = pkgs.lib.last (pkgs.lib.strings.splitString "/" (builtins.toString bindingsFile));

        # Newlines are stripped so the whole command substitution fits on
        # the single line Aurora expects for cmdline.
        in pkgs.lib.strings.concatStrings (pkgs.lib.strings.splitString "\n" ''
          $(${pkgs.python}/bin/${pkgs.python.executable} -c '
          import json, re, sys, os;
          bindings = json.loads("""${builtins.readFile bindingsFile}""");
          pattern = re.compile(r"""{{&?([^{}]+?)\1?}}""");
          sys.stdout.write(pattern.sub(lambda m: bindings.get(m.group(1)), sys.stdin.read()))'
          < ${filePath}
          > ${outputPath};
          echo ${outputPath})'');

    };

  };

}
--------------------------------------------------------------------------------