├── site ├── .prettierrc ├── public │ ├── favicon.ico │ └── zeit.svg ├── .gitignore ├── package.json ├── components │ └── Oakfile.js ├── README.md └── pages │ └── index.js ├── surge ├── CORS ├── data │ ├── CA │ │ ├── a │ │ └── b │ └── NY │ │ ├── a │ │ └── b ├── CNAME └── raw_people.csv ├── docker ├── .gitignore ├── Dockerfile.alpine ├── Dockerfile └── Makefile ├── tests ├── library │ ├── env │ │ └── test.txt │ └── test.ts ├── sqlite │ ├── env │ │ └── .gitkeep │ └── test.ts ├── run-import-watch │ ├── env │ │ ├── Oakfile │ │ └── sub │ │ │ ├── write.js │ │ │ └── Oakfile │ └── test.ts ├── run-simple-import │ ├── env │ │ ├── sub │ │ │ ├── subsub │ │ │ │ └── Oakfile │ │ │ └── Oakfile │ │ └── Oakfile │ └── test.ts ├── task-target-dir │ ├── env │ │ ├── print_files.sh │ │ ├── Oakfile │ │ └── generate_files.js │ └── test.ts ├── run-inject-multiple │ ├── env │ │ ├── sub │ │ │ └── Oakfile │ │ └── Oakfile │ └── test.ts ├── run-inject-deep │ ├── env │ │ ├── sub │ │ │ ├── subsub │ │ │ │ └── Oakfile │ │ │ └── Oakfile │ │ └── Oakfile │ └── test.ts ├── run-inject │ ├── env │ │ ├── Oakfile │ │ └── sub │ │ │ └── Oakfile │ └── test.ts ├── task-watch │ ├── env │ │ ├── build_c.js │ │ └── Oakfile │ └── test.ts ├── run-import-aliases │ ├── env │ │ ├── sub │ │ │ └── Oakfile │ │ └── Oakfile │ └── test.ts ├── run-targets-import │ ├── env │ │ ├── sub │ │ │ └── Oakfile │ │ └── Oakfile │ └── test.ts ├── run-mixed-cells │ ├── env │ │ └── Oakfile │ └── test.ts ├── run-hello │ ├── env │ │ └── Oakfile │ └── test.ts ├── lib-command │ ├── env │ │ └── Oakfile │ └── test.ts ├── run-fail │ ├── env │ │ └── Oakfile │ └── test.ts ├── README.md ├── utils-test │ └── duration.ts ├── run-targets-only │ ├── env │ │ └── Oakfile │ └── test.ts └── utils.ts ├── examples ├── c │ ├── .gitignore │ ├── README.md │ ├── Makefile │ ├── hellomake.h │ ├── hellomake.c │ └── hellofunc.c ├── pudding-hypen-names │ ├── output │ │ └── .gitkeep │ ├── README.md │ ├── .gitignore │ ├── Oakfile │ ├── nwsl │ │ ├── s1_get-names-html.js │ │ ├── s3_format-years.js │ │ └── s2_get-names-list.js │ ├── congress │ │ ├── s1_get-names-html.js │ │ └── s2_get-names-list.js │ ├── nhl │ │ ├── s1_get-names-html.js │ │ └── s2_get-names-list.js │ ├── mlb │ │ ├── s1_get-names-html.js │ │ └── s2_get-names-list.js │ ├── nba │ │ ├── s1_get-names-html.js │ │ └── s2_get-names-list.js │ ├── wnba │ │ ├── s1_get-names-html.js │ │ └── s2_get-names-list.js │ ├── nfl │ │ ├── s1_get-names-html.js │ │ └── s2_get-names-list.js │ ├── mls │ │ ├── s1_get-names-list.js │ │ └── s2_format-years.js │ ├── combine-all-names.js │ └── package.json ├── native-lands-colleges │ ├── geocode_colleges.py │ ├── README.md │ ├── Pipfile │ ├── Oakfile │ ├── filter_colleges.py │ ├── .gitignore │ ├── query.py │ └── Pipfile.lock ├── simple │ ├── .gitignore │ └── Makefile ├── ca-schools │ ├── .gitignore │ ├── README.md │ ├── Pipfile │ ├── to_geo.py │ ├── Oakfile.js │ └── clean_cde.py ├── mbostock_planets │ ├── .gitignore │ ├── README.md │ ├── Makefile │ └── Oakfile.js ├── youtube-colors │ ├── .gitignore │ ├── generator │ │ ├── README.md │ │ ├── pillow │ │ │ ├── Dockerfile.pillow │ │ │ └── gen_bars.py │ │ └── Oakfile │ └── Oakfile ├── barr-redaction-pixels │ ├── 1_print.sh │ ├── 4_ocr.sh │ ├── .gitignore │ ├── 0_get.sh │ ├── README.md │ ├── Dockerfile │ ├── 2_measure.sh │ ├── Oakfile │ ├── 3_parse.js │ └── 5_montage.sh ├── ghcdn │ ├── loop.py │ ├── Pipfile │ ├── convert_curr_year.py │ ├── Oakfile │ ├── consolidate_raw_backfill.py │ ├── readme.txt │ └── Pipfile.lock ├── school-accidents │ ├── Pipfile │ ├── Oakfile │ 
├── .gitignore │ └── clean_accidents.ipynb ├── ucsd-parking │ ├── Pipfile │ ├── utils.py │ ├── Oakfile │ ├── analyze.py │ ├── extract_list.py │ ├── .gitignore │ ├── convert.py │ └── aggregate_lots.py ├── delays │ └── Oakfile ├── microsoft-buildings │ └── Oakfile ├── docker-test │ └── Oakfile ├── much-logs │ └── Oakfile ├── la-metro-schedule │ ├── notify.py │ ├── upload_db.py │ └── Oakfile ├── README.md └── mbostock-carto │ └── Oakfile ├── .prettierrc ├── assets └── example.png ├── docs ├── static │ ├── term1.png │ ├── term2.png │ └── favicon.ico ├── pages │ ├── index.mdx │ ├── guides.mdx │ ├── reference │ │ ├── oakfile.mdx │ │ ├── stdlib.mdx │ │ ├── cli.mdx │ │ └── index.mdx │ ├── samples.mdx │ └── introduction.md ├── next.config.js ├── .gitignore ├── package.json ├── styles │ └── main.less └── components │ └── DocsLayout.jsx ├── src ├── .prettierrc ├── Library │ ├── index.ts │ ├── env.ts │ ├── command.ts │ ├── shell.ts │ └── library.ts ├── commands │ ├── run │ │ ├── fork.ts │ │ ├── index.ts │ │ └── ui.tsx │ ├── version.ts │ ├── logs.ts │ └── path.ts ├── Execution.ts ├── cli-utils.ts ├── Oakfile ├── Task.ts ├── oak-compile-types.ts ├── cli.ts └── decorator.ts ├── tsconfig.json ├── Oakfile.ui ├── .circleci └── config.yml ├── .gitignore ├── package.json └── README.md /site/.prettierrc: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /surge/CORS: -------------------------------------------------------------------------------- 1 | * 2 | -------------------------------------------------------------------------------- /surge/data/CA/a: -------------------------------------------------------------------------------- 1 | CA a -------------------------------------------------------------------------------- /surge/data/CA/b: -------------------------------------------------------------------------------- 1 | CA b -------------------------------------------------------------------------------- /surge/data/NY/a: -------------------------------------------------------------------------------- 1 | NY a -------------------------------------------------------------------------------- /surge/data/NY/b: -------------------------------------------------------------------------------- 1 | NY b -------------------------------------------------------------------------------- /docker/.gitignore: -------------------------------------------------------------------------------- 1 | binaries* -------------------------------------------------------------------------------- /tests/library/env/test.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/sqlite/env/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /surge/CNAME: -------------------------------------------------------------------------------- 1 | oak-test.surge.sh 2 | -------------------------------------------------------------------------------- /examples/c/.gitignore: -------------------------------------------------------------------------------- 1 | hellomake 2 | -------------------------------------------------------------------------------- /examples/pudding-hypen-names/output/.gitkeep: -------------------------------------------------------------------------------- 1 | 
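The surge/ files above (CORS, CNAME, data/CA, data/NY) are the static fixtures behind the oak-test.surge.sh deployment; the run-inject test Oakfiles later in this dump fetch data/<STATE>/a and data/<STATE>/b from it with wget. A minimal sketch of that fetch in Node — illustrative only, not a file in this repo, and it assumes the surge deployment is still live:

```js
// Illustrative sketch: fetch the same fixture that tests/run-inject downloads.
const http = require("http");

http.get("http://oak-test.surge.sh/data/CA/a", res => {
  let body = "";
  res.on("data", chunk => (body += chunk));
  // surge/data/CA/a above contains exactly "CA a".
  res.on("end", () => console.log(body));
});
```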
-------------------------------------------------------------------------------- /.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "parser": "typescript" 3 | } 4 | -------------------------------------------------------------------------------- /examples/native-lands-colleges/geocode_colleges.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/simple/.gitignore: -------------------------------------------------------------------------------- 1 | a 2 | b 3 | c 4 | d 5 | e 6 | -------------------------------------------------------------------------------- /examples/ca-schools/.gitignore: -------------------------------------------------------------------------------- 1 | *.csv 2 | *.geojson 3 | *.txt 4 | -------------------------------------------------------------------------------- /examples/mbostock_planets/.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | *.csv 3 | *.zip 4 | -------------------------------------------------------------------------------- /examples/youtube-colors/.gitignore: -------------------------------------------------------------------------------- 1 | frames/ 2 | *.mp4 3 | *.png 4 | -------------------------------------------------------------------------------- /examples/c/README.md: -------------------------------------------------------------------------------- 1 | # Compiling C Code 2 | 3 | Inspiration: [TODO]() 4 | -------------------------------------------------------------------------------- /assets/example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asg017/oak/master/assets/example.png -------------------------------------------------------------------------------- /docs/static/term1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asg017/oak/master/docs/static/term1.png -------------------------------------------------------------------------------- /docs/static/term2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asg017/oak/master/docs/static/term2.png -------------------------------------------------------------------------------- /examples/pudding-hypen-names/README.md: -------------------------------------------------------------------------------- 1 | # Hyphenated Names 2 | 3 | Inspiration: [TODO]() 4 | -------------------------------------------------------------------------------- /docs/static/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asg017/oak/master/docs/static/favicon.ico -------------------------------------------------------------------------------- /site/public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asg017/oak/master/site/public/favicon.ico -------------------------------------------------------------------------------- /examples/native-lands-colleges/README.md: -------------------------------------------------------------------------------- 1 | # Native Lands Colleges 2 | 3 | Inspiration: [TODO]() 4 | -------------------------------------------------------------------------------- /tests/run-import-watch/env/Oakfile: 
-------------------------------------------------------------------------------- 1 | name = "Penguin" 2 | 3 | import {t} with {name} from "./sub/Oakfile" -------------------------------------------------------------------------------- /examples/c/Makefile: -------------------------------------------------------------------------------- 1 | hellomake: hellomake.c hellofunc.c 2 | gcc -o hellomake hellomake.c hellofunc.c -I. 3 | -------------------------------------------------------------------------------- /examples/c/hellomake.h: -------------------------------------------------------------------------------- 1 | /* 2 | * example include file 3 | * */ 4 | 5 | void myPrintHelloMake(void); 6 | -------------------------------------------------------------------------------- /examples/ca-schools/README.md: -------------------------------------------------------------------------------- 1 | # California Public and Charter Schools 2 | 3 | Inspiration: [TODO]() 4 | -------------------------------------------------------------------------------- /docs/pages/index.mdx: -------------------------------------------------------------------------------- 1 | import Layout from '../components/DocsLayout.jsx' 2 | export default Layout 3 | 4 | # Home 5 | -------------------------------------------------------------------------------- /examples/barr-redaction-pixels/1_print.sh: -------------------------------------------------------------------------------- 1 | mkdir -p pages 2 | rm pages/* 3 | pdftoppm -png -f 1 report.pdf pages/report -------------------------------------------------------------------------------- /tests/run-simple-import/env/sub/subsub/Oakfile: -------------------------------------------------------------------------------- 1 | a = new Task({ target: "a", run: a => shell`echo -n "a" > ${a}` }); 2 | -------------------------------------------------------------------------------- /docs/pages/guides.mdx: -------------------------------------------------------------------------------- 1 | import Layout from '../components/DocsLayout.jsx' 2 | export default Layout 3 | 4 | # Guides 5 | -------------------------------------------------------------------------------- /examples/barr-redaction-pixels/4_ocr.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | find pages -name "*.png" | sort > pagelist 3 | tesseract pagelist report_ocr 4 | -------------------------------------------------------------------------------- /tests/task-target-dir/env/print_files.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | target=$1/* 4 | 5 | for file in $target; 6 | do 7 | cat $file 8 | done -------------------------------------------------------------------------------- /examples/pudding-hypen-names/.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .tmp 3 | .vscode 4 | npm-debug.log 5 | node_modules 6 | dev 7 | dist 8 | package-lock.json -------------------------------------------------------------------------------- /tests/run-inject-multiple/env/sub/Oakfile: -------------------------------------------------------------------------------- 1 | wrap = s => `A ${s}`; 2 | x = new Task({ target: "x", run: x => shell`echo -n ${wrap("x")} > ${x}` }); 3 | -------------------------------------------------------------------------------- /docs/pages/reference/oakfile.mdx: -------------------------------------------------------------------------------- 1 |
import Layout from '../../components/DocsLayout.jsx' 2 | export default Layout 3 | 4 | # Oakfile Syntax Reference 5 | -------------------------------------------------------------------------------- /examples/barr-redaction-pixels/.gitignore: -------------------------------------------------------------------------------- 1 | pages 2 | histograms 3 | montages 4 | values.csv 5 | report.pdf 6 | pagelist 7 | report_ocr.txt 8 | .DS_Store -------------------------------------------------------------------------------- /docs/pages/reference/stdlib.mdx: -------------------------------------------------------------------------------- 1 | import Layout from '../../components/DocsLayout.jsx' 2 | export default Layout 3 | 4 | # Standard Library Reference 5 | 6 | -------------------------------------------------------------------------------- /examples/barr-redaction-pixels/0_get.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | curl -o report.pdf https://assets.documentcloud.org/documents/5955118/The-Mueller-Report.pdf 3 | -------------------------------------------------------------------------------- /examples/pudding-hypen-names/Oakfile: -------------------------------------------------------------------------------- 1 | 2 | wnba_html = recipe({}) 3 | 4 | wnba_csv = recipe({}) 5 | 6 | nba_html = recipe({}) 7 | 8 | nba_csv = recipe({}) -------------------------------------------------------------------------------- /docs/pages/reference/cli.mdx: -------------------------------------------------------------------------------- 1 | import Layout from '../../components/DocsLayout.jsx' 2 | export default Layout 3 | 4 | # Command Line Interface Reference 5 | 6 | -------------------------------------------------------------------------------- /tests/run-import-watch/env/sub/write.js: -------------------------------------------------------------------------------- 1 | const fs = require('fs'); 2 | const [content, file] = process.argv.slice(2, 4); 3 | fs.writeFileSync(file, content, "utf8"); -------------------------------------------------------------------------------- /examples/c/hellomake.c: -------------------------------------------------------------------------------- 1 | #include <hellomake.h> 2 | 3 | int main() { 4 | // call a function in another file 5 | myPrintHelloMake(); 6 | 7 | return(0); 8 | } 9 | -------------------------------------------------------------------------------- /examples/barr-redaction-pixels/README.md: -------------------------------------------------------------------------------- 1 | # Barr Redaction Pixels 2 | 3 | Inspiration: [nprapps/barr-redaction-pixels](https://github.com/nprapps/barr-redaction-pixels) 4 | -------------------------------------------------------------------------------- /tests/run-import-watch/env/sub/Oakfile: -------------------------------------------------------------------------------- 1 | name = "Alex" 2 | 3 | t = new Task({ 4 | target: "file.txt", 5 | run: target => shell`node write.js ${name} ${target}` 6 | }) -------------------------------------------------------------------------------- /tests/run-inject-deep/env/sub/subsub/Oakfile: -------------------------------------------------------------------------------- 1 | wrap = s => `A ${s}`; 2 | 3 | x = new Task({ 4 | target: "x", 5 | run: x => shell`echo -n "${wrap("x")}" > ${x}` 6 | }); 7 | -------------------------------------------------------------------------------- /examples/c/hellofunc.c: -------------------------------------------------------------------------------- 1 | #include <stdio.h> 2 | #include <hellomake.h> 3 | 4 | void myPrintHelloMake(void) { 5 | 6 | printf("Hello makefiles!\n"); 7 | 8 | return; 9 | } 10 | -------------------------------------------------------------------------------- /src/.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "trailingComma": "es5", 3 | "parser": "typescript", 4 | "singleQuote": false, 5 | "semi": true, 6 | "tabWidth": 2, 7 | "useTabs": false 8 | } -------------------------------------------------------------------------------- /tests/run-inject/env/Oakfile: -------------------------------------------------------------------------------- 1 | STATE = "CA" 2 | 3 | import {c} with {STATE} from "./sub/Oakfile" 4 | 5 | d = new Task({ 6 | target: "d", 7 | run: d => shell`cat ${c} > ${d}` 8 | }) -------------------------------------------------------------------------------- /examples/ghcdn/loop.py: -------------------------------------------------------------------------------- 1 | from time import sleep 2 | 3 | i = 0 4 | while True: 5 | print('lol' + str(i)) 6 | i += 1 7 | if i > 10: 8 | break 9 | sleep(.5) 10 | 11 | -------------------------------------------------------------------------------- /tests/task-watch/env/build_c.js: -------------------------------------------------------------------------------- 1 | 2 | const { readFileSync } = require("fs"); 3 | const ins = process.argv.slice(2); 4 | ins.map(inp => process.stdout.write(readFileSync(inp))); 5 | process.stdout.write("C2"); 6 | -------------------------------------------------------------------------------- /src/Library/index.ts: -------------------------------------------------------------------------------- 1 | export { default as Library, RunLibrary } from "./library"; 2 | export const LibraryKeys = new Set([ 3 | "Task", 4 | "shell", 5 | "command", 6 | "env", 7 | "invalidation", 8 | ]); 9 | -------------------------------------------------------------------------------- /src/Library/env.ts: -------------------------------------------------------------------------------- 1 | export default function env(key: string) { 2 | if (typeof key !== "string") { 3 | throw Error(`env: Invalid argument supplied (must be a string) ${key}`); 4 | } 5 | return process.env[key]; 6 | } 7 | -------------------------------------------------------------------------------- /tests/run-import-aliases/env/sub/Oakfile: -------------------------------------------------------------------------------- 1 | a = new Task({ 2 | target: "a", 3 | run: a => shell`echo -n "a" > ${a}` 4 | }); 5 | 6 | b = new Task({ 7 | target: "b", 8 | run: b => shell`cat ${a} > ${b}` 9 | }); 10 | -------------------------------------------------------------------------------- /tests/run-simple-import/env/Oakfile: -------------------------------------------------------------------------------- 1 | import { c } from "./sub/Oakfile"; 2 | 3 | d = new Task({ target: "d", run: d => shell`cat ${c} > ${d}` }); 4 | 5 | f = new Task({ target: "f", run: f => shell`echo -n "fff" > ${f}` }); 6 | -------------------------------------------------------------------------------- /tests/run-targets-import/env/sub/Oakfile: -------------------------------------------------------------------------------- 1 | a = new Task({ 2 | target: "a", 3 | run: a => shell`echo -n "a" > ${a}` 4 | }); 5 | 6 | b = new Task({ 7 | target: "b", 8 | run: b => shell`cat ${a} > ${b}` 9 | }); 10 |
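The Library sources above (src/Library/index.ts and src/Library/env.ts) list the builtins every Oakfile cell can use: Task, shell, command, env, and invalidation. A minimal sketch of how env composes with Task and shell, in the same style as the test fixtures — illustrative only, not an Oakfile in this repo:

```js
// Hypothetical Oakfile: env reads a variable from process.env
// (see src/Library/env.ts above); Task and shell come from the same Library.
name = env("USER")

greeting = new Task({
  target: "greeting.txt",
  // rewrites the target whenever the cell's inputs change
  run: t => shell`echo -n "hello ${name}" > ${t}`
})
```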
-------------------------------------------------------------------------------- /examples/simple/Makefile: -------------------------------------------------------------------------------- 1 | all: a b c d e 2 | .PHONY : all 3 | .DEFAULT_GOAL := all 4 | 5 | a: 6 | echo "a" > a 7 | b: 8 | echo "b" > b 9 | c: a b 10 | cat a b > c 11 | d: c 12 | cat c > d 13 | e: d 14 | cat d > e 15 | -------------------------------------------------------------------------------- /tests/run-simple-import/env/sub/Oakfile: -------------------------------------------------------------------------------- 1 | import { a } from "./subsub/Oakfile"; 2 | 3 | b = new Task({ target: "b", run: b => shell`echo -n "b" > ${b}` }); 4 | c = new Task({ target: "c", run: c => shell`cat ${a} ${b} > ${c}` }); 5 | -------------------------------------------------------------------------------- /examples/ghcdn/Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | name = "pypi" 3 | url = "https://pypi.org/simple" 4 | verify_ssl = true 5 | 6 | [dev-packages] 7 | 8 | [packages] 9 | pandas = "*" 10 | 11 | [requires] 12 | python_version = "3.6" 13 | -------------------------------------------------------------------------------- /docker/Dockerfile.alpine: -------------------------------------------------------------------------------- 1 | FROM alpine 2 | 3 | RUN mkdir /lib64 && ln -s /lib/libc.musl-x86_64.so.1 /lib64/ld-linux-x86-64.so.2 4 | RUN apk add libc6-compat --no-cache 5 | 6 | COPY binaries /opt/oak/bin 7 | 8 | ENV PATH="/opt/oak/bin:${PATH}" 9 | -------------------------------------------------------------------------------- /src/commands/run/fork.ts: -------------------------------------------------------------------------------- 1 | import { oak_run } from "../../core/run"; 2 | const [oakfile, runHash, ...targets] = process.argv.slice(2, process.argv.length) 3 | oak_run({ 4 | filename:oakfile, 5 | targets: targets, 6 | runHash 7 | }).then(()=>process.exit(0)) -------------------------------------------------------------------------------- /examples/mbostock_planets/README.md: -------------------------------------------------------------------------------- 1 | # mbostock-exoplanets 2 | 3 | This is a modified Makefile from Mike Bostock's [exoplanet gist](https://gist.github.com/mbostock/3007180) that outputs a CSV file of a list of exoplanets from the Planetary Habitability Laboratory.
4 | -------------------------------------------------------------------------------- /tests/run-targets-import/env/Oakfile: -------------------------------------------------------------------------------- 1 | import { b } from "sub/Oakfile"; 2 | 3 | x = new Task({ 4 | target: "x", 5 | run: x => shell`echo -n "x" > ${x}` 6 | }); 7 | 8 | y = new Task({ 9 | target: "y", 10 | run: y => shell`cat ${x} ${b} > ${y}` 11 | }); 12 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "outDir": "./built", 4 | "allowJs": true, 5 | "target": "es6", 6 | "module": "commonjs", 7 | "esModuleInterop": true, 8 | "jsx":"react" 9 | }, 10 | "include": ["./src/**/*"] 11 | } 12 | -------------------------------------------------------------------------------- /tests/run-inject-deep/env/Oakfile: -------------------------------------------------------------------------------- 1 | wrap = s => { 2 | console.log('Wow wrap c called'); 3 | return `C ${s}`; 4 | } 5 | 6 | import {x} with {wrap} from "./sub/Oakfile"; 7 | 8 | y = new Task({ 9 | target: "y", 10 | run: y => shell`cat ${x} > ${y}` 11 | }) -------------------------------------------------------------------------------- /tests/task-target-dir/env/Oakfile: -------------------------------------------------------------------------------- 1 | a_dir = new Task({ 2 | target: "a", 3 | run: a_dir => command("node", ["generate_files.js", a_dir, 5, "a"]) 4 | }); 5 | 6 | b = new Task({ 7 | target: "b.txt", 8 | run: b => shell`./print_files.sh ${a_dir} > ${b}` 9 | }); 10 | -------------------------------------------------------------------------------- /examples/ca-schools/Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | name = "pypi" 3 | url = "https://pypi.org/simple" 4 | verify_ssl = true 5 | 6 | [dev-packages] 7 | 8 | [packages] 9 | geopandas = "*" 10 | shapely = "*" 11 | pandas = "*" 12 | 13 | [requires] 14 | python_version = "3.6" 15 | -------------------------------------------------------------------------------- /surge/raw_people.csv: -------------------------------------------------------------------------------- 1 | id,name,location 2 | 0,Alex,La Jolla 3 | 1,Bob,Memphis 4 | 2,Chrissy,Sacramento 5 | 3,Dylan,Denver 6 | 4,Elliot,New York City 7 | 5,Fred,Austin 8 | 6,Gillian,Dallas 9 | 7,Alex,Flower 10 | 8,Dog,Anaheim 11 | 9,Alex,La Jolla 12 | 10,DatBoi,Seattle 13 | -------------------------------------------------------------------------------- /tests/run-mixed-cells/env/Oakfile: -------------------------------------------------------------------------------- 1 | getName = name => `${name}.txt`; 2 | 3 | a = new Task({ 4 | target: getName("a"), 5 | run: a => shell`echo -n "a" > ${a}` 6 | }); 7 | 8 | b = new Task({ 9 | target: getName("b"), 10 | run: b => shell`cat ${a} > ${b}` 11 | }); 12 | -------------------------------------------------------------------------------- /examples/native-lands-colleges/Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | name = "pypi" 3 | url = "https://pypi.org/simple" 4 | verify_ssl = true 5 | 6 | [dev-packages] 7 | 8 | [packages] 9 | pandas = "*" 10 | shapely = "*" 11 | tqdm = "*" 12 | 13 | [requires] 14 | python_version = "3.6" 15 | -------------------------------------------------------------------------------- /examples/school-accidents/Pipfile: 
-------------------------------------------------------------------------------- 1 | [[source]] 2 | name = "pypi" 3 | url = "https://pypi.org/simple" 4 | verify_ssl = true 5 | 6 | [dev-packages] 7 | 8 | [packages] 9 | pandas = "*" 10 | notebook = "*" 11 | papermill = "*" 12 | 13 | [requires] 14 | python_version = "3.6" 15 | -------------------------------------------------------------------------------- /tests/run-import-aliases/env/Oakfile: -------------------------------------------------------------------------------- 1 | import { b as myB } from "./sub/Oakfile"; 2 | 3 | x = new Task({ 4 | target: "x", 5 | run: x => shell`echo -n "x" > ${x}` 6 | }); 7 | 8 | y = new Task({ 9 | target: "y", 10 | run: y => shell`cat ${x} ${myB} > ${y}` 11 | }); 12 | -------------------------------------------------------------------------------- /tests/run-inject-deep/env/sub/Oakfile: -------------------------------------------------------------------------------- 1 | wrap = s => { 2 | console.log('Wow wrap b called'); 3 | return `B ${s}`; 4 | } 5 | 6 | import {x} with {wrap} from "./subsub/Oakfile"; 7 | 8 | y = new Task({ 9 | target: "y", 10 | run: y => shell`cat ${x} > ${y}` 11 | }) -------------------------------------------------------------------------------- /examples/ucsd-parking/Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | name = "pypi" 3 | url = "https://pypi.org/simple" 4 | verify_ssl = true 5 | 6 | [dev-packages] 7 | 8 | [packages] 9 | bs4 = "*" 10 | requests = "*" 11 | pandas = "*" 12 | xlrd = "*" 13 | 14 | [requires] 15 | python_version = "3.6" 16 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM debian:buster-slim 2 | 3 | #RUN apt-get update && \ 4 | #DEBIAN_FRONTEND=noninteractive apt-get -y install sqlite3 && \ 5 | #rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* 6 | 7 | 8 | COPY binaries /opt/oak/bin 9 | 10 | ENV PATH="/opt/oak/bin:${PATH}" 11 | -------------------------------------------------------------------------------- /src/Execution.ts: -------------------------------------------------------------------------------- 1 | import { WriteStream } from "fs-extra"; 2 | import { ChildProcess } from "child_process"; 3 | 4 | export type Execution = { 5 | process: ChildProcess; 6 | outStream?: WriteStream; 7 | config: { 8 | stdout: boolean; 9 | stderr: boolean; 10 | }; 11 | }; 12 | -------------------------------------------------------------------------------- /tests/run-hello/env/Oakfile: -------------------------------------------------------------------------------- 1 | a = new Task({ 2 | target: "a", 3 | run: a => shell`echo -n "a" > ${a}` 4 | }); 5 | b = new Task({ 6 | target: "b", 7 | run: b => shell`echo -n "b" > ${b}` 8 | }); 9 | c = new Task({ 10 | target: "c", 11 | run: c => shell`cat ${a} ${b} > ${c}` 12 | }); 13 | -------------------------------------------------------------------------------- /examples/barr-redaction-pixels/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM node:12.9-alpine 2 | LABEL name="Barr Redaction Pixels" 3 | LABEL maintainer="Alex Garcia " 4 | 5 | RUN apk add --update \ 6 | imagemagick \ 7 | tesseract-ocr \ 8 | poppler \ 9 | poppler-utils \ 10 | curl 11 | WORKDIR /app -------------------------------------------------------------------------------- /docs/next.config.js: 
-------------------------------------------------------------------------------- 1 | // next.config.js 2 | const withLess = require('@zeit/next-less') 3 | 4 | const withMDX = require("@zeit/next-mdx")({ 5 | extension: /\.mdx?$/, 6 | options: {} 7 | }); 8 | 9 | module.exports = withLess(withMDX({ 10 | pageExtensions: ["js", "jsx", "md", "mdx"], 11 | cssModules: true 12 | })); 13 | -------------------------------------------------------------------------------- /examples/delays/Oakfile: -------------------------------------------------------------------------------- 1 | a = new Task({ 2 | target: "a", 3 | run: a => shell`sleep 2 && echo -n "a" > ${a}` 4 | }); 5 | b = new Task({ 6 | target: "b", 7 | run: b => shell`sleep 3 && echo -n "b" > ${b}` 8 | }); 9 | c = new Task({ 10 | target: "c", 11 | run: c => shell`sleep 1.5 && cat ${a} ${b} > ${c}` 12 | }); 13 | -------------------------------------------------------------------------------- /tests/task-watch/env/Oakfile: -------------------------------------------------------------------------------- 1 | a = new Task({ target: "a", run: a => command("echo", ["-n", "a"], a) }); 2 | 3 | b = new Task({ target: "b", run: b => command("echo", ["-n", "b"], b) }); 4 | 5 | c = new Task({ 6 | target: "c", 7 | run: c => command("node", ["build_c.js", a, b], c), 8 | watch: ["build_c.js"] 9 | }); 10 | -------------------------------------------------------------------------------- /src/cli-utils.ts: -------------------------------------------------------------------------------- 1 | // For the --file=path/to/file.txt CLI argument. 2 | import untildify from "untildify"; 3 | import { isAbsolute, join } from "path"; 4 | 5 | export function fileArgument(inputPath: string): string { 6 | const expand = untildify(inputPath); 7 | return isAbsolute(expand) ? 
expand : join(process.cwd(), expand); 8 | } 9 | -------------------------------------------------------------------------------- /examples/microsoft-buildings/Oakfile: -------------------------------------------------------------------------------- 1 | raw_data = new Task({ 2 | target: "state.zip", 3 | run: raw_data => shell`wget -O ${raw_data} "https://usbuildingdata.blob.core.windows.net/usbuildings-v1-1/DistrictofColumbia.zip"` 4 | }) 5 | 6 | data = new Task({ 7 | target: "data", 8 | run: data => shell`unzip -d ${data} ${raw_data}` 9 | }) 10 | -------------------------------------------------------------------------------- /examples/youtube-colors/generator/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | youtube-dl https://www.youtube.com/watch?v=Rb4lgOiHBZo 4 | 5 | docker run -it --rm -v $PWD:/tmp jrottenberg/ffmpeg:4.1-alpine -i "/tmp/OK Go - Skyscrapers - Official Video-Rb4lgOiHBZo.mp4" -r 30 /tmp/frames/$filename%09d.jpg 6 | 7 | docker run -it --rm -v $PWD:/tmp pillow-test python3 /tmp/gen_bars.py 8 | -------------------------------------------------------------------------------- /examples/ucsd-parking/utils.py: -------------------------------------------------------------------------------- 1 | def sort_qtr(qtr): 2 | qtr = qtr['quarter'] 3 | year_val = int(qtr['year'][:2]) * 10 4 | qtr_val = None 5 | term = qtr['term'] 6 | if term == 'WI': 7 | qtr_val = 3 8 | elif term == 'SP': 9 | qtr_val = 4 10 | elif term == 'SU': 11 | qtr_val = 1 12 | elif term == 'FA': 13 | qtr_val = 2 14 | return year_val + qtr_val -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 2 | 3 | # dependencies 4 | /node_modules 5 | /.pnp 6 | .pnp.js 7 | 8 | # testing 9 | /coverage 10 | 11 | # next.js 12 | /.next/ 13 | /out/ 14 | 15 | # production 16 | /build 17 | 18 | # misc 19 | .DS_Store 20 | .env* 21 | 22 | # debug 23 | npm-debug.log* 24 | yarn-debug.log* 25 | yarn-error.log* 26 | -------------------------------------------------------------------------------- /site/.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 
2 | 3 | # dependencies 4 | /node_modules 5 | /.pnp 6 | .pnp.js 7 | 8 | # testing 9 | /coverage 10 | 11 | # next.js 12 | /.next/ 13 | /out/ 14 | 15 | # production 16 | /build 17 | 18 | # misc 19 | .DS_Store 20 | .env* 21 | 22 | # debug 23 | npm-debug.log* 24 | yarn-debug.log* 25 | yarn-error.log* 26 | -------------------------------------------------------------------------------- /tests/lib-command/env/Oakfile: -------------------------------------------------------------------------------- 1 | echo = (path, content) => command("echo", ["-n", content], path); 2 | 3 | cat = (path, files) => command("cat", files, path); 4 | 5 | a = new Task({ 6 | target: "a", 7 | run: a => echo(a, "a") 8 | }); 9 | 10 | b = new Task({ 11 | target: "b", 12 | run: a => echo(a, "b") 13 | }); 14 | c = new Task({ 15 | target: "c", 16 | run: c => cat(c, [a, b]) 17 | }); 18 | -------------------------------------------------------------------------------- /tests/run-inject/env/sub/Oakfile: -------------------------------------------------------------------------------- 1 | STATE = "NY"; 2 | 3 | a = new Task({ 4 | target: "a", 5 | run: a => shell`wget -O ${a} --quiet http://oak-test.surge.sh/data/${STATE}/a` 6 | }); 7 | 8 | b = new Task({ 9 | target: "b", 10 | run: b => shell`wget -O ${b} --quiet http://oak-test.surge.sh/data/${STATE}/b` 11 | }); 12 | 13 | c = new Task({ target: "c", run: c => shell`cat ${a} ${b} > ${c}` }); 14 | -------------------------------------------------------------------------------- /site/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "oak-site", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "dev": "next dev --port=3000 --hostname=0.0.0.0", 7 | "build": "next build", 8 | "start": "next start" 9 | }, 10 | "dependencies": { 11 | "highlight.js": "^9.18.1", 12 | "next": "9.2.2", 13 | "react": "16.12.0", 14 | "react-dom": "16.12.0" 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /src/commands/version.ts: -------------------------------------------------------------------------------- 1 | import readPackageJson from "read-package-json"; 2 | import { join } from "path"; 3 | 4 | const pkgPath = join(__dirname, "..", "..", "package.json"); 5 | 6 | export function versionCommand() { 7 | readPackageJson(pkgPath, (err: any, data: any) => { 8 | if (err) throw Error(`Could not read package.json at ${pkgPath}`); 9 | console.log(data.version); 10 | }); 11 | } 12 | -------------------------------------------------------------------------------- /examples/barr-redaction-pixels/2_measure.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | mkdir -p histograms 3 | rm histograms/* 4 | 5 | red="rgb(236,64,37)" 6 | blue="rgb(5,51,255)" 7 | 8 | for file in pages/*.png; do 9 | convert $file -fuzz 20% \ 10 | -channel A \ 11 | -transparent $red \ 12 | -transparent $blue \ 13 | -negate +channel -posterize 2 \ 14 | -format %c histogram:info:histograms/${file#pages/}.txt 15 | done -------------------------------------------------------------------------------- /tests/run-inject-multiple/env/Oakfile: -------------------------------------------------------------------------------- 1 | wrap1 = s => `B1 ${s}` 2 | 3 | wrap2 = s => `B2 ${s}` 4 | 5 | import {x as x1} with {wrap1 as wrap} from './sub/Oakfile' 6 | import {x as x2} with {wrap2 as wrap} from './sub/Oakfile' 7 | 8 | y1 = new Task({ 9 | target: 'y1', 10 | run: y1 => command('cat', [x1], y1) 11 | }) 12 | 13 | y2 = new Task({ 14 | target: 'y2', 15 | run: y2 => command('cat', [x2], y2) 16 | }) -------------------------------------------------------------------------------- /examples/docker-test/Oakfile: -------------------------------------------------------------------------------- 1 | a = new Task({ 2 | target: "a.txt", 3 | run: a => shell`docker run --rm alpine echo "supA" > ${a}` 4 | }); 5 | 6 | b = new Task({ 7 | target: "b.txt", 8 | run: b => shell`docker run --rm alpine echo "supB" > ${b}` 9 | }); 10 | 11 | c = new Task({ 12 | target: "c.txt", 13 | run: c => 14 | shell`docker run --rm -v ${a}:/oak/a -v ${b}:/oak/b alpine cat /oak/a /oak/b > ${c}` 15 | }); 16 | -------------------------------------------------------------------------------- /tests/run-fail/env/Oakfile: -------------------------------------------------------------------------------- 1 | // passes alright 2 | a = new Task({ 3 | target: "a", 4 | run: target => shell`echo -n "a" > ${target}` 5 | }); 6 | 7 | // creates file, but exits non-zero 8 | b = new Task({ 9 | target: "b", 10 | run: target => shell`echo -n "b" > ${target}; exit 1;` 11 | }); 12 | 13 | // doesn't create file, exits with 0 14 | c = new Task({ 15 | target: "c", 16 | run: target => shell`exit 0` 17 | }); 18 | -------------------------------------------------------------------------------- /docs/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "oak-docs", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "dev": "next dev", 7 | "build": "next build", 8 | "start": "next start" 9 | }, 10 | "dependencies": { 11 | "@mdx-js/loader": "^1.3.0", 12 | "@zeit/next-less": "^1.0.1", 13 | "@zeit/next-mdx": "^1.2.0", 14 | "less": "^3.10.3", 15 | "next": "9.0.3", 16 | "react": "16.9.0", 17 | "react-dom": "16.9.0" 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /examples/youtube-colors/generator/pillow/Dockerfile.pillow: -------------------------------------------------------------------------------- 1 | FROM python:3.8.1-alpine3.11 2 | 3 | RUN apk --no-cache add \ 4 | build-base \ 5 | python3-dev \ 6 | zlib-dev \ 7 | jpeg-dev \ 8 | freetype-dev \ 9 | lcms2-dev \ 10 | openjpeg-dev \ 11 | tiff-dev \ 12 | tk-dev \ 13 | tcl-dev \ 14 | harfbuzz-dev \ 15 | fribidi-dev 16 | 17 | RUN python -m pip install Pillow 18 | 19 | COPY gen_bars.py /scripts/gen_bars.py -------------------------------------------------------------------------------- /tests/library/test.ts: -------------------------------------------------------------------------------- 1 | import { Library } from "../../src/Library"; 2 | import test from "tape"; 3 | import { envFile, open } from "../utils"; 4 | import shell from "../../src/Library/shell"; 5 | import { Execution } from "../../src/Execution"; 6 | 7 | const env = envFile(__dirname); 8 | 9 | test("Library.ts", async t => { 10 | t.test("shell", async st => { 11 | const b1 = await shell`echo "hello dog"`; 12 | st.skip(); //typeof b1, "execution"); 13 | st.end(); 14 | }); 15 | t.end(); 16 | }); 17 | -------------------------------------------------------------------------------- /tests/task-target-dir/env/generate_files.js: -------------------------------------------------------------------------------- 1 | const fs = require("fs"); 2 | const path = require("path"); 3 | 4 | const outputDir = process.argv[2]; 5 | const numFiles = process.argv[3]; 6 | const contentPrefix = process.argv[4]; 7 | 8 |
if(!fs.existsSync(outputDir)) 9 | fs.mkdirSync(outputDir); 10 | 11 | console.log(`generating ${numFiles} files inside ${outputDir} ...`); 12 | for (let i = 0; i < numFiles; i++) { 13 | fs.writeFileSync(path.join(outputDir, `file${i}`), `${contentPrefix}${i}`); 14 | } 15 | -------------------------------------------------------------------------------- /docs/pages/reference/index.mdx: -------------------------------------------------------------------------------- 1 | import Layout from '../../components/DocsLayout.jsx' 2 | export default Layout 3 | 4 | # API Reference 5 | 6 | ## [Command Line Reference](/reference/cli) 7 | 8 | Commands, arguments, flags, and options for the `oak` command line interface. 9 | 10 | 11 | ## [Standard Library Reference](/reference/stdlib) 12 | 13 | The builtin cells that are used in Oakfiles. 14 | 15 | 16 | ## [Oakfile Syntax Reference](/reference/oakfile) 17 | 18 | The "almost javascript" syntax used by Oakfiles. -------------------------------------------------------------------------------- /examples/school-accidents/Oakfile: -------------------------------------------------------------------------------- 1 | raw_accidents = new Task({ 2 | target: "raw_accidents.csv", 3 | run: raw_accidents => 4 | shell`wget -O ${raw_accidents} --no-verbose "https://data.lacity.org/api/views/d5tf-ez2w/rows.csv?accessType=DOWNLOAD"` 5 | }); 6 | 7 | accidents = new Task({ 8 | target: "accidents.csv", 9 | run: accidents => 10 | shell`pipenv run papermill clean_accidents.ipynb /dev/null -p raw_accidents_path ${raw_accidents} -p accidents_path ${accidents}`, 11 | watch: ["clean_accidents.ipynb"] 12 | }); 13 | -------------------------------------------------------------------------------- /docs/pages/samples.mdx: -------------------------------------------------------------------------------- 1 | import Layout from '../components/DocsLayout.jsx' 2 | export default Layout 3 | 4 | # Samples 5 | 6 | 7 | ## Python 8 | 9 | ``` 10 | import argparse 11 | from os import path 12 | 13 | parser = argparse.ArgumentParser(description='Process CLI arguments...') 14 | parser.add_argument('-i', '--input') 15 | parser.add_argument('-o', '--output') 16 | args = parser.parse_args() 17 | 18 | dir_path = path.dirname(path.realpath(__file__)) 19 | 20 | INPUT_FILE = path.join(dir_path, args.input) 21 | OUTPUT_FILE = path.join(dir_path, args.output) 22 | 23 | 24 | ``` -------------------------------------------------------------------------------- /Oakfile.ui: -------------------------------------------------------------------------------- 1 | txt = "yfer" 2 | 3 | a = new Task({ 4 | target: "a", 5 | run: target => shell`sleep 5; echo -n ${txt} > ${target}` 6 | }) 7 | 8 | b = new Task({ 9 | target: "b", 10 | run: target => shell`sleep 1.5; echo -n ${txt} > ${target}` 11 | }) 12 | 13 | c = new Task({ 14 | target: "c", 15 | run: target => shell`sleep 1; cat ${a} ${b} > ${target}` 16 | }) 17 | 18 | err = new Task({ 19 | target: "err", 20 | run: target => shell`ntfound` 21 | }) 22 | 23 | long = new Task({ 24 | target: "long", 25 | run: target => shell`sleep 3; echo -n ${txt} > ${target}` 26 | }) -------------------------------------------------------------------------------- /examples/pudding-hypen-names/nwsl/s1_get-names-html.js: -------------------------------------------------------------------------------- 1 | const fs = require('fs'); 2 | const d3 = require('d3'); 3 | const request = require('request'); 4 | 5 | const OUT_PATH = './output/nwsl' 6 | const years = d3.range(2016, 2020) 7 | 8 | async 
function getNamesHTML(year) { 9 | const url = `http://www.nwslsoccer.com/stats?season=${year}#players` 10 | 11 | return new Promise((resolve, reject) => { 12 | request(url, (err, response, body) => { 13 | fs.writeFileSync(`${OUT_PATH}/season-${year}.html`, body); 14 | }) 15 | }) 16 | } 17 | 18 | function init() { 19 | years.map(getNamesHTML) 20 | } 21 | 22 | init(); 23 | -------------------------------------------------------------------------------- /examples/pudding-hypen-names/congress/s1_get-names-html.js: -------------------------------------------------------------------------------- 1 | const fs = require('fs'); 2 | const d3 = require('d3'); 3 | const request = require('request'); 4 | 5 | const OUT_PATH = './output/congress' 6 | const pages = d3.range(1, 11) 7 | 8 | async function getNamesHTML(page) { 9 | const url = `https://www.congress.gov/members?pageSize=250&page=${page}` 10 | 11 | return new Promise((resolve, reject) => { 12 | request(url, (err, response, body) => { 13 | fs.writeFileSync(`${OUT_PATH}/names-${page}.html`, body); 14 | }) 15 | }) 16 | } 17 | 18 | function init() { 19 | pages.map(getNamesHTML) 20 | } 21 | 22 | init(); 23 | -------------------------------------------------------------------------------- /tests/run-mixed-cells/test.ts: -------------------------------------------------------------------------------- 1 | import test from "tape"; 2 | import { removeSync } from "fs-extra"; 3 | import { oak_run } from "../../src/core/run"; 4 | import { open, envFile } from "../utils"; 5 | 6 | const env = envFile(__dirname); 7 | 8 | function cleanUp() { 9 | removeSync(env("oak_data")); 10 | removeSync(env(".oak")); 11 | } 12 | 13 | test.onFinish(() => { 14 | cleanUp(); 15 | }); 16 | 17 | cleanUp(); 18 | 19 | test("run-mixed-cell", async t => { 20 | await oak_run({ filename: env("Oakfile"), targets: [] }); 21 | t.equals((await open(env("oak_data/a.txt"))).content, "a"); 22 | t.equals((await open(env("oak_data/b.txt"))).content, "a"); 23 | t.end(); 24 | }); 25 | -------------------------------------------------------------------------------- /examples/mbostock_planets/Makefile: -------------------------------------------------------------------------------- 1 | GENERATED_FILES = \ 2 | exoplanets.csv 3 | 4 | all: $(GENERATED_FILES) 5 | 6 | clean: 7 | rm -rf -- $(GENERATED_FILES) 8 | 9 | build/exoplanets.zip: 10 | mkdir -p build 11 | curl -o $@ 'http://www.hpcf.upr.edu/~abel/phl/phl_hec_all_confirmed.csv.zip' 12 | 13 | planets.csv: 14 | mkdir -p build 15 | wget https://gist.github.com/mbostock/3007180/raw/79339d19b6c9fea256ab9e99f7f0be18372904bf/planets.csv 16 | mv planets.csv build/ 17 | ls 18 | 19 | 20 | exoplanets.csv: build/exoplanets.zip planets.csv 21 | unzip -u -d build $< 22 | cp -f build/planets.csv $@ 23 | cut -d, -f1,12,46 build/phl_hec_all_confirmed.csv | tail -n+2 >> $@ 24 | 25 | 26 | -------------------------------------------------------------------------------- /docker/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: binaries docker alpine alpine-binaries 2 | 3 | docker: binaries 4 | docker build -t oak:latest -f Dockerfile . 5 | 6 | # NOTE: I dont think alpine works 7 | # docker is hard and i lave limits 8 | alpine: alpine-binaries 9 | docker build -t oak-alpine:latest -f Dockerfile.alpine . 
10 | 11 | alpine-binaries: 12 | rm -rf binaries-alpine 13 | mkdir -p binaries-alpine/node_modules 14 | cp ../pkg/oak-alpine binaries-alpine/oak 15 | cp -r ../node_modules/better-sqlite3 binaries-alpine/node_modules 16 | 17 | binaries: 18 | rm -rf binaries 19 | mkdir -p binaries 20 | cp ../pkg/oak-linux binaries/oak 21 | cp ../node_modules/better-sqlite3/build/Release/better_sqlite3.node binaries -------------------------------------------------------------------------------- /examples/pudding-hypen-names/nhl/s1_get-names-html.js: -------------------------------------------------------------------------------- 1 | const fs = require('fs'); 2 | const d3 = require('d3'); 3 | const request = require('request'); 4 | 5 | const OUT_PATH = './output/nhl' 6 | const abcs = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 7 | 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'] 8 | 9 | async function getNamesHTML(letter) { 10 | const url = `https://www.hockey-reference.com/players/${letter}/` 11 | console.log(url) 12 | 13 | return new Promise((resolve, reject) => { 14 | request(url, (err, response, body) => { 15 | fs.writeFileSync(`${OUT_PATH}/names-${letter}.html`, body); 16 | }) 17 | }) 18 | } 19 | 20 | function init() { 21 | abcs.map(getNamesHTML) 22 | } 23 | 24 | init(); 25 | -------------------------------------------------------------------------------- /examples/pudding-hypen-names/mlb/s1_get-names-html.js: -------------------------------------------------------------------------------- 1 | const fs = require('fs'); 2 | const d3 = require('d3'); 3 | const request = require('request'); 4 | 5 | const OUT_PATH = './output/mlb' 6 | const abcs = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 7 | 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'] 8 | 9 | async function getNamesHTML(letter) { 10 | const url = `https://www.baseball-reference.com/players/${letter}/` 11 | console.log(url) 12 | 13 | return new Promise((resolve, reject) => { 14 | request(url, (err, response, body) => { 15 | fs.writeFileSync(`${OUT_PATH}/names-${letter}.html`, body); 16 | }) 17 | }) 18 | } 19 | 20 | function init() { 21 | abcs.map(getNamesHTML) 22 | } 23 | 24 | init(); 25 | -------------------------------------------------------------------------------- /examples/pudding-hypen-names/nba/s1_get-names-html.js: -------------------------------------------------------------------------------- 1 | const fs = require('fs'); 2 | const d3 = require('d3'); 3 | const request = require('request'); 4 | 5 | const OUT_PATH = './output/nba' 6 | const abcs = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 7 | 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'] 8 | 9 | async function getNamesHTML(letter) { 10 | const url = `https://www.basketball-reference.com/players/${letter}/` 11 | console.log(url) 12 | 13 | return new Promise((resolve, reject) => { 14 | request(url, (err, response, body) => { 15 | fs.writeFileSync(`${OUT_PATH}/names-${letter}.html`, body); 16 | }) 17 | }) 18 | } 19 | 20 | function init() { 21 | abcs.map(getNamesHTML) 22 | } 23 | 24 | init(); 25 | -------------------------------------------------------------------------------- /examples/pudding-hypen-names/wnba/s1_get-names-html.js: -------------------------------------------------------------------------------- 1 | const fs = require('fs'); 2 | const d3 = require('d3'); 3 | const request = require('request'); 4 | 5 | const OUT_PATH = './output/wnba' 6 | const abcs = ['a', 'b', 
'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 7 | 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'] 8 | 9 | async function getNamesHTML(letter) { 10 | const url = `https://www.basketball-reference.com/wnba/players/${letter}/` 11 | console.log(url) 12 | 13 | return new Promise((resolve, reject) => { 14 | request(url, (err, response, body) => { 15 | fs.writeFileSync(`${OUT_PATH}/names-${letter}.html`, body); 16 | }) 17 | }) 18 | } 19 | 20 | function init() { 21 | abcs.map(getNamesHTML) 22 | } 23 | 24 | init(); 25 | -------------------------------------------------------------------------------- /examples/ucsd-parking/Oakfile: -------------------------------------------------------------------------------- 1 | parking_html = recipe({ 2 | target: 'quarterly_tables.html', 3 | run: parking_html => shell`wget -O ${parking_html} http://rmp-wapps.ucsd.edu/TS/Survey/Parking%20Space%20Inventory/Quarterly%20Tables/Contents.html` 4 | }) 5 | 6 | wb_dir = recipe({ 7 | target: 'quarterly_workbooks', 8 | run: wb_dir => shell`mkdir -p ${wb_dir}` 9 | }) 10 | 11 | wbs_list = recipe({ 12 | target: 'workbooks_list.txt', 13 | run: wbs_list => shell`pipenv run python extract_list.py --input-html=${parking_html} --output-dir=${wb_dir} --output-list=${wbs_list}` 14 | }) 15 | 16 | raw_lots_csv = recipe({ 17 | target: 'raw_lots.csv', 18 | run: raw_lots_csv => shell`pipenv run python aggregate_lots.py --workbooks=${wbs_list} --output-csv=${raw_lots_csv}` 19 | }) 20 | 21 | -------------------------------------------------------------------------------- /examples/pudding-hypen-names/nfl/s1_get-names-html.js: -------------------------------------------------------------------------------- 1 | const fs = require('fs'); 2 | const d3 = require('d3'); 3 | const request = require('request'); 4 | 5 | const OUT_PATH = './output/nfl' 6 | const abcs = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 7 | 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'] 8 | 9 | async function getNamesHTML(letter) { 10 | const uppercase = letter.toUpperCase() 11 | const url = `https://www.pro-football-reference.com/players/${uppercase}/` 12 | console.log(url) 13 | 14 | return new Promise((resolve, reject) => { 15 | request(url, (err, response, body) => { 16 | fs.writeFileSync(`${OUT_PATH}/names-${letter}.html`, body); 17 | }) 18 | }) 19 | } 20 | 21 | function init() { 22 | abcs.map(getNamesHTML) 23 | } 24 | 25 | init(); 26 | -------------------------------------------------------------------------------- /docs/styles/main.less: -------------------------------------------------------------------------------- 1 | @min-width: 500px; 2 | 3 | .global { 4 | border: 1px solid black; 5 | margin: 20px; 6 | padding: 20px; 7 | min-width: @min-width; 8 | img { 9 | max-width: 100%; 10 | min-width: @min-width; 11 | height: auto; 12 | margin: 0 auto; 13 | display: block; 14 | } 15 | } 16 | 17 | .main { 18 | min-height: 100vh; 19 | display: grid; 20 | grid-template-columns: 16rem auto; 21 | max-width: 1024px; 22 | margin: 0 auto; 23 | border: 1px solid green; 24 | } 25 | 26 | .container { 27 | border: 1px solid red; 28 | } 29 | .sidebar { 30 | font-size: 20px; 31 | } 32 | .sidebaritem { 33 | font-size: 200px; 34 | } 35 | @media only screen and (max-width: 650px) { 36 | .main { 37 | //display: block; 38 | grid-template-columns: auto; 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /examples/mbostock_planets/Oakfile.js: 
-------------------------------------------------------------------------------- 1 | upr_all = new Task({ 2 | target: "phl_hec_all_confirmed.csv", 3 | run: upr_all => shell`echo "doing upr_all" 4 | curl 'http://www.hpcf.upr.edu/~abel/phl/phl_hec_all_confirmed.csv.zip' -O phl_hec_all_confirmed.csv.zip 5 | unzip -u phl_hec_all_confirmed.csv.zip 6 | mv phl_hec_all_confirmed.csv ${upr_all}` 7 | }); 8 | 9 | planets = new Task({ 10 | target: "planets.csv", 11 | run: planets => 12 | shell`echo "doing planets" 13 | wget -O ${planets} https://gist.github.com/mbostock/3007180/raw/79339d19b6c9fea256ab9e99f7f0be18372904bf/planets.csv` 14 | }); 15 | 16 | exoplanets = new Task({ 17 | target: "exoplanets.csv", 18 | run: exoplanets => shell`# echo "doing exoplanets" 19 | # cp -f ${planets} ${exoplanets} 20 | cut -d, -f1,12,46 ${upr_all} | tail -n+2 >> ${exoplanets}` 21 | }); 22 | -------------------------------------------------------------------------------- /src/Oakfile: -------------------------------------------------------------------------------- 1 | a = new Task({ 2 | target: "temp/a.txt", 3 | run: a => shell`sleep 1 && echo "hello its a! " > ${a}`, 4 | }); 5 | 6 | b = new Task({ 7 | target: "temp/b.txt", 8 | run: b => shell`sleep 1.5 && echo "hello now its b! " > ${b}`, 9 | }); 10 | 11 | c = new Task({ 12 | target: "temp/c.txt", 13 | run: c => shell`sleep 1 && cat ${a} ${b} > ${c}`, 14 | }); 15 | 16 | d = new Task({ 17 | target: "temp/d.txt", 18 | run: d => shell`sleep 1 && cat ${c} > ${d}`, 19 | }); 20 | 21 | e = new Task({ 22 | target: "temp/e.txt", 23 | run: e => shell`sleep 1 && cat ${d} > ${e}`, 24 | }); 25 | 26 | f = new Task({ 27 | target: "temp/f.txt", 28 | run: f => shell`sleep 1 && cat ${e} > ${f}`, 29 | }); 30 | 31 | g = new Task({ 32 | target: "temp/g.txt", 33 | run: g => shell`sleep 1 && cat ${e} ${f} > ${g}`, 34 | }); 35 | -------------------------------------------------------------------------------- /site/public/zeit.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /examples/much-logs/Oakfile: -------------------------------------------------------------------------------- 1 | a = new Task({ 2 | target: "a", 3 | run: a => shell`for i in {1..10}; do echo "a hey $i"; done; \ 4 | echo "a" > ${a}` 5 | }); 6 | 7 | b = new Task({ 8 | target: "b", 9 | run: b => shell`for i in {1..10}; do sleep .02; echo "b hey $i"; done; \ 10 | echo "b" > ${b}` 11 | }); 12 | 13 | c = new Task({ 14 | target: "c", 15 | run: c => shell`for i in {1..10}; do sleep .00; echo "c hey $i"; done; \ 16 | exit 1; \ 17 | cat ${a} ${b} > ${c}` 18 | }); 19 | 20 | d = new Task({ 21 | target: "d", 22 | run: d => shell`for i in {1..10}; do sleep .02; echo "d hey $i"; done; \ 23 | exit 1; \ 24 | cat ${a} ${b} > ${d}` 25 | }); 26 | 27 | e = new Task({ 28 | target: "e", 29 | run: e => shell`for i in {1..10}; do sleep .01; echo "e hey $i"; done; \ 30 | exit 1; \ 31 | cat ${a} ${b} > ${e}` 32 | }); 33 | -------------------------------------------------------------------------------- /tests/run-import-watch/test.ts: -------------------------------------------------------------------------------- 1 | import test from "tape"; 2 | import { removeSync } from "fs-extra"; 3 | import { oak_run } from "../../src/core/run"; 4 | import { envFile, open, touch } from "../utils"; 5 | 6 | const env = envFile(__dirname); 7 | 8 | function cleanUp() { 9 | removeSync(env("oak_data")); 
10 | removeSync(env(".oak")); 11 | removeSync(env("sub/oak_data")); 12 | removeSync(env("sub/.oak")); 13 | } 14 | 15 | test.onFinish(() => { 16 | cleanUp(); 17 | }); 18 | 19 | cleanUp(); 20 | 21 | test.skip("run-import-watch", async t => { 22 | await oak_run({ filename: env("sub/Oakfile"), targets: [] }); 23 | t.equal((await open(env("sub/oak_data/file.txt"))).content, "Alex"); 24 | 25 | await oak_run({ filename: env("Oakfile"), targets: [] }); 26 | t.equal((await open(env("oak_data/file.txt"))).content, "Penguin"); 27 | 28 | t.end(); 29 | }); 30 |
-------------------------------------------------------------------------------- /examples/barr-redaction-pixels/Oakfile: -------------------------------------------------------------------------------- 1 | dockerfile = new Task({ 2 | run: () => shell`docker build --tag bar-pixels-test .`, 3 | watch: ["Dockerfile"] 4 | }); 5 | 6 | report_pdf = new Task({ 7 | target: "report.pdf", 8 | run: report_pdf => 9 | command("curl", [ 10 | "-o", 11 | report_pdf, 12 | "https://assets.documentcloud.org/documents/5955118/The-Mueller-Report.pdf" 13 | ]) 14 | }); 15 | 16 | pages_dir = new Task({ 17 | target: "pages", 18 | run: pages_dir => shell`docker run --rm \ 19 | -v ${report_pdf}:/oak_data/report.pdf \ 20 | -v ${pages_dir}:/oak_data/pages \ 21 | bar-pixels-test \ 22 | pdftoppm -png -f 1 /oak_data/report.pdf /oak_data/pages`, 23 | ensureEmptyDir: true, 24 | deps: dockerfile 25 | }); 26 | 27 | histogram_dir = new Task({ 28 | target: "histograms", 29 | run: histogram_dir => shell`` 30 | }); 31 |
-------------------------------------------------------------------------------- /tests/README.md: -------------------------------------------------------------------------------- 1 | ## Skeleton for most integration tests 2 | 3 | ```typescript 4 | import test from "tape"; 5 | import { oak_run } from "../../src/core/run"; 6 | import { cleanUp, envFile, open } from "../utils"; 7 | 8 | const env = envFile(__dirname); 9 | 10 | const outs = ["a", "b", "c"]; 11 | 12 | test.onFinish(() => { 13 | cleanUp(env, outs); 14 | }); 15 | 16 | cleanUp(env, outs); 17 | 18 | test("TEST_NAME", async t => { 19 | await oak_run({ filename: env("Oakfile"), targets: [] }); 20 | const a_file = await open(env("a")); 21 | const b_file = await open(env("b")); 22 | const c_file = await open(env("c")); 23 | t.equal(a_file.content, "a"); 24 | t.equal(b_file.content, "b"); 25 | t.equal(c_file.content, "ab"); 26 | t.true(a_file.stat.mtime < c_file.stat.mtime); 27 | t.true(b_file.stat.mtime < c_file.stat.mtime); 28 | t.end(); 29 | }); 30 | ``` 31 |
-------------------------------------------------------------------------------- /examples/youtube-colors/generator/pillow/gen_bars.py: -------------------------------------------------------------------------------- 1 | from sys import argv 2 | from os import listdir, path 3 | from PIL import Image, ImageStat, ImageDraw 4 | 5 | INPUT_DIR = argv[1] 6 | OUTPUT_PNG = argv[2] 7 | 8 | frames = sorted(listdir(INPUT_DIR)) 9 | 10 | print("Input directory: ", INPUT_DIR) 11 | print("Output PNG: ", OUTPUT_PNG) 12 | print("Number of frames: %d" % len(frames)) 13 | 14 | height = int(len(frames) * 9 / 16) 15 | out_img = Image.new('RGB', (len(frames), height)) 16 | draw = ImageDraw.Draw(out_img) 17 | 18 | for i, frame_relpath in enumerate(frames): 19 | 20 | frame_abspath = path.join(INPUT_DIR, frame_relpath) 21 | 22 | frame = Image.open(frame_abspath) 23 | 24 | stat = ImageStat.Stat(frame) 25 | draw.line((i, 0, i, height), fill=tuple(map(lambda x: int(x), stat.mean)))
26 | 27 | print("saving...") 28 | out_img.save(OUTPUT_PNG, 'PNG') 29 | -------------------------------------------------------------------------------- /src/Library/command.ts: -------------------------------------------------------------------------------- 1 | import { execFile } from "child_process"; 2 | import Task from "../Task"; 3 | import { Execution } from "../Execution"; 4 | import { createWriteStream } from "fs"; 5 | import pino from "pino"; 6 | 7 | type CommandConfig = { 8 | stdout: boolean; 9 | stderr: boolean; 10 | }; 11 | export default function( 12 | file: string = "", 13 | args: any[] = [], 14 | outPath?: string | Task, 15 | config: CommandConfig = { stdout: true, stderr: true } 16 | ): Execution { 17 | const cleanedArgs = args.map(arg => { 18 | if (arg instanceof Task) { 19 | return arg.target; 20 | } 21 | return arg; 22 | }); 23 | outPath = outPath && (outPath instanceof Task ? outPath.target : outPath); 24 | const outStream = outPath && createWriteStream(outPath); 25 | 26 | const process = execFile(file, cleanedArgs); 27 | return { process, outStream, config }; 28 | } 29 | -------------------------------------------------------------------------------- /tests/lib-command/test.ts: -------------------------------------------------------------------------------- 1 | import test from "tape"; 2 | import { removeSync } from "fs-extra"; 3 | import { oak_run } from "../../src/core/run"; 4 | import { envFile, open } from "../utils"; 5 | 6 | const env = envFile(__dirname); 7 | 8 | function cleanUp() { 9 | removeSync(env("oak_data")); 10 | removeSync(env(".oak")); 11 | } 12 | 13 | test.onFinish(() => { 14 | cleanUp(); 15 | }); 16 | 17 | cleanUp(); 18 | 19 | test("lib-command", async t => { 20 | await oak_run({ filename: env("Oakfile"), targets: [] }); 21 | const a_file = await open(env("oak_data/a")); 22 | const b_file = await open(env("oak_data/b")); 23 | const c_file = await open(env("oak_data/c")); 24 | t.equal(a_file.content, "a"); 25 | t.equal(b_file.content, "b"); 26 | t.equal(c_file.content, "ab"); 27 | t.true(a_file.stat.mtime < c_file.stat.mtime); 28 | t.true(b_file.stat.mtime < c_file.stat.mtime); 29 | t.end(); 30 | }); 31 | -------------------------------------------------------------------------------- /examples/youtube-colors/generator/Oakfile: -------------------------------------------------------------------------------- 1 | ytUrl = "https://www.youtube.com/watch?v=Rb4lgOiHBZo"; 2 | 3 | video_mp4 = new Task({ 4 | target: "video.mp4", 5 | run: video_mp4 => command("youtube-dl", [ytUrl, "--output", video_mp4]) 6 | }); 7 | 8 | frames_dir = new Task({ 9 | target: "frames", 10 | run: frames_dir => shell`docker run --rm \ 11 | -v ${frames_dir}:/tmp/frames \ 12 | -v ${video_mp4}:/tmp/in.mp4 \ 13 | jrottenberg/ffmpeg:4.1-alpine \ 14 | -i /tmp/in.mp4 -r 30 /tmp/frames/$filename%09d.jpg` 15 | }); 16 | 17 | barcode_png = new Task({ 18 | target: "barcode.png", 19 | run: barcode_png => 20 | shell`docker run --rm \ 21 | -v ${frames_dir}:/oak/frames \ 22 | -v ${barcode_png}:/oak/out.png \ 23 | pillow-test \ 24 | python3 /scripts/gen_bars.py /oak/frames /oak/out.png`, 25 | createFileBeforeRun: true, 26 | watch: ["pillow/Dockerfile.pillow", "pillow/gen_bars.py"] 27 | }); 28 | -------------------------------------------------------------------------------- /examples/barr-redaction-pixels/3_parse.js: -------------------------------------------------------------------------------- 1 | var fs = require("fs"); 2 | 3 | var reports = fs.readdirSync("histograms"); 4 | 5 | var headers = new 
Set(); 6 | var rows = []; 7 | 8 | reports.forEach(function(filename) { 9 | var report = fs.readFileSync("histograms/" + filename, "utf-8"); 10 | var lines = report.split("\n").filter(l => l); 11 | var page = filename.replace(/[a-z\-._]/gi, ""); 12 | var data = { filename, page }; 13 | lines.forEach(function(line) { 14 | var [ all, pixels, hex, color ] = line.match(/^\s+(\d+): \([^)]+\) (#[0-9a-f]{6,8})/i); 15 | data[color || hex] = pixels * 1; 16 | }); 17 | rows.push(data); 18 | for (var k in data) headers.add(k); 19 | }); 20 | 21 | headers = Array.from(headers); 22 | 23 | var out = fs.createWriteStream("values.csv"); 24 | out.write(headers.join(",") + "\n"); 25 | rows.forEach(function(r) { 26 | out.write(headers.map(h => r[h] || 0).join(",") + "\n"); 27 | }); 28 | out.end(); -------------------------------------------------------------------------------- /examples/ca-schools/to_geo.py: -------------------------------------------------------------------------------- 1 | import pdb 2 | import argparse 3 | from os import path 4 | import geopandas as gpd 5 | from shapely.geometry import Point 6 | 7 | parser = argparse.ArgumentParser(description='Process CLI arguments..') 8 | parser.add_argument('-i', '--input') 9 | parser.add_argument('-o', '--output') 10 | args = parser.parse_args() 11 | 12 | # https://github.com/datadesk/california-k12-notebooks/blob/master/02_transform.ipynb 13 | 14 | print(args.input, args.output) 15 | 16 | def df_to_gdf(input_df, crs={'init': u'epsg:4326'}): 17 | """ 18 | Accepts a DataFrame with longitude and latitude columns. Returns a GeoDataFrame. 19 | """ 20 | df = input_df.copy() 21 | geometry = [Point(xy) for xy in zip(df.longitude, df.latitude)] 22 | return gpd.GeoDataFrame(df, crs=crs, geometry=geometry) 23 | 24 | 25 | input_df = gpd.pd.read_csv(args.input) 26 | gdf = df_to_gdf(input_df) 27 | gdf.to_file(args.output, driver='GeoJSON') 28 | -------------------------------------------------------------------------------- /examples/youtube-colors/Oakfile: -------------------------------------------------------------------------------- 1 | fopUrl = "https://www.youtube.com/watch?v=QE_vO6Ti9l4" 2 | 3 | import {barcode_png as fop} with {fopUrl as ytUrl} from "./generator/Oakfile" 4 | 5 | sbspUrl = "https://www.youtube.com/watch?v=-FRXiHx-IDA" 6 | 7 | import {barcode_png as sbsp} with {sbspUrl as ytUrl} from "./generator/Oakfile" 8 | 9 | 10 | pf_mp4 = new Task({ 11 | target: "pf.mp4", 12 | run: pf_mp4 => command("youtube-dl", ["https://www.youtube.com/watch?v=IQL3B4o5rLk", "--output", pf_mp4]) 13 | }); 14 | 15 | import {barcode_png as pf} with {pf_mp4 as video_mp4} from "./generator/Oakfile" 16 | 17 | 18 | 19 | /* 20 | lol = new Task({ 21 | target: "a.png", 22 | run: lol => shell`cp ${barcode_png} ${lol}` 23 | }) 24 | */ 25 | 26 | /* 27 | 28 | import * {barcode_png as barcode} from "./generator/Oakfile" 29 | 30 | fop = barcode({ 31 | ytUrl: "https:/...." 32 | }) 33 | 34 | sbsp = barcode({ 35 | ytUrl: "https:/...." 
36 | }) 37 | 38 | */ -------------------------------------------------------------------------------- /examples/la-metro-schedule/notify.py: -------------------------------------------------------------------------------- 1 | from sys import argv 2 | import urllib3 3 | import sqlite3 4 | import json 5 | import time 6 | 7 | args = argv[1:3] 8 | db_path, slack_webhook = args 9 | 10 | http = urllib3.PoolManager() 11 | 12 | conn = sqlite3.connect(db_path) 13 | c = conn.cursor() 14 | 15 | c.execute("SELECT COUNT(*) as count FROM vehicle_readings;") 16 | readings_count = c.fetchone()[0] 17 | 18 | recent_time = round((time.time() - (15*60)) * 1000) 19 | c.execute("""SELECT 20 | COUNT(*) as count 21 | FROM vehicle_readings 22 | WHERE recordTime > ?;""", 23 | (recent_time,)) 24 | recent_readings_count = c.fetchone()[0] 25 | 26 | message = { 27 | "text": "Total: {}\nLast 15 minutes: {}".format(readings_count, recent_readings_count) 28 | } 29 | 30 | print("Posting to slack", message) 31 | http.request('POST', slack_webhook, headers={ 32 | "Content-type": "application/json", 33 | }, body=json.dumps(message).encode('utf8')) 34 | -------------------------------------------------------------------------------- /examples/ucsd-parking/analyze.py: -------------------------------------------------------------------------------- 1 | import pandas 2 | import json 3 | 4 | dtype = { 5 | 'quarter':str, 6 | 'lot': str, 7 | 'space_type': str, 8 | 9 | 'num_spots': int, 10 | 'time_counts': object, 11 | '8am_empty_count': int, 12 | '9am_empty_count': int, 13 | '11am_empty_count': int, 14 | '12pm_empty_count': int, 15 | '1pm_empty_count': int, 16 | '2pm_empty_count': int, 17 | '3pm_empty_count': int, 18 | '4pm_empty_count': int, 19 | '5pm_empty_count': int, 20 | } 21 | 22 | def analyze(): 23 | df = pandas.read_csv('test.csv', dtype=dtype) 24 | df['time_counts'] = df['time_counts'].apply(lambda x: json.loads(x)) 25 | 26 | s_df = df[ (df['space_type'] == 'S') & (df['num_spots'] > 0)] 27 | s_df['max_occupancy'] = s_df['time_counts'].apply(lambda x: min(x)) 28 | 29 | print(s_df.groupby(['quarter', 'lot']).sum().head()) 30 | return df 31 | 32 | def main(): 33 | analyze() 34 | 35 | if __name__ == '__main__': 36 | main() -------------------------------------------------------------------------------- /examples/barr-redaction-pixels/5_montage.sh: -------------------------------------------------------------------------------- 1 | # sections: 2 | # 1-II 22-43 3 | # 1-III 44-73 4 | # 1-IV 74-181 5 | # 1-V 182-207 6 | 7 | mkdir -p montages 8 | rm montages/* 9 | 10 | function makeMontage() { 11 | start=$1 12 | end=$2 13 | name=$3 14 | files=() 15 | for i in $(eval seq $start $end); do 16 | printf -v page '%03d' $i 17 | # echo $page 18 | files+=("pages/report-$page.png") 19 | done 20 | # echo "${files[@]}"; 21 | montage "${files[@]}" -geometry 100x -quality 60% -tile 10x "montages/tiled-$name.jpg" 22 | } 23 | 24 | makeMontage 9 18 section1-summary 25 | makeMontage 19 21 section1-i 26 | makeMontage 22 43 section1-ii 27 | makeMontage 44 73 section1-iii 28 | makeMontage 74 181 section1-iv 29 | makeMontage 182 207 section1-v 30 | makeMontage 213 220 section2-summary 31 | makeMontage 221 226 section2-i 32 | makeMontage 227 370 section2-ii 33 | makeMontage 371 393 section2-iii 34 | makeMontage 394 394 section2-iv 35 | makeMontage 395 448 appendices 36 | -------------------------------------------------------------------------------- /tests/run-fail/test.ts: -------------------------------------------------------------------------------- 
1 | import test from "tape"; 2 | import { removeSync } from "fs-extra"; 3 | import { oak_run } from "../../src/core/run"; 4 | import { envFile, open, touch } from "../utils"; 5 | 6 | const env = envFile(__dirname); 7 | 8 | function cleanUp() { 9 | removeSync(env.data("")); 10 | removeSync(env(".oak")); 11 | } 12 | 13 | test.onFinish(() => { 14 | cleanUp(); 15 | console.log('finished'); 16 | }); 17 | 18 | cleanUp(); 19 | 20 | test("run fail", async t => { 21 | 22 | // a should work as expected. 23 | // b creates a file, but exits nonzero. 24 | // c doesn't create a file, but exits 0. 25 | await oak_run({ filename: env("Oakfile"), targets: [] }); 26 | const a_file = await open(env.data("a")); 27 | const b_file = await open(env.data("b")); 28 | const c_file = await open(env.data("c")); 29 | 30 | t.equal(a_file.content, "a"); 31 | t.equal(b_file.content, "b"); 32 | t.equal(c_file.content, null); 33 | t.equal(c_file.stat, null); 34 | 35 | t.end(); 36 | }); 37 |
-------------------------------------------------------------------------------- /examples/ghcdn/convert_curr_year.py: -------------------------------------------------------------------------------- 1 | from os import listdir, path, remove 2 | import sys 3 | import pandas as pd 4 | import sqlite3 5 | 6 | curr_year_raw = sys.argv[1] 7 | output_db = sys.argv[2] 8 | 9 | if path.exists(output_db): 10 | remove(output_db) 11 | 12 | conn = sqlite3.connect(output_db) 13 | 14 | c = conn.cursor() 15 | c.execute( 16 | 'CREATE TABLE records (source text, station text, date text, type text, value real)') 17 | conn.commit() 18 | 19 | 20 | print(f"Uploading {curr_year_raw} to database") 21 | 22 | df_chunk = pd.read_csv(curr_year_raw, compression="gzip", 23 | chunksize=2000000, header=None) 24 | for df in df_chunk: 25 | print(f"\tchunk...") 26 | df = df.drop(columns=[4, 5, 6, 7]) 27 | df = df.rename(index=str, columns={ 28 | 0: "station", 1: "date", 2: "type", 3: "value"}) 29 | df['source'] = curr_year_raw 30 | df.to_sql('records', con=conn, if_exists='append', index=False) 31 | 32 | conn.close() 33 | 34 | print("All files processed.") 35 |
-------------------------------------------------------------------------------- /src/Library/shell.ts: -------------------------------------------------------------------------------- 1 | import { spawn } from "child_process"; 2 | import { Execution } from "../Execution"; 3 | import Task from "../Task"; 4 | 5 | function transform(strings: string[], ...values: any[]): Execution { 6 | let s = strings[0]; 7 | for (let i = 0, n = values.length; i < n; ++i) { 8 | if (typeof values[i] === "string") { 9 | s += `${values[i]}${strings[i + 1]}`; 10 | } else if (values[i] instanceof Task) { 11 | const cleanedTargetPath = values[i].target 12 | ? values[i].target.replace(/"/g, `\\"`) 13 | : ""; 14 | s += `"${cleanedTargetPath}"${strings[i + 1]}`; 15 | } else { 16 | s += `${values[i]}${strings[i + 1]}`; 17 | } 18 | } 19 | const process = spawn(s, { shell: true }); 20 | return { process, outStream: null, config: { stdout: true, stderr: false } }; 21 | } 22 | 23 | export default function shell(args = {}): () => Execution { 24 | return Array.isArray(args) 25 | ?
((args = {}), transform.apply(this, arguments)) 26 | : transform; 27 | } 28 | -------------------------------------------------------------------------------- /src/Library/library.ts: -------------------------------------------------------------------------------- 1 | const constant = function(x: any) { 2 | return function() { 3 | return x; 4 | }; 5 | }; 6 | 7 | import shell from "./shell"; 8 | import command from "./command"; 9 | import Task from "../Task"; 10 | import env from "./env"; 11 | 12 | export default function Library() { 13 | Object.defineProperties(this, { 14 | shell: { value: constant(shell), writable: true, enumerable: true }, 15 | Task: { value: constant(Task), writable: true, enumerable: true }, 16 | command: { value: constant(command), writable: true, enumerable: true }, 17 | env: { value: constant(env), writable: true, enumerable: true }, 18 | }); 19 | } 20 | 21 | export function RunLibrary() { 22 | Object.defineProperties(this, { 23 | shell: { value: constant(shell), writable: true, enumerable: true }, 24 | Task: { value: constant(Task), writable: true, enumerable: true }, 25 | command: { value: constant(command), writable: true, enumerable: true }, 26 | env: { value: constant(env), writable: true, enumerable: true }, 27 | }); 28 | } 29 | -------------------------------------------------------------------------------- /examples/pudding-hypen-names/mls/s1_get-names-list.js: -------------------------------------------------------------------------------- 1 | const fs = require('fs'); 2 | const d3 = require('d3'); 3 | 4 | const IN_PATH = './output/mls/csvs/'; 5 | const OUT_PATH = './output/mls'; 6 | const years = d3.range(1996, 2020) 7 | let seasonsData = []; 8 | let flatData = []; 9 | const names = []; 10 | 11 | function loadData(year) { 12 | const file = d3.csvParse(fs.readFileSync(`${IN_PATH}season-${year}.csv`, 'utf-8')); 13 | 14 | const withSeasons = file.map(d => ({ 15 | ...d, 16 | season: +year 17 | })); 18 | seasonsData.push(withSeasons) 19 | 20 | } 21 | 22 | function getNames(d) { 23 | const name = (d.Player).split('\\')[0] 24 | const season = +d.season 25 | 26 | names.push({name, season}) 27 | } 28 | 29 | function init() { 30 | years.map(loadData) 31 | flatData = [].concat(...seasonsData) 32 | flatData.map(getNames) 33 | 34 | const allNames = [].concat(...names).map(d => ({ 35 | ...d 36 | })); 37 | 38 | const csv = d3.csvFormat(allNames); 39 | fs.writeFileSync(`${OUT_PATH}/names-no-years.csv`, csv) 40 | } 41 | 42 | init(); 43 | -------------------------------------------------------------------------------- /tests/utils-test/duration.ts: -------------------------------------------------------------------------------- 1 | import test from "tape"; 2 | import { duration } from "../../src/utils"; 3 | 4 | test("utils-duration", async t => { 5 | const d0 = new Date(2020, 0, 1, 0, 0, 0); 6 | t.equal(duration(d0, new Date(2020, 0, 1, 0, 0, 0, 1)), "just now"); 7 | t.equal(duration(d0, new Date(2020, 0, 1, 0, 0, 0, 999)), "just now"); 8 | t.equal(duration(d0, new Date(2020, 0, 1, 0, 0, 1, 0)), "1 second ago"); 9 | t.equal(duration(d0, new Date(2020, 0, 1, 0, 0, 2, 0)), "2 seconds ago"); 10 | t.equal(duration(d0, new Date(2020, 0, 1, 0, 0, 2, 500)), "2 seconds ago"); 11 | t.equal(duration(d0, new Date(2020, 0, 1, 0, 1, 0, 0)), "1 minute ago"); 12 | t.equal(duration(d0, new Date(2020, 0, 1, 0, 30, 0, 0)), "30 minutes ago"); 13 | t.equal(duration(d0, new Date(2020, 0, 1, 1, 0, 0, 0)), "1 hour ago"); 14 | t.equal(duration(d0, new Date(2020, 0, 1, 23, 0, 0, 0)), "23 hours 
ago"); 15 | t.equal(duration(d0, new Date(2020, 0, 2, 0, 0, 0, 0)), "1 day ago"); 16 | t.equal(duration(d0, new Date(2020, 1, 2, 0, 0, 0, 0)), "32 days ago"); 17 | t.end(); 18 | }); 19 |
-------------------------------------------------------------------------------- /examples/la-metro-schedule/upload_db.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import json 3 | import sqlite3 4 | from os.path import getmtime 5 | 6 | vehicles_path, db_path = sys.argv[1:3] 7 | 8 | record_time = round(getmtime(vehicles_path) * 1000) 9 | 10 | vehicles = json.load(open(vehicles_path)) 11 | conn = sqlite3.connect(db_path) 12 | 13 | 14 | def serialize_item(item): 15 | return ( 16 | record_time, 17 | item.get('id'), 18 | item.get('route_id'), 19 | item.get('predictable'), 20 | item.get('run_id'), 21 | item.get('latitude'), 22 | item.get('longitude'), 23 | item.get('heading'), 24 | item.get('seconds_since_report') 25 | ) 26 | 27 | 28 | readings = list(map(serialize_item, vehicles.get('items'))) 29 | 30 | c = conn.cursor() 31 | c.executemany('''INSERT INTO vehicle_readings ( 32 | recordTime, 33 | vehicle, 34 | route, 35 | predictable, 36 | run, 37 | latitude, 38 | longitude, 39 | heading, 40 | secs_since_report 41 | ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)''', readings) 42 | conn.commit() 43 | 44 | conn.close() 45 |
-------------------------------------------------------------------------------- /tests/run-import-aliases/test.ts: -------------------------------------------------------------------------------- 1 | import test from "tape"; 2 | import { removeSync } from "fs-extra"; 3 | import { oak_run } from "../../src/core/run"; 4 | import { envFile, open } from "../utils"; 5 | 6 | const env = envFile(__dirname); 7 | 8 | function cleanUp() { 9 | removeSync(env.data("")); 10 | removeSync(env(".oak")); 11 | removeSync(env("sub/oak_data")); 12 | } 13 | 14 | test.onFinish(() => { 15 | cleanUp(); 16 | }); 17 | 18 | cleanUp(); 19 | 20 | /* 21 | 22 | a 23 | | 24 | b --- myB x 25 | | / 26 | y 27 | 28 | */ 29 | test("run-import-aliases", async t => { 30 | await oak_run({ 31 | filename: env("Oakfile"), 32 | targets: ["x"] 33 | }); 34 | const x = await open(env.data("x")); 35 | let y = await open(env.data("y")); 36 | t.equal(x.content, "x"); 37 | t.equal(y.stat, null); 38 | 39 | await oak_run({ 40 | filename: env("Oakfile"), 41 | targets: ["y"] 42 | }); 43 | const b = await open(env("sub/oak_data/b")); 44 | y = await open(env.data("y")); 45 | t.equal(b.content, "a"); 46 | t.equal(y.content, "xa"); 47 | 48 | t.end(); 49 | }); 50 |
-------------------------------------------------------------------------------- /examples/ghcdn/Oakfile: -------------------------------------------------------------------------------- 1 | backfill_raw = new Task({ 2 | target: "backfill_raw", 3 | // could/should be a python script too, idk 4 | run: backfill_raw => shell` mkdir -p ${backfill_raw}; 5 | for YEAR in {2015..2019} 6 | do 7 | wget -O ${backfill_raw}/$YEAR.csv.gz ftp://ftp.ncdc.noaa.gov/pub/data/ghcn/daily/by_year/$YEAR.csv.gz 8 | done` 9 | }); 10 | 11 | backfill = new Task({ 12 | target: "backfill.db", 13 | run: backfill => 14 | command("pipenv", [ 15 | "run", 16 | "python", 17 | "consolidate_raw_backfill.py", 18 | backfill_raw, 19 | backfill 20 | ]) 21 | }); 22 | 23 | curr_year_raw = new Task({ 24 | target: "curr_year_raw.csv.gz", 25 | run: curr_year_raw => 26 | command("wget", [ 27 | "-O", 28 | curr_year_raw, 29 | "ftp://ftp.ncdc.noaa.gov/pub/data/ghcn/daily/by_year/2020.csv.gz" 30 | ])
31 | }); 32 | 33 | curr_year = new Task({ 34 | target: "curr_year.db", 35 | run: curr_year => 36 | command("pipenv", [ 37 | "run", 38 | "python", 39 | "convert_curr_year.py", 40 | curr_year_raw, 41 | curr_year 42 | ]) 43 | }); 44 |
-------------------------------------------------------------------------------- /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | # Javascript Node CircleCI 2.0 configuration file 2 | # 3 | # Check https://circleci.com/docs/2.0/language-javascript/ for more details 4 | # 5 | version: 2 6 | jobs: 7 | build: 8 | docker: 9 | # specify the version you desire here 10 | - image: circleci/node:10.16 11 | 12 | # Specify service dependencies here if necessary 13 | # CircleCI maintains a library of pre-built images 14 | # documented at https://circleci.com/docs/2.0/circleci-images/ 15 | # - image: circleci/mongo:3.4.4 16 | 17 | working_directory: ~/repo 18 | 19 | steps: 20 | - checkout 21 | 22 | # Download and cache dependencies 23 | - restore_cache: 24 | keys: 25 | - v1-dependencies-{{ checksum "package.json" }} 26 | # fallback to using the latest cache if no exact match is found 27 | - v1-dependencies- 28 | 29 | - run: yarn install 30 | 31 | - save_cache: 32 | paths: 33 | - node_modules 34 | key: v1-dependencies-{{ checksum "package.json" }} 35 | 36 | # run tests! 37 | - run: yarn test 38 |
-------------------------------------------------------------------------------- /examples/native-lands-colleges/Oakfile: -------------------------------------------------------------------------------- 1 | cscFile = "CollegeScorecard_Raw_Data/MERGED2015_16_PP.csv"; 2 | 3 | nativelands = new Task({ 4 | target: "indigenousTerritories.geojson", 5 | run: nativelands => 6 | shell`wget -O ${nativelands} https://native-land.ca/coordinates/indigenousTerritories.json` 7 | }); 8 | 9 | zipped_csc = new Task({ 10 | target: "CollegeScorecard_Raw_Data.zip", 11 | run: zipped_csc => 12 | shell`wget -O ${zipped_csc} https://ed-public-download.app.cloud.gov/downloads/CollegeScorecard_Raw_Data.zip` 13 | }); 14 | 15 | csc15 = new Task({ 16 | target: "csc15.csv", 17 | run: csc15 => shell`unzip ${zipped_csc} ${cscFile} -d .
18 | mv ${cscFile} ${csc15}` 19 | }); 20 | 21 | known_colleges = new Task({ 22 | target: "colleges.csv", 23 | run: known_colleges => 24 | shell`pipenv run python filter_colleges.py --input=${csc15} --output=${known_colleges}` 25 | }); 26 | 27 | /*nativelands_colleges = cell({ 28 | target: 'nativelands_colleges.geojson', 29 | run: nativelands_colleges => shell`pipenv run python geocode_colleges.py --colleges=${known_colleges} --nativelands=${nativelands} --output=${nativelands_colleges}` 30 | })*/ 31 | -------------------------------------------------------------------------------- /site/components/Oakfile.js: -------------------------------------------------------------------------------- 1 | import { Component } from 'react'; 2 | import hljs from 'highlight.js/lib/highlight'; 3 | import javascript from 'highlight.js/lib/languages/javascript'; 4 | 5 | hljs.registerLanguage('javascript', javascript); 6 | 7 | class Highlight extends Component { 8 | constructor(props) { 9 | super(props); 10 | this.nodeRef = React.createRef(); 11 | } 12 | 13 | componentDidMount() { 14 | this.highlight(); 15 | } 16 | 17 | componentDidUpdate() { 18 | this.highlight(); 19 | } 20 | 21 | highlight = () => { 22 | if (this.nodeRef) { 23 | const nodes = this.nodeRef.current.querySelectorAll('pre'); 24 | nodes.forEach((node) => { 25 | hljs.highlightBlock(node); 26 | }); 27 | } 28 | } 29 | 30 | render() { 31 | const { content, children } = this.props; 32 | return ( 33 |
34 | <div ref={this.nodeRef}>{children}</div> 35 | 36 | ); 37 | } 38 | } 39 | 40 | export default function () { 41 | return <Highlight><pre>console.log('hi')</pre></Highlight>
42 | } -------------------------------------------------------------------------------- /tests/task-target-dir/test.ts: -------------------------------------------------------------------------------- 1 | import test from "tape"; 2 | import { oak_run } from "../../src/core/run"; 3 | import { envFile, open, touch } from "../utils"; 4 | import { removeSync } from "fs-extra"; 5 | import { join } from "path"; 6 | 7 | const env = envFile(__dirname); 8 | 9 | function cleanUp() { 10 | removeSync(env.data("")); 11 | removeSync(env(".oak")); 12 | } 13 | 14 | test.onFinish(() => { 15 | cleanUp(); 16 | }); 17 | 18 | cleanUp(); 19 | 20 | test("task-target-dir", async t => { 21 | await oak_run({ filename: env("Oakfile"), targets: [] }); 22 | 23 | const a0 = await open(env.data(join("a", "file0"))); 24 | const a4 = await open(env.data(join("a", "file4"))); 25 | const origB = await open(env.data("b.txt")); 26 | 27 | t.equals(a0.content, "a0"); 28 | t.equals(a4.content, "a4"); 29 | t.equals(origB.content, "a0a1a2a3a4"); 30 | 31 | // touch a file in a_dir. Now, on next oak run, b will update 32 | await touch(env.data(join("a", "file1")), origB.stat.atime, origB.stat.mtime); 33 | await oak_run({ filename: env("Oakfile"), targets: [] }); 34 | 35 | const newB = await open(env.data("b.txt")); 36 | t.true(newB.stat.mtime > origB.stat.mtime); 37 | t.end(); 38 | }); 39 | -------------------------------------------------------------------------------- /site/README.md: -------------------------------------------------------------------------------- 1 | This is a [Next.js](https://nextjs.org/) project bootstrapped with [`create-next-app`](https://github.com/zeit/next.js/tree/canary/packages/create-next-app). 2 | 3 | ## Getting Started 4 | 5 | First, run the development server: 6 | 7 | ```bash 8 | npm run dev 9 | # or 10 | yarn dev 11 | ``` 12 | 13 | Open [http://localhost:3000](http://localhost:3000) with your browser to see the result. 14 | 15 | You can start editing the page by modifying `pages/index.js`. The page auto-updates as you edit the file. 16 | 17 | ## Learn More 18 | 19 | To learn more about Next.js, take a look at the following resources: 20 | 21 | - [Next.js Documentation](https://nextjs.org/docs) - learn about Next.js features and API. 22 | - [Learn Next.js](https://nextjs.org/learn) - an interactive Next.js tutorial. 23 | 24 | You can check out [the Next.js GitHub repository](https://github.com/zeit/next.js/) - your feedback and contributions are welcome! 25 | 26 | ## Deploy on ZEIT Now 27 | 28 | The easiest way to deploy your Next.js app is to use the [ZEIT Now Platform](https://zeit.co/) from the creators of Next.js. 29 | 30 | Check out our [Next.js deployment documentation](https://nextjs.org/docs/deployment) for more details. 
31 | -------------------------------------------------------------------------------- /examples/ghcdn/consolidate_raw_backfill.py: -------------------------------------------------------------------------------- 1 | from os import listdir, path, remove 2 | import sys 3 | import pandas as pd 4 | import sqlite3 5 | 6 | backfill_raw_dir = sys.argv[1] 7 | output_db = sys.argv[2] 8 | 9 | if path.exists(output_db): 10 | remove(output_db) 11 | 12 | conn = sqlite3.connect(output_db) 13 | 14 | c = conn.cursor() 15 | c.execute( 16 | 'CREATE TABLE records (source text, station text, date text, type text, value real)') 17 | conn.commit() 18 | 19 | raw_files = listdir(backfill_raw_dir) 20 | 21 | print(f"There are {len(raw_files)} raw files that will be uploaded.") 22 | 23 | for f in raw_files: 24 | filepath = path.join(backfill_raw_dir, f) 25 | print(f"Uploading {filepath} to database") 26 | df_chunk = pd.read_csv(filepath, compression="gzip", 27 | chunksize=2000000, header=None) 28 | for df in df_chunk: 29 | print(f"\tchunk...") 30 | df = df.drop(columns=[4, 5, 6, 7]) 31 | df = df.rename(index=str, columns={ 32 | 0: "station", 1: "date", 2: "type", 3: "value"}) 33 | df['source'] = f 34 | df.to_sql('records', con=conn, if_exists='append', index=False) 35 | 36 | print("All files processed.") 37 |
-------------------------------------------------------------------------------- /examples/pudding-hypen-names/nwsl/s3_format-years.js: -------------------------------------------------------------------------------- 1 | const fs = require('fs'); 2 | const d3 = require('d3'); 3 | 4 | const IN_FILE = './output/nwsl/names-no-years.csv'; 5 | const OUT_PATH = './output/nwsl'; 6 | let nestedData = null; 7 | let seasonsData = null; 8 | 9 | function loadData() { 10 | const file = d3.csvParse(fs.readFileSync(IN_FILE, 'utf-8')); 11 | 12 | nestedData = d3.nest() 13 | .key(d => d.name) 14 | .rollup(values => { 15 | const last = (values[0].name).substr((values[0].name).indexOf(' ')+1) 16 | const hyphen = last.includes('-') ? true : false 17 | const [startDate, endDate] = d3.extent(values, v => +v.season) 18 | return {startDate, endDate, hyphen} 19 | }) 20 | .entries(file) 21 | } 22 | 23 | function minMaxSeasons(data) { 24 | seasonsData = [].concat(...data).map(d => ({ 25 | name: d.key, 26 | startDate: d.value.startDate, 27 | endDate: d.value.endDate, 28 | league: 'nwsl', 29 | hyphen: d.value.hyphen 30 | })); 31 | } 32 | 33 | function init() { 34 | loadData() 35 | minMaxSeasons(nestedData) 36 | 37 | const allNames = [].concat(...seasonsData) 38 | 39 | const csv = d3.csvFormat(allNames); 40 | fs.writeFileSync(`${OUT_PATH}/names.csv`, csv) 41 | } 42 | 43 | init(); 44 |
-------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | # Oak Examples 2 | 3 | This directory contains several examples that all use Oak. 4 | 5 | Each directory contains a project that uses Oak. They (should) each contain an `Oakfile`, a `README`, and other files/code that the Oak project uses. 6 | 7 | Not all of the examples found here are complete. Here is an up-to-date table showing how complete these examples are. 8 | 9 | 10 | 11 | | Directory | Complete? | Short Description | 12 | | --- | --- | --- | 13 | | [`barr-redaction-pixels/`](barr-redaction-pixels) | :x: | Shell scripts for PDF art on Mueller's report. | 14 | | [`c/`](c) | :x: | Compiling C code. |
15 | | [`ca-schools/`](ca-schools) | :x: | Retrieve list of California public and charter schools. | 16 | | [`mbostock_planets/`](mbostock_planets) | :x: | Generate list of familiar and unfamiliar planets. | 17 | | [`native-lands-colleges/`](native-lands-colleges) | :x: | Get list of which native lands US colleges fall into. | 18 | | [`pudding-hyphen-names/`](pudding-hypen-names) | :x: | See how popular hyphenated names are. | 19 | | [`simple/`](simple) | :x: | Simple Oak workflow. | 20 | | [`ucsd-parking/`](ucsd-parking) | :x: | Retrieve data on parking spaces at UC San Diego. | 21 |
-------------------------------------------------------------------------------- /examples/pudding-hypen-names/nwsl/s2_get-names-list.js: -------------------------------------------------------------------------------- 1 | const fs = require('fs'); 2 | const d3 = require('d3'); 3 | const request = require('request'); 4 | const cheerio = require('cheerio'); 5 | 6 | const IN_PATH = './output/nwsl/'; 7 | const OUT_PATH = './output/nwsl'; 8 | const names = []; 9 | const years = d3.range(2016, 2020) 10 | 11 | function getNames(year) { 12 | const file = fs.readFileSync(`${IN_PATH}season-${year}.html`, 'utf-8'); 13 | const $ = cheerio.load(file) 14 | 15 | $('.fullstats tbody tr .player-name') 16 | .each((i, el) => { 17 | let name = $(el) 18 | .find('a') 19 | .text() 20 | if (name) { 21 | let firstName = name.split('\n')[1].trim() 22 | console.log(firstName) 23 | let lastName = $(el) 24 | .find('a span') 25 | .text() 26 | name = firstName.concat(' ', lastName) 27 | } 28 | let season = year 29 | if (name) names.push({name, season}) 30 | }); 31 | //console.log(names) 32 | return names; 33 | } 34 | 35 | function init() { 36 | years.map(getNames) 37 | 38 | const allNames = [].concat(...names).map(d => ({ 39 | ...d 40 | })); 41 | 42 | const csv = d3.csvFormat(allNames); 43 | fs.writeFileSync(`${OUT_PATH}/names-no-years.csv`, csv) 44 | } 45 | 46 | init(); 47 |
-------------------------------------------------------------------------------- /examples/pudding-hypen-names/mls/s2_format-years.js: -------------------------------------------------------------------------------- 1 | const fs = require('fs'); 2 | const d3 = require('d3'); 3 | 4 | const IN_FILE = './output/mls/names-no-years.csv'; 5 | const OUT_PATH = './output/mls'; 6 | let nestedData = null; 7 | let seasonsData = null; 8 | 9 | function loadData() { 10 | const file = d3.csvParse(fs.readFileSync(IN_FILE, 'utf-8')); 11 | 12 | nestedData = d3.nest() 13 | .key(d => d.name) 14 | .rollup(values => { 15 | const last = (values[0].name).substr((values[0].name).indexOf(' ')+1) 16 | const hyphen = last.includes('-') ?
true : false 17 | const [startDate, endDate] = d3.extent(values, v => +v.season) 18 | return {startDate, endDate, hyphen} 19 | }) 20 | .entries(file) 21 | 22 | //console.log(nestedData) 23 | } 24 | 25 | function minMaxSeasons(data) { 26 | seasonsData = [].concat(...data).map(d => ({ 27 | name: d.key, 28 | startDate: d.value.startDate, 29 | endDate: d.value.endDate, 30 | league: 'mls', 31 | hyphen: d.value.hyphen 32 | })); 33 | } 34 | 35 | function init() { 36 | loadData() 37 | minMaxSeasons(nestedData) 38 | 39 | const allNames = [].concat(...seasonsData) 40 | 41 | const csv = d3.csvFormat(allNames); 42 | fs.writeFileSync(`${OUT_PATH}/names.csv`, csv) 43 | } 44 | 45 | init(); 46 | -------------------------------------------------------------------------------- /tests/run-targets-only/env/Oakfile: -------------------------------------------------------------------------------- 1 | /* 2 | a b c x y m 3 | | |/ |/ 4 | d e z 5 | | | 6 | f g 7 | | / 8 | h 9 | */ 10 | 11 | a = new Task({ 12 | target: "a", 13 | run: a => shell`echo -n "a" > ${a}` 14 | }); 15 | 16 | b = new Task({ 17 | target: "b", 18 | run: b => shell`echo -n "b" > ${b}` 19 | }); 20 | 21 | c = new Task({ 22 | target: "c", 23 | run: c => shell`echo -n "c" > ${c}` 24 | }); 25 | 26 | x = new Task({ 27 | target: "x", 28 | run: x => shell`echo -n "x" > ${x}` 29 | }); 30 | 31 | y = new Task({ 32 | target: "y", 33 | run: y => shell`echo -n "y" > ${y}` 34 | }); 35 | 36 | m = new Task({ 37 | target: "m", 38 | run: m => shell`echo -n "m" > ${m}` 39 | }); 40 | 41 | d = new Task({ 42 | target: "d", 43 | run: d => shell`cat ${a} > ${d}` 44 | }); 45 | 46 | e = new Task({ 47 | target: "e", 48 | run: e => shell`cat ${b} ${c} > ${e}` 49 | }); 50 | 51 | z = new Task({ 52 | target: "z", 53 | run: z => shell`cat ${x} ${y} > ${z}` 54 | }); 55 | 56 | f = new Task({ 57 | target: "f", 58 | run: f => shell`cat ${d} > ${f}` 59 | }); 60 | 61 | g = new Task({ 62 | target: "g", 63 | run: g => shell`cat ${e} > ${g}` 64 | }); 65 | 66 | h = new Task({ 67 | target: "h", 68 | run: h => shell`cat ${f} ${g} > ${h}` 69 | }); 70 | -------------------------------------------------------------------------------- /tests/run-simple-import/test.ts: -------------------------------------------------------------------------------- 1 | import test from "tape"; 2 | import { removeSync } from "fs-extra"; 3 | import { oak_run } from "../../src/core/run"; 4 | import { envFile, open } from "../utils"; 5 | 6 | const env = envFile(__dirname); 7 | 8 | function cleanUp() { 9 | removeSync(env(".oak")); 10 | removeSync(env("oak_data")); 11 | removeSync(env("sub/oak_data")); 12 | removeSync(env("sub/subsub/oak_data")); 13 | } 14 | 15 | test.onFinish(() => { 16 | cleanUp(); 17 | }); 18 | 19 | cleanUp(); 20 | 21 | test("oak-run simple-import", async t => { 22 | await oak_run({ 23 | filename: env("Oakfile"), 24 | targets: [] 25 | }); 26 | const a_file = await open(env("sub/subsub/oak_data/a")); 27 | const b_file = await open(env("sub/oak_data/b")); 28 | const c_file = await open(env("sub/oak_data/c")); 29 | const d_file = await open(env("oak_data/d")); 30 | const f_file = await open(env("oak_data/f")); 31 | t.equal(a_file.content, "a"); 32 | t.equal(b_file.content, "b"); 33 | t.equal(c_file.content, "ab"); 34 | t.equal(d_file.content, "ab"); 35 | t.equal(f_file.content, "fff"); 36 | t.true(a_file.stat.mtime < c_file.stat.mtime); 37 | t.true(b_file.stat.mtime < c_file.stat.mtime); 38 | t.true(c_file.stat.mtime < d_file.stat.mtime); 39 | t.end(); 40 | }); 41 | 
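
The `run-simple-import` test above drives a three-level chain of Oakfiles (`Oakfile` imports from `sub/Oakfile`, which imports from `sub/subsub/Oakfile`) and asserts that each level's targets land in that level's own `oak_data/` directory with correctly ordered mtimes. The fixture Oakfiles live in `env/` and are not reproduced in this listing; a minimal sketch of the shape being exercised, using the import syntax seen in `examples/youtube-colors/Oakfile` and hypothetical task names, might look like:

```javascript
// Sketch of a sub/Oakfile, not the actual fixture:
// pull task `a` in from the nested Oakfile, then build `c` on top of it.
import { a } from "./subsub/Oakfile";

b = new Task({
  target: "b",
  run: b => shell`echo -n "b" > ${b}`
});

c = new Task({
  target: "c",
  run: c => shell`cat ${a} ${b} > ${c}` // reruns whenever a or b is newer
});
```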
-------------------------------------------------------------------------------- /tests/run-targets-import/test.ts: -------------------------------------------------------------------------------- 1 | import test from "tape"; 2 | import { removeSync } from "fs-extra"; 3 | import { oak_run } from "../../src/core/run"; 4 | import { envFile, open } from "../utils"; 5 | 6 | const env = envFile(__dirname); 7 | 8 | function cleanUp() { 9 | removeSync(env("oak_data")); 10 | removeSync(env(".oak")); 11 | removeSync(env("sub/oak_data")); 12 | } 13 | 14 | test.onFinish(() => { 15 | cleanUp(); 16 | }); 17 | 18 | cleanUp(); 19 | 20 | /* 21 | 22 | a 23 | | 24 | b ----- b x 25 | | / 26 | y 27 | 28 | */ 29 | test("run-targets-import", async t => { 30 | await oak_run({ filename: env("Oakfile"), targets: ["x"] }); 31 | let a = await open(env("sub/oak_data/a")); 32 | let b = await open(env("sub/oak_data/b")); 33 | let x = await open(env("oak_data/x")); 34 | let y = await open(env("oak_data/y")); 35 | 36 | t.equal(a.content, null); 37 | t.equal(b.content, null); 38 | t.equal(x.content, "x"); 39 | t.equal(y.content, null); 40 | 41 | await oak_run({ filename: env("Oakfile"), targets: [] }); 42 | a = await open(env("sub/oak_data/a")); 43 | b = await open(env("sub/oak_data/b")); 44 | x = await open(env("oak_data/x")); 45 | y = await open(env("oak_data/y")); 46 | 47 | t.equal(a.content, "a"); 48 | t.equal(b.content, "a"); 49 | t.equal(x.content, "x"); 50 | t.equal(y.content, "xa"); 51 | 52 | t.end(); 53 | }); 54 | -------------------------------------------------------------------------------- /examples/pudding-hypen-names/nba/s2_get-names-list.js: -------------------------------------------------------------------------------- 1 | const fs = require('fs'); 2 | const d3 = require('d3'); 3 | const request = require('request'); 4 | const cheerio = require('cheerio'); 5 | 6 | const IN_PATH = './output/nba/'; 7 | const OUT_PATH = './output/nba'; 8 | const names = []; 9 | const abcs = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 10 | 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']; 11 | 12 | function getNames(letter) { 13 | const file = fs.readFileSync(`${IN_PATH}names-${letter}.html`, 'utf-8'); 14 | const $ = cheerio.load(file) 15 | 16 | $('#players tbody tr') 17 | .each((i, el) => { 18 | const name = $(el) 19 | .find('th a') 20 | .text() 21 | const startDate = $(el) 22 | .find(`[data-stat='year_min']`) 23 | .text() 24 | const endDate = $(el) 25 | .find(`[data-stat='year_max']`) 26 | .text() 27 | const league = 'nba' 28 | const last = name.substr(name.indexOf(' ')+1) 29 | const hyphen = last.includes('-') ? 
true : false 30 | if (name) names.push({name, startDate, endDate, league, hyphen}) 31 | }); 32 | //console.log(names) 33 | return names; 34 | } 35 | 36 | function init() { 37 | abcs.map(getNames) 38 | 39 | const allNames = [].concat(...names).map(d => ({ 40 | ...d 41 | })); 42 | 43 | const csv = d3.csvFormat(allNames); 44 | fs.writeFileSync(`${OUT_PATH}/names.csv`, csv) 45 | } 46 | 47 | init(); 48 | -------------------------------------------------------------------------------- /examples/pudding-hypen-names/nfl/s2_get-names-list.js: -------------------------------------------------------------------------------- 1 | const fs = require('fs'); 2 | const d3 = require('d3'); 3 | const request = require('request'); 4 | const cheerio = require('cheerio'); 5 | 6 | const IN_PATH = './output/nfl/'; 7 | const OUT_PATH = './output/nfl'; 8 | const names = []; 9 | const abcs = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 10 | 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']; 11 | 12 | function getNames(letter) { 13 | const file = fs.readFileSync(`${IN_PATH}names-${letter}.html`, 'utf-8'); 14 | const $ = cheerio.load(file) 15 | 16 | $('#div_players p') 17 | .each((i, el) => { 18 | const name = $(el) 19 | .find('a') 20 | .text() 21 | let dates = $(el) 22 | .text() 23 | dates = dates.slice((dates.length - 9), dates.length) 24 | let startDate = dates.split('-')[0]; 25 | let endDate = dates.split('-')[1]; 26 | const league = 'nfl' 27 | const last = name.substr(name.indexOf(' ')+1) 28 | const hyphen = last.includes('-') ? true : false 29 | if (name) names.push({name, startDate, endDate, league, hyphen}) 30 | }); 31 | //console.log(names) 32 | return names; 33 | } 34 | 35 | function init() { 36 | abcs.map(getNames) 37 | 38 | const allNames = [].concat(...names).map(d => ({ 39 | ...d 40 | })); 41 | 42 | const csv = d3.csvFormat(allNames); 43 | fs.writeFileSync(`${OUT_PATH}/names.csv`, csv) 44 | } 45 | 46 | init(); 47 | -------------------------------------------------------------------------------- /examples/la-metro-schedule/Oakfile: -------------------------------------------------------------------------------- 1 | schedule = yield * (await new Scheduler("*/30 * * * * *")); 2 | 3 | notifSchedule = yield * (await new Scheduler("*/3 * * * *")); 4 | 5 | vehicles_live = new Task({ 6 | target: "vehicles.json", 7 | run: vehicles_live => 8 | shell`wget -O ${vehicles_live} "https://api.metro.net/agencies/lametro/vehicles/"`, 9 | schedule 10 | }); 11 | 12 | db = new Task({ 13 | target: "la_metro.db", 14 | run: db => shell`sqlite3 ${db} "CREATE TABLE vehicle_readings( \ 15 | recordTime INTEGER, \ 16 | vehicle TEXT, \ 17 | route TEXT, \ 18 | predictable BOOLEAN, \ 19 | run TEXT, \ 20 | latitude REAL, \ 21 | longitude REAL, \ 22 | heading INTEGER, \ 23 | secs_since_report INTEGER \ 24 | )";`, 25 | freshIgnoreTarget: true 26 | }); 27 | 28 | test = new Task({ 29 | run: () => 30 | shell`python3 -c "import os; print(os.getcwd())"; python3 upload_db.py ${vehicles_live} ${db}`, 31 | watch: ["upload_db.py"] 32 | }); 33 | 34 | notif = new Task({ 35 | run: () => shell`python3 notify.py ${db} ${env("SLACK_PIPELINE_WEBHOOK")}`, 36 | watch: ["notify.py"], 37 | schedule: notifSchedule 38 | }); 39 | 40 | /* 41 | new Task({ 42 | run: () => shell`cp ${vehicles_live} ${vehicles_backup_dir}` 43 | }); 44 | 45 | vehicles_backup_dir = new Task({ 46 | target: "vehicles_live_backup", 47 | run: vehicles_backup_dir => shell`mkdir -p ${vehicles_backup_dir}` 48 | }); 49 | */ 50 | 
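
A note on the two cadences in the `la-metro-schedule` Oakfile above: `*/30 * * * * *` appears to be a six-field cron expression with a leading seconds field, so `vehicles_live` is refetched every 30 seconds, while the five-field `*/3 * * * *` fires the Slack notifier every 3 minutes. A stripped-down sketch of the same polling pattern, with a placeholder URL and filename but the same `Scheduler`/`Task`/`shell` names used above:

```javascript
// Sketch: refetch a feed on a fixed cadence instead of on dependency change.
poll = yield * (await new Scheduler("*/30 * * * * *")); // every 30 seconds

feed = new Task({
  target: "feed.json",
  run: feed => shell`wget -O ${feed} "https://example.com/feed.json"`,
  schedule: poll // the schedule, not a file dependency, triggers reruns
});
```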
-------------------------------------------------------------------------------- /examples/pudding-hypen-names/mlb/s2_get-names-list.js: -------------------------------------------------------------------------------- 1 | const fs = require('fs'); 2 | const d3 = require('d3'); 3 | const request = require('request'); 4 | const cheerio = require('cheerio'); 5 | 6 | const IN_PATH = './output/mlb/'; 7 | const OUT_PATH = './output/mlb'; 8 | const names = []; 9 | const abcs = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 10 | 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']; 11 | 12 | function getNames(letter) { 13 | const file = fs.readFileSync(`${IN_PATH}names-${letter}.html`, 'utf-8'); 14 | const $ = cheerio.load(file) 15 | 16 | $('#div_players_ p') 17 | .each((i, el) => { 18 | const name = $(el) 19 | .find('a') 20 | .text() 21 | let dates = $(el) 22 | .text() 23 | dates = dates.slice((dates.length - 10), (dates.length - 1)) 24 | let startDate = dates.split('-')[0]; 25 | let endDate = dates.split('-')[1]; 26 | const league = 'mlb' 27 | const last = name.substr(name.indexOf(' ')+1) 28 | const hyphen = last.includes('-') ? true : false 29 | if (name) names.push({name, startDate, endDate, league, hyphen}) 30 | }); 31 | //console.log(names) 32 | return names; 33 | } 34 | 35 | function init() { 36 | abcs.map(getNames) 37 | 38 | const allNames = [].concat(...names).map(d => ({ 39 | ...d 40 | })); 41 | 42 | const csv = d3.csvFormat(allNames); 43 | fs.writeFileSync(`${OUT_PATH}/names.csv`, csv) 44 | } 45 | 46 | init(); 47 | -------------------------------------------------------------------------------- /examples/pudding-hypen-names/nhl/s2_get-names-list.js: -------------------------------------------------------------------------------- 1 | const fs = require('fs'); 2 | const d3 = require('d3'); 3 | const request = require('request'); 4 | const cheerio = require('cheerio'); 5 | 6 | const IN_PATH = './output/nhl/'; 7 | const OUT_PATH = './output/nhl'; 8 | const names = []; 9 | const abcs = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 10 | 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']; 11 | 12 | function getNames(letter) { 13 | const file = fs.readFileSync(`${IN_PATH}names-${letter}.html`, 'utf-8'); 14 | const $ = cheerio.load(file) 15 | 16 | $('#div_players .nhl') 17 | .each((i, el) => { 18 | const name = $(el) 19 | .find('a') 20 | .text() 21 | let dates = $(el) 22 | .text() 23 | dates = dates.split('(')[1] 24 | dates = dates.slice(0, 9) 25 | let startDate = dates.split('-')[0]; 26 | let endDate = dates.split('-')[1]; 27 | const league = 'nhl' 28 | const last = name.substr(name.indexOf(' ')+1) 29 | const hyphen = last.includes('-') ? true : false 30 | if (name) names.push({name, startDate, endDate, league, hyphen}) 31 | }); 32 | //console.log(names) 33 | return names; 34 | } 35 | 36 | function init() { 37 | abcs.map(getNames) 38 | 39 | const allNames = [].concat(...names).map(d => ({ 40 | ...d 41 | })); 42 | 43 | const csv = d3.csvFormat(allNames); 44 | fs.writeFileSync(`${OUT_PATH}/names.csv`, csv) 45 | } 46 | 47 | init(); 48 | -------------------------------------------------------------------------------- /docs/components/DocsLayout.jsx: -------------------------------------------------------------------------------- 1 | import Link from "next/link"; 2 | import css from "../styles/main.less"; 3 | 4 | function SidebarItem(props) { 5 | const { href, label, children } = props; 6 | return ( 7 |
<li> 8 | <Link href={href}> 9 | <a>{label}</a> 10 | </Link> 11 | {children && <ul>{children}</ul>} 12 | </li> 13 | ); 14 | } 15 | export default function DocsLayout(props) { 16 | return ( 17 | <div className={css.container}> 18 | <div className={css.sidebar}> 19 | <div className={css.logo}> 20 | <Link href="/"> 21 | <a> 22 | Oak 23 | </a> 24 | </Link> 25 | </div> 26 | <ul> 27 | <SidebarItem href="/introduction" label="Introduction" /> 28 | <SidebarItem href="/guides" label="Guides" /> 29 | <SidebarItem href="/samples" label="Samples" /> 30 | <SidebarItem href="/reference" label="Reference"> 31 | <SidebarItem 32 | href="/reference/oakfile" 33 | label="Oakfile" 34 | /> 35 | <SidebarItem href="/reference/stdlib" label="stdlib" /> 36 | <SidebarItem href="/reference/cli" label="cli" /> 37 | </SidebarItem> 38 | </ul> 39 | </div> 40 | <div className={css.content}>{props.children}</div> 41 | </div> 42 |
    43 | ); 44 | } 45 | -------------------------------------------------------------------------------- /examples/pudding-hypen-names/wnba/s2_get-names-list.js: -------------------------------------------------------------------------------- 1 | const fs = require('fs'); 2 | const d3 = require('d3'); 3 | const request = require('request'); 4 | const cheerio = require('cheerio'); 5 | 6 | const IN_PATH = './output/wnba/'; 7 | const OUT_PATH = './output/wnba'; 8 | const names = []; 9 | const abcs = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 10 | 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']; 11 | 12 | function getNames(letter) { 13 | const file = fs.readFileSync(`${IN_PATH}names-${letter}.html`, 'utf-8'); 14 | const $ = cheerio.load(file) 15 | 16 | $('#content p') 17 | .each((i, el) => { 18 | const name = $(el) 19 | .find('a') 20 | .text() 21 | let dates = $(el) 22 | .text() 23 | dates = dates.split('\n')[2] 24 | let startDate = null; 25 | let endDate = null; 26 | const league = 'wnba' 27 | const last = name.substr(name.indexOf(' ')+1) 28 | const hyphen = last.includes('-') ? true : false 29 | if (dates != undefined){ 30 | startDate = dates.substring(0,4) 31 | if (dates.length === 4) { 32 | endDate = startDate 33 | } else { 34 | endDate = dates.substring(8,12) 35 | } 36 | } 37 | if (name && dates != undefined) names.push({name, startDate, endDate, league, hyphen}) 38 | }); 39 | //console.log(names) 40 | return names; 41 | } 42 | 43 | function init() { 44 | abcs.map(getNames) 45 | 46 | const allNames = [].concat(...names).map(d => ({ 47 | ...d 48 | })); 49 | 50 | const csv = d3.csvFormat(allNames); 51 | fs.writeFileSync(`${OUT_PATH}/names.csv`, csv) 52 | } 53 | 54 | init(); 55 | -------------------------------------------------------------------------------- /tests/run-hello/test.ts: -------------------------------------------------------------------------------- 1 | import test from "tape"; 2 | import { removeSync } from "fs-extra"; 3 | import { oak_run } from "../../src/core/run"; 4 | import { envFile, open, touch } from "../utils"; 5 | 6 | const env = envFile(__dirname); 7 | 8 | function cleanUp() { 9 | removeSync(env.data("")); 10 | removeSync(env(".oak")); 11 | } 12 | 13 | test.onFinish(() => { 14 | cleanUp(); 15 | console.log('finished'); 16 | }); 17 | 18 | cleanUp(); 19 | 20 | test("oak-run hello", async t => { 21 | await oak_run({ filename: env("Oakfile"), targets: [] }); 22 | const a_file = await open(env.data("a")); 23 | const b_file = await open(env.data("b")); 24 | const c_file = await open(env.data("c")); 25 | t.equal(a_file.content, "a"); 26 | t.equal(b_file.content, "b"); 27 | t.equal(c_file.content, "ab"); 28 | t.true(a_file.stat.mtime < c_file.stat.mtime); 29 | t.true(b_file.stat.mtime < c_file.stat.mtime); 30 | 31 | await touch(env.data("b"), b_file.stat.atime, c_file.stat.mtime); 32 | 33 | await oak_run({ filename: env("Oakfile"), targets: [] }); 34 | const new_a_file = await open(env.data("a")); 35 | const new_b_file = await open(env.data("b")); 36 | const new_c_file = await open(env.data("c")); 37 | 38 | t.equal( 39 | a_file.stat.mtime.getTime(), 40 | new_a_file.stat.mtime.getTime(), 41 | "a should not update" 42 | ); 43 | t.true( 44 | new_c_file.stat.mtime > new_b_file.stat.mtime, 45 | "new_c should be updated after new_b" 46 | ); 47 | t.true( 48 | new_c_file.stat.mtime > c_file.stat.mtime, 49 | "new_c should be updated after c" 50 | ); 51 | t.end(); 52 | }); 53 | 
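
The `run-hello` test just above is the core mtime-freshness check: after a first full run it `touch`es `b` so that `b` is newer than `c`, runs again, and asserts that `a` was left alone while `c` was rebuilt. Its fixture `Oakfile` is not included in this listing, but given the assertions (`a`, `b`, and `c` where `c` contains `"ab"`), it presumably has the same shape as the `run-targets-only` fixture shown earlier:

```javascript
// Presumed shape of tests/run-hello/env/Oakfile, inferred from the assertions.
a = new Task({ target: "a", run: a => shell`echo -n "a" > ${a}` });
b = new Task({ target: "b", run: b => shell`echo -n "b" > ${b}` });
c = new Task({
  target: "c",
  run: c => shell`cat ${a} ${b} > ${c}` // reruns only when a or b is newer
});
```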
-------------------------------------------------------------------------------- /tests/run-inject-multiple/test.ts: -------------------------------------------------------------------------------- 1 | import test from "tape"; 2 | import { oak_run } from "../../src/core/run"; 3 | import { envFile, getTree } from "../utils"; 4 | import { getBaseFileHashes } from "../../src/utils"; 5 | import { removeSync } from "fs-extra"; 6 | 7 | const env = envFile(__dirname); 8 | 9 | function cleanUp() { 10 | removeSync(env("oak_data")); 11 | removeSync(env(".oak")); 12 | removeSync(env("sub/.oak")); 13 | removeSync(env("sub/oak_data")); 14 | } 15 | 16 | const source = env("Oakfile"); 17 | const target = env("sub/Oakfile"); 18 | const hash = getBaseFileHashes(source, target); 19 | 20 | const outs = [ 21 | "sub/oak_data/x", 22 | `sub/oak_data/.oak-imports/${hash(["x1"])}/x`, 23 | `sub/oak_data/.oak-imports/${hash(["x2"])}/x`, 24 | "oak_data/y1", 25 | "oak_data/y2" 26 | ]; 27 | 28 | test.onFinish(async () => { 29 | cleanUp(); 30 | }); 31 | 32 | cleanUp(); 33 | 34 | test("run-inject-multiple", async t => { 35 | await oak_run({ 36 | filename: env("sub/Oakfile"), 37 | targets: [] 38 | }); 39 | const t1 = await getTree(outs, env); 40 | t.equal(t1.get("sub/oak_data/x").content, "A x"); 41 | 42 | await oak_run({ 43 | filename: env("Oakfile"), 44 | targets: [] 45 | }); 46 | const t2 = await getTree(outs, env); 47 | 48 | t.equal(t2.get("oak_data/y1").content, "B1 x"); 49 | t.equal(t2.get("oak_data/y2").content, "B2 x"); 50 | 51 | t.equal( 52 | t2.get(`sub/oak_data/.oak-imports/${hash(["x1"])}/x`).content, 53 | "B1 x" 54 | ); 55 | t.equal( 56 | t2.get(`sub/oak_data/.oak-imports/${hash(["x2"])}/x`).content, 57 | "B2 x" 58 | ); 59 | t.end(); 60 | }); 61 |
-------------------------------------------------------------------------------- /src/commands/logs.ts: -------------------------------------------------------------------------------- 1 | import { OakCompiler } from "../oak-compile"; 2 | import pino from "pino"; 3 | import { fileArgument } from "../cli-utils"; 4 | import { getAndMaybeIntializeOakDB } from "../db"; 5 | import { hashFile } from "../utils"; 6 | import { createReadStream } from "fs-extra"; 7 | 8 | const logger = pino({ 9 | prettyPrint: true, 10 | }); 11 | 12 | export async function logsCommand(args: { 13 | filename: string; 14 | targets: readonly string[]; 15 | }): Promise<void> { 16 | if (args.targets.length === 0) { 17 | } 18 | const oakfilePath = fileArgument(args.filename); 19 | const oakfileHash = hashFile(oakfilePath); 20 | 21 | const oakDB = getAndMaybeIntializeOakDB(oakfilePath); 22 | 23 | const compiler = new OakCompiler(); 24 | const { cellHashMap } = await compiler.file(oakfilePath, null, null); 25 | 26 | for (const target of args.targets) { 27 | const cellSignature = cellHashMap.get(target); 28 | const result = await oakDB.getLog(target); 29 | if (!result) { 30 | logger.error(`No logs found for ${target}.`); 31 | continue; 32 | } 33 | const { path, oakfile, cellAncestorHash } = result; 34 | if ( 35 | oakfileHash !== oakfile || 36 | cellAncestorHash !== cellSignature.ancestorHash 37 | ) 38 | logger.warn( 39 | `WARNING (${target}): This log is from a previous Oakfile version.
Running oak run may update the task's log.` 40 | ); 41 | await new Promise((resolve, reject) => { 42 | const s = createReadStream(path); 43 | s.pipe(process.stdout); 44 | s.on("end", resolve); 45 | s.on("error", reject); 46 | }); 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | yarn-debug.log* 6 | yarn-error.log* 7 | lerna-debug.log* 8 | 9 | # Diagnostic reports (https://nodejs.org/api/report.html) 10 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json 11 | 12 | # Runtime data 13 | pids 14 | *.pid 15 | *.seed 16 | *.pid.lock 17 | 18 | # Directory for instrumented libs generated by jscoverage/JSCover 19 | lib-cov 20 | 21 | # Coverage directory used by tools like istanbul 22 | coverage 23 | 24 | # nyc test coverage 25 | .nyc_output 26 | 27 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) 28 | .grunt 29 | 30 | # Bower dependency directory (https://bower.io/) 31 | bower_components 32 | 33 | # node-waf configuration 34 | .lock-wscript 35 | 36 | # Compiled binary addons (https://nodejs.org/api/addons.html) 37 | build/Release 38 | 39 | # Dependency directories 40 | node_modules/ 41 | jspm_packages/ 42 | 43 | # TypeScript v1 declaration files 44 | typings/ 45 | 46 | # Optional npm cache directory 47 | .npm 48 | 49 | # Optional eslint cache 50 | .eslintcache 51 | 52 | # Optional REPL history 53 | .node_repl_history 54 | 55 | # Output of 'npm pack' 56 | *.tgz 57 | 58 | # Yarn Integrity file 59 | .yarn-integrity 60 | 61 | # dotenv environment variables file 62 | .env 63 | .env.test 64 | 65 | # parcel-bundler cache (https://parceljs.org/) 66 | .cache 67 | 68 | # next.js build output 69 | .next 70 | 71 | # nuxt.js build output 72 | .nuxt 73 | 74 | # vuepress build output 75 | .vuepress/dist 76 | 77 | # Serverless directories 78 | .serverless/ 79 | 80 | # FuseBox cache 81 | .fusebox/ 82 | 83 | # DynamoDB Local files 84 | .dynamodb/ 85 | 86 | # me 87 | temp/ 88 | built/ 89 | oak_data/ 90 | .oak/ 91 | pkg/ -------------------------------------------------------------------------------- /examples/ca-schools/Oakfile.js: -------------------------------------------------------------------------------- 1 | COUNTY = "Los Angeles"; 2 | 3 | COUNTY_SNAKE = COUNTY.toLowerCase().replace(" ", "_"); 4 | 5 | cde_txt = new Task({ 6 | target: "pubschools.txt", 7 | run: target => 8 | shell`curl -o ${target} https://raw.githubusercontent.com/datadesk/california-k12-notebooks/master/input/pubschls.txt` 9 | }); 10 | 11 | public_schools = new Task({ 12 | target: "public_schools.csv", 13 | run: target => 14 | shell`pipenv run python clean_cde.py --input=${cde_txt} --output=${target}` 15 | }); 16 | 17 | charter_schools = new Task({ 18 | target: "charter_schools.csv", 19 | run: target => shell`csvgrep -c is_charter -m Y ${public_schools} > ${target}` 20 | }); 21 | 22 | public_schools_in_county = new Task({ 23 | target: `public_schools_in_${COUNTY_SNAKE}_county.csv`, 24 | run: target => 25 | shell`csvgrep -c county -m "${COUNTY}" ${public_schools} > ${target}` 26 | }); 27 | 28 | charter_schools_in_county = new Task({ 29 | target: `charter_schools_in_${COUNTY_SNAKE}_county.csv`, 30 | run: target => 31 | shell`csvgrep -c county -m "${COUNTY}" ${charter_schools} > ${target}` 32 | }); 33 | 34 | to_geo = (input, output) => 35 | shell`pipenv run python to_geo.py --input ${input} --output ${output}`; 36 | 37 | 
charter_schools_geo = new Task({ 38 | target: "charter_schools.geojson", 39 | run: target => to_geo(charter_schools, target) 40 | }); 41 | 42 | public_schools_in_county_geo = new Task({ 43 | target: `public_schools_in_${COUNTY_SNAKE}_county.geojson`, 44 | run: target => to_geo(public_schools_in_county, target) 45 | }); 46 | 47 | charter_schools_in_county_geo = new Task({ 48 | target: `charter_schools_in_${COUNTY_SNAKE}_county.geojson`, 49 | run: target => to_geo(charter_schools_in_county, target) 50 | }); 51 | -------------------------------------------------------------------------------- /src/commands/run/index.ts: -------------------------------------------------------------------------------- 1 | //import { oak_run, defaultHookEmitter } from "../../core/run"; 2 | import { EventEmitter } from "events"; 3 | import { runInkApp } from "./ui"; 4 | import { fork } from "child_process"; 5 | import { dirname, join } from "path"; 6 | import { createInterface } from "readline"; 7 | import { createWriteStream } from "fs"; 8 | import { hashString } from "../../utils"; 9 | import { fileArgument } from "../../cli-utils"; 10 | import { mkdirsSync } from "fs-extra"; 11 | 12 | export async function runCommand(args: { 13 | filename: string; 14 | redefines: readonly string[]; 15 | targets: readonly string[]; 16 | }) { 17 | const runEvents = new EventEmitter(); 18 | 19 | const runHash = hashString(`${Math.random()}`); 20 | 21 | let app; 22 | if (process.stdout.isTTY) 23 | app = runInkApp(runEvents, runHash); 24 | const oakfilePath = fileArgument(args.filename); 25 | 26 | const oakLogPath = join( 27 | dirname(oakfilePath), 28 | ".oak", 29 | "oak-logs", 30 | `${runHash}.log` 31 | ); 32 | mkdirsSync(dirname(oakLogPath)); 33 | let oakLogStream = createWriteStream(oakLogPath); 34 | 35 | process.on("SIGINT", () => { 36 | if (app) 37 | app.unmount(); 38 | }); 39 | const runProcess = fork( 40 | join(__dirname, "fork"), 41 | [args.filename, ...args.targets], 42 | { silent: true } 43 | ); 44 | runProcess.stdout.pipe(oakLogStream); 45 | createInterface({ 46 | input: runProcess.stdout, 47 | }) 48 | .on("line", line => { 49 | try { 50 | const data = JSON.parse(line); 51 | runEvents.emit("log", data); 52 | } catch (e) { 53 | console.error(e); 54 | } 55 | }) 56 | .on("close", () => { 57 | runEvents.emit("close"); 58 | if (app) 59 | app.unmount(); 60 | }); 61 | } 62 | -------------------------------------------------------------------------------- /src/commands/path.ts: -------------------------------------------------------------------------------- 1 | import { Runtime } from "@observablehq/runtime"; 2 | import { OakCompiler } from "../oak-compile"; 3 | import { fileArgument } from "../cli-utils"; 4 | import { OakDB, getAndMaybeIntializeOakDB } from "../db"; 5 | import { Library } from "../Library"; 6 | import Task from "../Task"; 7 | import decorator from "../decorator"; 8 | import { EOL } from "os"; 9 | 10 | export async function pathCommand(args: { 11 | filename: string; 12 | targets: readonly string[]; 13 | }): Promise<void> { 14 | // no targets requested means there is nothing to resolve 15 | if (args.targets.length === 0) return; 16 | const oakfilePath = fileArgument(args.filename); 17 | const oakDB = await getAndMaybeIntializeOakDB(oakfilePath); 18 | 19 | const compiler = new OakCompiler(); 20 | const d = decorator( 21 | { 22 | onTaskUpToDate: t => t, 23 | onTaskCellDefinitionChanged: t => t, 24 | onTaskDependencyChanged: t => t, 25 | onTaskTargetChanged: t => t, 26 | onTaskTargetMissing: t => t, 27 | }, 28 | oakDB 29 | ); 30 | const { define } = await compiler.file(oakfilePath, d, 
null); 31 | 32 | const runtime = new Runtime(new Library()); 33 | 34 | const m1 = runtime.module(define); 35 | await runtime._compute(); 36 | let error = false; 37 | for (const target of args.targets) { 38 | const result = await m1.value(target).catch(e => { 39 | error = true; 40 | process.stderr.write( 41 | `"${target}" may not be defined, or may have an error.` 42 | ); 43 | return null; 44 | }); 45 | if (result instanceof Task) { 46 | process.stdout.write(result.target); 47 | process.stdout.write(EOL); 48 | } else { 49 | error = true; 50 | process.stderr.write(`"${target}" is not a Task.`); 51 | process.stderr.write(EOL); 52 | } 53 | } 54 | runtime.dispose(); 55 | if (error) process.exit(1); 56 | } 57 | -------------------------------------------------------------------------------- /src/Task.ts: -------------------------------------------------------------------------------- 1 | import { join } from "path"; 2 | import { Execution } from "./Execution"; 3 | 4 | type WatchArg = string | string[]; 5 | 6 | type TaskParams = { 7 | target: string; 8 | run: (any) => any; 9 | watch?: WatchArg; 10 | createFileBeforeRun?: boolean; 11 | createDirectoryBeforeRun?: boolean; 12 | freshIgnoreTarget?: boolean; 13 | ensureEmptyFile?: boolean; 14 | ensureEmptyDirectory?: boolean; 15 | persistTarget?: boolean; 16 | }; 17 | export default class Task { 18 | target: string; 19 | targetOriginal: string; 20 | run: (any) => any; 21 | watch: string[]; 22 | baseTargetDir: string; 23 | freshIgnoreTarget: boolean; 24 | ensureEmptyFile: boolean; 25 | ensureEmptyDirectory: boolean; 26 | persistTarget: boolean; 27 | 28 | constructor(params: TaskParams) { 29 | let { 30 | target, 31 | run, 32 | watch = [], 33 | freshIgnoreTarget = false, 34 | ensureEmptyFile = false, 35 | ensureEmptyDirectory = false, 36 | persistTarget = false, 37 | } = params; 38 | if (ensureEmptyFile && ensureEmptyDirectory) 39 | throw Error( 40 | "Task param Error: Only 'ensureEmptyFile' or 'ensureEmptyDirectory' can be true, not both." 41 | ); 42 | watch = Array.isArray(watch) ? 
watch : [watch]; 43 | 44 | this.targetOriginal = target; 45 | this.target = target; 46 | this.run = run; 47 | this.watch = watch; 48 | this.freshIgnoreTarget = freshIgnoreTarget; 49 | this.ensureEmptyFile = ensureEmptyFile; 50 | this.ensureEmptyDirectory = ensureEmptyDirectory; 51 | this.persistTarget = persistTarget; 52 | } 53 | async updateBasePath(newBasePath: string) { 54 | this.baseTargetDir = newBasePath; 55 | if (this.targetOriginal) 56 | this.target = join(this.baseTargetDir, this.targetOriginal); 57 | } 58 | runTask(): Execution { 59 | return this.run(this.target); 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /tests/run-inject/test.ts: -------------------------------------------------------------------------------- 1 | import test from "tape"; 2 | import { oak_run } from "../../src/core/run"; 3 | import { envFile, getTree } from "../utils"; 4 | import { removeSync } from "fs-extra"; 5 | import { getBaseFileHashes } from "../../src/utils"; 6 | 7 | const env = envFile(__dirname); 8 | 9 | function cleanUp() { 10 | removeSync(env.data("")); 11 | removeSync(env(".oak")); 12 | removeSync(env("sub/.oak")); 13 | removeSync(env("sub/oak_data")); 14 | } 15 | 16 | const source = env("Oakfile"); 17 | const target = env("sub/Oakfile"); 18 | const hash = getBaseFileHashes(source, target)(["c"]); 19 | const outs = [ 20 | "sub/oak_data/a", 21 | "sub/oak_data/b", 22 | "sub/oak_data/c", 23 | `sub/oak_data/.oak-imports/${hash}/a`, 24 | `sub/oak_data/.oak-imports/${hash}/b`, 25 | `sub/oak_data/.oak-imports/${hash}/c`, 26 | "oak_data/d" 27 | ]; 28 | 29 | test.onFinish(async () => { 30 | cleanUp(); 31 | }); 32 | 33 | cleanUp(); 34 | 35 | test("run-inject", async t => { 36 | await oak_run({ 37 | filename: env("sub/Oakfile"), 38 | targets: [] 39 | }); 40 | const t1 = await getTree(outs, env); 41 | t.equal(t1.get("sub/oak_data/a").content, "NY a"); 42 | t.equal(t1.get("sub/oak_data/b").content, "NY b"); 43 | t.equal(t1.get("sub/oak_data/c").content, "NY aNY b"); 44 | 45 | await oak_run({ 46 | filename: env("Oakfile"), 47 | targets: [] 48 | }); 49 | const t2 = await getTree(outs, env); 50 | 51 | t.equal(t2.get("sub/oak_data/a").content, "NY a"); 52 | t.equal(t2.get("sub/oak_data/b").content, "NY b"); 53 | t.equal(t2.get("sub/oak_data/c").content, "NY aNY b"); 54 | 55 | t.equal(t2.get(`sub/oak_data/.oak-imports/${hash}/a`).content, "CA a"); 56 | t.equal(t2.get(`sub/oak_data/.oak-imports/${hash}/b`).content, "CA b"); 57 | t.equal(t2.get(`sub/oak_data/.oak-imports/${hash}/c`).content, "CA aCA b"); 58 | t.equal(t2.get("oak_data/d").content, "CA aCA b"); 59 | t.end(); 60 | }); 61 | -------------------------------------------------------------------------------- /examples/ca-schools/clean_cde.py: -------------------------------------------------------------------------------- 1 | import pdb 2 | import argparse 3 | from os import path 4 | import pandas as pd 5 | import numpy as np 6 | 7 | parser = argparse.ArgumentParser(description='Process CLI arguments..') 8 | parser.add_argument('-i', '--input') 9 | parser.add_argument('-o', '--output') 10 | args = parser.parse_args() 11 | 12 | dir_path = path.dirname(path.realpath(__file__)) 13 | INPUT_FILE = path.join(dir_path, args.input) 14 | OUTPUT_FILE = path.join(dir_path, args.output) 15 | 16 | # Adaptd from https://github.com/datadesk/california-k12-notebooks/blob/master/02_transform.ipynb 17 | roster_df = pd.read_csv( 18 | INPUT_FILE, 19 | dtype={"CDSCode": str}, 20 | delimiter="\t", 21 | encoding="latin-1" 22 | 
) 23 | 24 | schools_df = roster_df[~(roster_df.School.isnull())] 25 | 26 | active_df = schools_df[schools_df['StatusType'] == 'Active'] 27 | trimmed_df = active_df[[ 28 | 'CDSCode', 29 | 'School', 30 | 'District', 31 | 'StreetAbr', 32 | 'City', 33 | 'County', 34 | 'Zip', 35 | 'Charter', 36 | 'FundingType', 37 | 'Latitude', 38 | 'Longitude', 39 | 'SOCType', 40 | 'EILCode', 41 | 'GSserved', 42 | ]] 43 | 44 | trimmed_df['low_grade_served'] = trimmed_df.GSserved.str.split('-').str.get(0) 45 | 46 | trimmed_df['high_grade_served'] = trimmed_df.GSserved.str.split('-').str.get(1) 47 | 48 | cleaned_df = trimmed_df.rename(columns={ 49 | 'CDSCode': "cds_code", 50 | 'School': "name", 51 | 'District': "district", 52 | 'StreetAbr': "street", 53 | 'City': "city", 54 | 'County': "county", 55 | 'Zip': "zipcode", 56 | 'Charter': "is_charter", 57 | 'FundingType': "funding_type", 58 | 'Latitude': "latitude", 59 | 'Longitude': "longitude", 60 | 'SOCType': "ownership", 61 | 'EILCode': "instructional_level", 62 | 'GSserved': "grades_served", 63 | }) 64 | 65 | cleaned_df.to_csv(OUTPUT_FILE, encoding="utf-8", index=False) 66 | -------------------------------------------------------------------------------- /examples/pudding-hypen-names/congress/s2_get-names-list.js: -------------------------------------------------------------------------------- 1 | const fs = require('fs'); 2 | const d3 = require('d3'); 3 | const request = require('request'); 4 | const cheerio = require('cheerio'); 5 | 6 | const IN_PATH = './output/congress/'; 7 | const OUT_PATH = './output/congress'; 8 | const names = []; 9 | const pages = d3.range(1, 11) 10 | 11 | function getNames(page) { 12 | const file = fs.readFileSync(`${IN_PATH}names-${page}.html`, 'utf-8'); 13 | const $ = cheerio.load(file) 14 | 15 | $('#main ol .expanded') 16 | .each((i, el) => { 17 | let name = $(el) 18 | .find('span a') 19 | .text() 20 | let firstName = name.split(',')[1].trim() 21 | let lastName = name.split(',')[0].trim() 22 | lastName = lastName.replace('Representative ', '') 23 | lastName = lastName.replace('Senator ', '') 24 | name = firstName.concat(' ', lastName) 25 | let stats = $(el) 26 | .find('.quick-search-member .member-profile') 27 | .children() 28 | .last() 29 | let dates = stats 30 | .find('span ul li') 31 | .text() 32 | let dateArray = dates.match(/\d+/g) 33 | let dateNums = null 34 | let startDate = null 35 | let endDate = null 36 | if (dateArray !== null) { 37 | dateNums = dateArray.map(Number) 38 | startDate = Math.min(...dateNums) 39 | if (dates.includes("Present")) { 40 | endDate = 2019 41 | } else { 42 | endDate = Math.max(...dateNums) 43 | } 44 | const league = 'congress' 45 | const last = name.substr(name.indexOf(' ')+1) 46 | const hyphen = last.includes('-') ? 
true : false 47 | if (name) names.push({name, startDate, endDate, league, hyphen}) 48 | } 49 | }); 50 | return names; 51 | } 52 | 53 | function init() { 54 | pages.map(getNames) 55 | const allNames = [].concat(...names).map(d => ({ 56 | ...d 57 | })); 58 | 59 | const csv = d3.csvFormat(allNames); 60 | fs.writeFileSync(`${OUT_PATH}/names.csv`, csv) 61 | } 62 | 63 | init(); 64 | -------------------------------------------------------------------------------- /tests/run-inject-deep/test.ts: -------------------------------------------------------------------------------- 1 | import test from "tape"; 2 | import { oak_run } from "../../src/core/run"; 3 | import { envFile, getTree } from "../utils"; 4 | import { removeSync } from "fs-extra"; 5 | import { getBaseFileHashes } from "../../src/utils"; 6 | 7 | const env = envFile(__dirname); 8 | 9 | function cleanUp() { 10 | removeSync(env("oak_data")); 11 | removeSync(env(".oak")); 12 | removeSync(env("sub/.oak")); 13 | removeSync(env("sub/oak_data")); 14 | removeSync(env("sub/subsub/.oak")); 15 | removeSync(env("sub/subsub/oak_data")); 16 | } 17 | 18 | const top = env("Oakfile"); 19 | const mid = env("sub/Oakfile"); 20 | const bot = env("sub/subsub/Oakfile"); 21 | const deep = getBaseFileHashes(mid, bot)(["x"]); 22 | const shallow = getBaseFileHashes(top, bot)(["x"]); 23 | 24 | const outs = [ 25 | `sub/subsub/oak_data/x`, 26 | 27 | `sub/oak_data/y`, 28 | `sub/subsub/oak_data/.oak-imports/${deep}/x`, 29 | 30 | `oak_data/y`, 31 | `sub/subsub/oak_data/.oak-imports/${shallow}/x` 32 | ]; 33 | 34 | test.onFinish(async () => { 35 | cleanUp(); 36 | }); 37 | 38 | cleanUp(); 39 | 40 | test("run-inject-deep", async t => { 41 | await oak_run({ 42 | filename: env("sub/subsub/Oakfile"), 43 | targets: [] 44 | }); 45 | const t1 = await getTree(outs, env); 46 | t.equal(t1.get("sub/subsub/oak_data/x").content, "A x"); 47 | 48 | await oak_run({ 49 | filename: env("sub/Oakfile"), 50 | targets: [] 51 | }); 52 | const t2 = await getTree(outs, env); 53 | t.equal(t2.get("sub/oak_data/y").content, "B x"); 54 | t.equal(t2.get(`sub/subsub/oak_data/.oak-imports/${deep}/x`).content, "B x"); 55 | 56 | await oak_run({ 57 | filename: env("Oakfile"), 58 | targets: [] 59 | }); 60 | const t3 = await getTree(outs, env); 61 | 62 | t.equal(t3.get("oak_data/y").content, "C x"); 63 | t.equal( 64 | t3.get(`sub/subsub/oak_data/.oak-imports/${shallow}/x`).content, 65 | "C x" 66 | ); 67 | 68 | t.end(); 69 | }); 70 | -------------------------------------------------------------------------------- /src/oak-compile-types.ts: -------------------------------------------------------------------------------- 1 | import { CellSignature } from "./utils"; 2 | 3 | export type InjectingSource = { 4 | sourcePath: string; 5 | cells: string[]; 6 | }; 7 | 8 | export type Decorator = ( 9 | cellFunction: (...any) => any, 10 | cellName: string, 11 | cellReferences: string[], 12 | cellHashMap: CellSignature, 13 | baseModuleDir: string, 14 | oakfilePath: string, 15 | importId: string 16 | ) => (...any) => any; 17 | 18 | export type Inspector = { 19 | pending: () => void; 20 | fulfilled: (value: any) => void; 21 | rejected: (error: any) => void; 22 | }; 23 | export type DefineFunctionType = ( 24 | runtime: any, 25 | observer: (name: string) => Inspector | null 26 | ) => any; 27 | 28 | export type ObservableImportDeclaration = { 29 | type: "ImportDeclaration"; 30 | specifiers: { 31 | type: "ImportSpecifier"; 32 | view: boolean; 33 | imported: { type: "Identifier"; name: string }; 34 | local: { type: "Identifier"; 
name: string }; 35 | }[]; 36 | injections: { 37 | type: "ImportSpecifier"; 38 | view: boolean; 39 | imported: { type: "Identifier"; name: string }; 40 | local: { type: "Identifier"; name: string }; 41 | }[]; 42 | source: { type: "Literal"; value: string; raw: string }; 43 | start: number; 44 | end: number; 45 | }; 46 | export type ObservableLiteral = { 47 | type: "Literal"; 48 | value: any; 49 | raw: string; 50 | start: number; 51 | end: number; 52 | }; 53 | export type ObservableBlockStatement = { 54 | type: "BlockStatement"; 55 | body: any[]; 56 | start: number; 57 | end: number; 58 | }; 59 | export type ObservableCell = { 60 | type: "Cell"; 61 | id: { 62 | type: "Identifier"; 63 | name: string; 64 | id: { 65 | name: string; 66 | }; 67 | } | null; 68 | input: string; 69 | start: number; 70 | end: number; 71 | async: boolean; 72 | generator: boolean; 73 | references: { type: string; name: string }[]; 74 | body: ObservableLiteral & 75 | ObservableImportDeclaration & 76 | ObservableBlockStatement; 77 | }; 78 | -------------------------------------------------------------------------------- /examples/ucsd-parking/extract_list.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from os import path 3 | from bs4 import BeautifulSoup 4 | from urllib.request import urlopen, unquote 5 | import json 6 | 7 | WB_LINK_BASE = 'http://rmp-wapps.ucsd.edu/TS/Survey/Parking%20Space%20Inventory/Quarterly%20Tables/' 8 | 9 | 10 | def parse_args(): 11 | parser = argparse.ArgumentParser(description='Process CLI arguments...') 12 | parser.add_argument('--input-html') 13 | parser.add_argument('--output-dir') 14 | parser.add_argument('--output-list') 15 | return parser.parse_args() 16 | 17 | 18 | def parse_quarter(href): 19 | month = href.split(' ')[2] 20 | 21 | if month in set(['January', 'April']): 22 | year = href.split(' ')[1].split('-')[1] 23 | elif month in set(['July', 'October']): 24 | year = href.split(' ')[1].split('-')[0] 25 | else: 26 | raise Exception('wtf', href) 27 | 28 | return {"month": month, "year": year} 29 | 30 | 31 | def main(): 32 | args = parse_args() 33 | dir_path = path.dirname(path.realpath(__file__)) 34 | 35 | INPUT_HTML_FILE = path.join(dir_path, args.input_html) 36 | OUTPUT_LIST_DIR = path.join(dir_path, args.output_dir) 37 | OUTPUT_LIST_FILE = path.join(dir_path, args.output_list) 38 | 39 | in_html = open(INPUT_HTML_FILE, 'r') 40 | out_list = [] 41 | 42 | soup = BeautifulSoup(in_html, 'html.parser') 43 | all_links = soup.find_all('a') 44 | for link in all_links: 45 | if 'xls' in link['href']: 46 | wb_link = WB_LINK_BASE + link['href'] 47 | wb_data = urlopen(wb_link).read() 48 | qtr = parse_quarter(unquote(link['href'])) 49 | wb_out_file = path.join( 50 | OUTPUT_LIST_DIR, "{year}-{month}.xls".format(year=qtr.get('year'), month=qtr.get('month'))) 51 | 52 | with open(wb_out_file, 'wb+') as f: 53 | f.write(wb_data) 54 | 55 | out_list.append(wb_out_file) 56 | 57 | with open(OUTPUT_LIST_FILE, 'w+') as f: 58 | f.write(('\n').join(out_list)) 59 | 60 | 61 | if __name__ == '__main__': 62 | main() 63 | -------------------------------------------------------------------------------- /tests/utils.ts: -------------------------------------------------------------------------------- 1 | import { readFile, stat, Stats, unlinkSync, utimes } from "fs"; 2 | import { join } from "path"; 3 | 4 | type OpenFileType = { 5 | content: string; 6 | stat: Stats; 7 | }; 8 | 9 | export const envFile = (dirname: string) => 10 | Object.assign((path: string) => 
11 | join(dirname, "env", path), {data: (path: string) => join(dirname, 'env', 'oak_data', path)}); 12 | 13 | export const touch = async ( 14 | path: string, 15 | atime: Date, 16 | mtime: Date 17 | ): Promise<OpenFileType> => { 18 | // wrap utimes in a promise so we actually wait for it before re-opening 19 | await new Promise<void>((resolve, reject) => 20 | utimes(path, atime.getTime() / 1000, mtime.getTime() / 1000, err => 21 | err ? reject(err) : resolve() 22 | ) 23 | ); 24 | return open(path); 25 | }; 26 | export const open = async (path: string): Promise<OpenFileType> => { 27 | const content: string = await new Promise<string>((resolve, reject) => { 28 | readFile(path, "utf8", (err: NodeJS.ErrnoException, data: string) => { 29 | if (err) { 30 | if (err.code === "ENOENT") return resolve(null); 31 | return reject(err); 32 | } 33 | resolve(data); 34 | }); 35 | }); 36 | const s: Stats = await new Promise<Stats>((resolve, reject) => { 37 | stat(path, (err: NodeJS.ErrnoException, s: Stats) => { 38 | if (err) { 39 | if (err.code === "ENOENT") return resolve(null); 40 | return reject(err); 41 | } 42 | resolve(s); 43 | }); 44 | }); 45 | return { content, stat: s }; 46 | }; 47 | 48 | export function cleanUp( 49 | filepath: (path: string) => string, 50 | fileNames: string[] 51 | ) { 52 | fileNames.map(name => { 53 | try { 54 | unlinkSync(join(filepath(name))); 55 | } catch (err) { 56 | // console.error("cleanUp error: ", err); 57 | } 58 | }); 59 | } 60 | 61 | export const getTree = async ( 62 | outFiles: string[], 63 | env: (string) => string 64 | ): Promise<Map<string, OpenFileType>> => { 65 | const files = await Promise.all( 66 | outFiles.map(async out => [out, await open(env(out))]) 67 | ); 68 | const map = new Map(); 69 | files.map(file => { 70 | map.set(file[0], file[1]); 71 | }); 72 | return map; 73 | }; 74 | -------------------------------------------------------------------------------- /examples/native-lands-colleges/filter_colleges.py: -------------------------------------------------------------------------------- 1 | 2 | import argparse 3 | from os import path 4 | import pandas as pd 5 | import numpy as np 6 | 7 | parser = argparse.ArgumentParser(description='Process CLI arguments..') 8 | parser.add_argument('-i', '--input') 9 | parser.add_argument('-o', '--output') 10 | args = parser.parse_args() 11 | 12 | dir_path = path.dirname(path.realpath(__file__)) 13 | INPUT_FILE = path.join(dir_path, args.input) 14 | OUTPUT_FILE = path.join(dir_path, args.output) 15 | 16 | df = pd.read_csv(INPUT_FILE) 17 | 18 | 19 | # only public/private non profits 20 | schools = df[(df['CONTROL'] == 1) | (df['CONTROL'] == 2)] 21 | schools = schools[schools['MAIN'] == 1] # only main campuses 22 | # only look at predominately bachelors serving institutions 23 | schools = schools[schools['PREDDEG'] == 3] 24 | schools = schools[schools['DISTANCEONLY'] == 0] # Only in-person colleges 25 | # schools = schools[schools['CURROPER'] == 1] # Only currently operating 26 | 27 | 28 | # Dropping NaN values for certain columns 29 | schools = schools[(~np.isnan(schools['LATITUDE']))] 30 | schools = schools[(~np.isnan(schools['LONGITUDE']))] 31 | 32 | 33 | # Personal Preference 34 | schools = schools[schools['UGDS'] > 1800] # reduce # of schools 35 | 36 | cols = [ 37 | 'UNITID', # id 38 | 'INSTNM', # name 39 | 'CITY', # city 40 | 'STABBR', # state 41 | 'ZIP', # zip 42 | 'INSTURL', # website 43 | 'UGDS', # size 44 | 'LATITUDE', 45 | 'LONGITUDE', 46 | 'ADM_RATE', # admit rate 47 | 'HIGHDEG', # highest degree offered 48 | 'CONTROL', # public/private 49 | 'CCBASIC', # Carnegie Classification -- basic 50 | 'CCUGPROF', # Carnegie Classification -- undergraduate 
profile 51 | 'CCSIZSET', # Carnegie Classification -- size and setting 52 | 'TRIBAL', # Flag for tribal college and university 53 | 'NANTI', # Flag for Native American non-tribal institution 54 | 'UGDS_AIAN', # Total share of enrollment of undergraduate degree-seeking students who are American Indian/Alaska Native 55 | 'UG_AIANOLD', # Total share of enrollment of undergraduate students who are Asian/Pacific Islander 56 | ] 57 | 58 | schools = schools[cols] 59 | schools = schools.sort_values(['UGDS']) 60 | schools.to_csv(OUTPUT_FILE) 61 | -------------------------------------------------------------------------------- /tests/task-watch/test.ts: -------------------------------------------------------------------------------- 1 | import test from "tape"; 2 | import { removeSync, writeFile } from "fs-extra"; 3 | import { oak_run } from "../../src/core/run"; 4 | import { envFile, open } from "../utils"; 5 | 6 | const env = envFile(__dirname); 7 | 8 | function cleanUp() { 9 | removeSync(env("oak_data")); 10 | removeSync(env(".oak")); 11 | removeSync(env("build_c.js")); 12 | } 13 | 14 | const createBuildC = async (contents: string) => new Promise((resolve, reject)=>{ 15 | writeFile(env("build_c.js"), contents, "utf8", err => { 16 | if(err)reject(err); 17 | else resolve(); 18 | }); 19 | 20 | }) 21 | 22 | test.onFinish(() => { 23 | //cleanUp(); 24 | }); 25 | 26 | cleanUp(); 27 | 28 | test("task-watch", async t => { 29 | await createBuildC(` 30 | const { readFileSync } = require("fs"); 31 | const ins = process.argv.slice(2); 32 | ins.map(inp => process.stdout.write(readFileSync(inp))); 33 | process.stdout.write("C1"); 34 | `); 35 | 36 | await oak_run({ filename: env("Oakfile"), targets: [] }); 37 | const a_file = await open(env("oak_data/a")); 38 | const b_file = await open(env("oak_data/b")); 39 | const c_file = await open(env("oak_data/c")); 40 | t.equal(a_file.content, "a"); 41 | t.equal(b_file.content, "b"); 42 | t.equal(c_file.content, "abC1"); 43 | t.true(a_file.stat.mtime < c_file.stat.mtime); 44 | t.true(b_file.stat.mtime < c_file.stat.mtime); 45 | 46 | // when we change build_c, only c should update. 47 | await createBuildC(` 48 | const { readFileSync } = require("fs"); 49 | const ins = process.argv.slice(2); 50 | ins.map(inp => process.stdout.write(readFileSync(inp))); 51 | process.stdout.write("C2"); 52 | `); 53 | 54 | await oak_run({ filename: env("Oakfile"), targets: [] }); 55 | const a_file2 = await open(env("oak_data/a")); 56 | const b_file2 = await open(env("oak_data/b")); 57 | const c_file2 = await open(env("oak_data/c")); 58 | t.equal(a_file2.content, "a"); 59 | t.equal(b_file2.content, "b"); 60 | t.equal(c_file2.content, "abC2"); 61 | t.true(a_file2.stat.mtime < c_file2.stat.mtime); 62 | t.true(b_file2.stat.mtime < c_file2.stat.mtime); 63 | 64 | // FIXME TODO why so flaky 65 | // maybe the sqlite datastore is storing bad filesignature stuff 66 | //t.equal(a_file.stat.mtime.getTime(), a_file2.stat.mtime.getTime()); 67 | //t.equal(b_file.stat.mtime.getTime(), b_file2.stat.mtime.getTime()); // flaky? 
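// Possible cause (assumption, not verified): filesystems often store mtimes at
// coarse resolution (commonly 1 second), so two runs that finish within the
// same tick can record identical mtimes, which would make the equality and
// strict < comparisons above fail intermittently.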
68 | //t.true(c_file.stat.mtime < c_file2.stat.mtime); 69 | 70 | t.end(); 71 | }); 72 | -------------------------------------------------------------------------------- /examples/native-lands-colleges/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Django stuff: 58 | *.log 59 | local_settings.py 60 | db.sqlite3 61 | db.sqlite3-journal 62 | 63 | # Flask stuff: 64 | instance/ 65 | .webassets-cache 66 | 67 | # Scrapy stuff: 68 | .scrapy 69 | 70 | # Sphinx documentation 71 | docs/_build/ 72 | 73 | # PyBuilder 74 | target/ 75 | 76 | # Jupyter Notebook 77 | .ipynb_checkpoints 78 | 79 | # IPython 80 | profile_default/ 81 | ipython_config.py 82 | 83 | # pyenv 84 | .python-version 85 | 86 | # pipenv 87 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 88 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 89 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 90 | # install all needed dependencies. 91 | #Pipfile.lock 92 | 93 | # celery beat schedule file 94 | celerybeat-schedule 95 | 96 | # SageMath parsed files 97 | *.sage.py 98 | 99 | # Environments 100 | .env 101 | .venv 102 | env/ 103 | venv/ 104 | ENV/ 105 | env.bak/ 106 | venv.bak/ 107 | 108 | # Spyder project settings 109 | .spyderproject 110 | .spyproject 111 | 112 | # Rope project settings 113 | .ropeproject 114 | 115 | # mkdocs documentation 116 | /site 117 | 118 | # mypy 119 | .mypy_cache/ 120 | .dmypy.json 121 | dmypy.json 122 | 123 | # Pyre type checker 124 | .pyre/ 125 | 126 | # ME 127 | *.csv 128 | *json 129 | *.zip 130 | -------------------------------------------------------------------------------- /examples/ucsd-parking/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Django stuff: 58 | *.log 59 | local_settings.py 60 | db.sqlite3 61 | db.sqlite3-journal 62 | 63 | # Flask stuff: 64 | instance/ 65 | .webassets-cache 66 | 67 | # Scrapy stuff: 68 | .scrapy 69 | 70 | # Sphinx documentation 71 | docs/_build/ 72 | 73 | # PyBuilder 74 | target/ 75 | 76 | # Jupyter Notebook 77 | .ipynb_checkpoints 78 | 79 | # IPython 80 | profile_default/ 81 | ipython_config.py 82 | 83 | # pyenv 84 | .python-version 85 | 86 | # pipenv 87 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 88 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 89 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 90 | # install all needed dependencies. 91 | #Pipfile.lock 92 | 93 | # celery beat schedule file 94 | celerybeat-schedule 95 | 96 | # SageMath parsed files 97 | *.sage.py 98 | 99 | # Environments 100 | .env 101 | .venv 102 | env/ 103 | venv/ 104 | ENV/ 105 | env.bak/ 106 | venv.bak/ 107 | 108 | # Spyder project settings 109 | .spyderproject 110 | .spyproject 111 | 112 | # Rope project settings 113 | .ropeproject 114 | 115 | # mkdocs documentation 116 | /site 117 | 118 | # mypy 119 | .mypy_cache/ 120 | .dmypy.json 121 | dmypy.json 122 | 123 | # Pyre type checker 124 | .pyre/ 125 | 126 | *.csv 127 | *.txt 128 | *.html 129 | *.xls 130 | quarterly_workbooks/* 131 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@alex.garcia/oak", 3 | "version": "0.0.21", 4 | "description": "Easy, simple, data workflows", 5 | "main": "./built/cli.js", 6 | "bin": { 7 | "oak": "./built/cli.js" 8 | }, 9 | "files": [ 10 | "built/**/*" 11 | ], 12 | "pkg": { 13 | "targets": [ 14 | "node10.17-linux-x64", 15 | "node10.17-alpine-x64", 16 | "node10.17-macos-x64", 17 | "node10.17-win-x64" 18 | ] 19 | }, 20 | "license": "MIT", 21 | "publishConfig": { 22 | "access": "public" 23 | }, 24 | "repository": { 25 | "type": "git", 26 | "url": "https://github.com/asg017/oak.git" 27 | }, 28 | "keywords": [ 29 | "etl", 30 | "pipeline", 31 | "workflow", 32 | "cli", 33 | "dag", 34 | "task", 35 | "oak" 36 | ], 37 | "scripts": { 38 | "build": "tsc", 39 | "pkg": "mkdir -p pkg && pkg . 
--out-path ./pkg", 40 | "prepare": "rm -rf built && npm run build", 41 | "preversion": "npm test", 42 | "version": "rm -rf built && npm run build", 43 | "postversion": "git push && git push --tags && rm -rf build/temp", 44 | "format": "prettier --write src/**/*.ts src/**/*.tsx", 45 | "format-test": "prettier --check src/**/*.ts src/**/*.tsx", 46 | "test": "npm run format-test && ts-node ./node_modules/tape/bin/tape tests/**/*.ts", 47 | "t": "ts-node ./node_modules/tape/bin/tape", 48 | "o": "ts-node ./src/cli.ts" 49 | }, 50 | "dependencies": { 51 | "@observablehq/parser": "^4.0.1", 52 | "@observablehq/runtime": "4.4.4", 53 | "@observablehq/stdlib": "^3.0.2", 54 | "@rushstack/ts-command-line": "^4.3.13", 55 | "better-sqlite3": "^7.0.1", 56 | "chalk": "^2.4.2", 57 | "d3-array": "^2.2.0", 58 | "express": "^4.17.1", 59 | "fs-extra": "^8.1.0", 60 | "hasha": "^5.1.0", 61 | "immutable": "^4.0.0-rc.12", 62 | "ink": "^3.0.4", 63 | "ink-spinner": "4.0.1", 64 | "pino": "^5.17.0", 65 | "pino-pretty": "^3.6.1", 66 | "react": "^16.13.1", 67 | "read-package-json": "^2.1.0", 68 | "split2": "^3.1.1", 69 | "untildify": "^4.0.0" 70 | }, 71 | "devDependencies": { 72 | "@types/better-sqlite3": "^5.4.0", 73 | "@types/fs-extra": "^8.1.0", 74 | "@types/node": "^13.9.5", 75 | "@types/pino": "^5.17.0", 76 | "@types/react": "^16.9.32", 77 | "@types/split2": "^2.1.6", 78 | "@types/tape": "^4.2.34", 79 | "mock-fs": "^4.10.1", 80 | "pkg": "^4.4.7", 81 | "prettier": "^1.19.1", 82 | "surge": "^0.21.3", 83 | "tape": "^4.11.0", 84 | "ts-node": "^8.3.0", 85 | "tslib": "^1.10.0", 86 | "typescript": "^3.5.3" 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /examples/pudding-hypen-names/combine-all-names.js: -------------------------------------------------------------------------------- 1 | const fs = require('fs'); 2 | const d3 = require('d3'); 3 | const _ = require('lodash'); 4 | 5 | const OUT_PATH = './output/' 6 | const CON_IN = './output/congress/names.csv' 7 | const MLB_IN = './output/mlb/names.csv' 8 | const MLS_IN = './output/mls/names.csv' 9 | const NBA_IN = './output/nba/names.csv' 10 | const NFL_IN = './output/nfl/names.csv' 11 | const NHL_IN = './output/nhl/names.csv' 12 | const NWLS_IN = './output/nwls/names.csv' 13 | const WNBA_IN = './output/wnba/names.csv' 14 | 15 | const files = [CON_IN, MLB_IN, MLS_IN, NBA_IN, NFL_IN, NHL_IN, NWLS_IN, WNBA_IN] 16 | let combinedNames = [] 17 | let withDecades = [] 18 | 19 | function processCSV(filename) { 20 | let raw = fs.readFileSync(filename, 'utf-8') 21 | let csv = d3.csvParse(raw) 22 | let newdb = _.unionBy(combinedNames, csv, 'name') 23 | combinedNames = newdb 24 | } 25 | 26 | function assignDecade(num){ 27 | if (num >= 1880 && num < 1890) { return 1880} 28 | else if (num >= 1890 && num < 1900) { return 1890} 29 | else if (num >= 1900 && num < 1910) { return 1900} 30 | else if (num >= 1910 && num < 1920) { return 1910} 31 | else if (num >= 1920 && num < 1930) { return 1920} 32 | else if (num >= 1930 && num < 1940) { return 1930} 33 | else if (num >= 1940 && num < 1950) { return 1940} 34 | else if (num >= 1950 && num < 1960) { return 1950} 35 | else if (num >= 1960 && num < 1970) { return 1960} 36 | else if (num >= 1970 && num < 1980) { return 1970} 37 | else if (num >= 1980 && num < 1990) { return 1980} 38 | else if (num >= 1990 && num < 2000) { return 1990} 39 | else if (num >= 2000 && num < 2010) { return 2000} 40 | else if (num >= 2010 && num < 2020) { return 2010} 41 | else { return null } 42 | } 43 | 44 | function 
addDecade(data){ 45 | withDecades = data.map(d => ({ 46 | ...d, 47 | decade: assignDecade(d.startDate), 48 | lastName: (d.name).split(' ')[((d.name).split(' ')).length-1], 49 | nameLength: ((d.name).split(' ')[((d.name).split(' ')).length-1]).length, 50 | reason: null 51 | })) 52 | } 53 | 54 | function init() { 55 | _.each(files, filename => processCSV(filename)) 56 | 57 | addDecade(combinedNames) 58 | console.log(withDecades) 59 | 60 | const all = d3.csvFormat(withDecades) 61 | fs.writeFileSync(`${OUT_PATH}/allCombinedNames.csv`, all) 62 | 63 | const noCongress = _.filter(withDecades, function(d) { return d.league !== 'congress'; }); 64 | const allSports = d3.csvFormat(noCongress) 65 | fs.writeFileSync(`${OUT_PATH}/sportsCombinedNames.csv`, allSports) 66 | 67 | const withHyphens = _.filter(noCongress, ['hyphen', 'true']) // filter the parsed row objects, not the formatted CSV string 68 | const allHyphens = d3.csvFormat(withHyphens) 69 | fs.writeFileSync(`${OUT_PATH}/hyphensCombinedNames.csv`, allHyphens) 70 | } 71 | 72 | init(); 73 | -------------------------------------------------------------------------------- /examples/ucsd-parking/convert.py: -------------------------------------------------------------------------------- 1 | import os 2 | import xlrd 3 | import pandas 4 | 5 | def get_lot_entries(qtr, ws, row_i, num_types): 6 | lot_name = ws.cell_value(rowx=row_i, colx=0) 7 | entries = [] 8 | if not lot_name: 9 | return entries 10 | for i in range(num_types): 11 | row = ws.row(row_i+i) 12 | row = map(lambda x: x.value, row) 13 | 14 | for j in range(2,13): 15 | if not row[j] or type(row[j]) is str or type(row[j]) is unicode: 16 | row[j] = 0 17 | else: 18 | row[j] = int(row[j]) 19 | 20 | entries.append({ 21 | 'quarter':qtr, 22 | 'lot': lot_name, 23 | 'space_type': row[1], 24 | 25 | 'num_spots': row[2], 26 | 'time_counts': row[3:13], 27 | '8am_empty_count': row[3], 28 | '9am_empty_count': row[4], 29 | '10am_empty_count': row[5], 30 | '11am_empty_count': row[6], 31 | '12pm_empty_count': row[7], 32 | '1pm_empty_count': row[8], 33 | '2pm_empty_count': row[9], 34 | '3pm_empty_count': row[10], 35 | '4pm_empty_count': row[11], 36 | '5pm_empty_count': row[12], 37 | }) 38 | return entries 39 | 40 | from pprint import pprint as pp 41 | def bylot_df(qtr, ws): 42 | entries = [] 43 | 44 | row_i = 6 # starting one 45 | row_limit = ws.nrows 46 | 47 | num_types = 0 48 | while ws.cell_value(rowx=row_i+num_types, colx=1) != 'Total': 49 | num_types += 1 50 | 51 | while row_i < row_limit: 52 | lot_name = ws.cell_value(rowx=row_i, colx=0) 53 | lot_entries = get_lot_entries(qtr, ws, row_i, num_types) 54 | entries = entries + lot_entries 55 | row_i += num_types + 1 56 | return pandas.DataFrame(entries) 57 | 58 | def convert_wbs(): 59 | 60 | curr_dir = os.path.dirname(__file__) 61 | filenames = os.listdir( os.path.join(curr_dir, '../input/parking_occupancy') ) 62 | frames = [] 63 | for filename in filenames: 64 | print('Converting %s...' 
% (filename)) 65 | qtr = filename[:4] 66 | f = open(os.path.join(curr_dir, '../input/parking_occupancy/' + filename) , 'r' ) 67 | wb_data = f.read() 68 | f.close() 69 | wb = xlrd.open_workbook(file_contents=wb_data) # xlrd.book.Book object 70 | try: 71 | ws_lot = wb.sheet_by_name('By Lot') 72 | except: 73 | ws_lot = wb.sheet_by_name('By Lot ') #SU16, wtf 74 | df = bylot_df(qtr, ws_lot) 75 | frames.append(df) 76 | 77 | #wb.nsheets # 11 78 | #wb.sheet_names() #[u'University-wide', u'By Location', u'By Area', u'By Neighborhood', u'By Lot', u'By Structure', u'Closed', u'Visitor', u'Allocated', u'Key', u'Schedule'] 79 | df = pandas.concat(frames) 80 | df.index.name = 'id' 81 | df.to_csv('test.csv') 82 | def main(): 83 | convert_wbs() 84 | 85 | if __name__ == '__main__': 86 | main() -------------------------------------------------------------------------------- /examples/school-accidents/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 98 | __pypackages__/ 99 | 100 | # Celery stuff 101 | celerybeat-schedule 102 | celerybeat.pid 103 | 104 | # SageMath parsed files 105 | *.sage.py 106 | 107 | # Environments 108 | .env 109 | .venv 110 | env/ 111 | venv/ 112 | ENV/ 113 | env.bak/ 114 | venv.bak/ 115 | 116 | # Spyder project settings 117 | .spyderproject 118 | .spyproject 119 | 120 | # Rope project settings 121 | .ropeproject 122 | 123 | # mkdocs documentation 124 | /site 125 | 126 | # mypy 127 | .mypy_cache/ 128 | .dmypy.json 129 | dmypy.json 130 | 131 | # Pyre type checker 132 | .pyre/ 133 | 134 | # pytype static type analyzer 135 | .pytype/ 136 | 137 | # Cython debug symbols 138 | cython_debug/ 139 | -------------------------------------------------------------------------------- /docs/pages/introduction.md: -------------------------------------------------------------------------------- 1 | import Layout from '../components/DocsLayout.jsx' 2 | export default Layout 3 | 4 | # Introduction 5 | 6 | ## Terminology 7 | 8 | When you use `oak`, you create a file called `Oakfile`. You can name it however you wish - for example, `Oakfile.js` if you want JavaScript syntax highlighting - since most oak commands have a `-f` or `--file` flag (e.g. `oak print -f Oakfile.js`, `oak static --file path/to/Oak`, etc.). 9 | 10 | Each Oakfile has code that defines a **workflow**. Each workflow is a series of re-usable components called **recipes**. Each recipe outputs a file - and a recipe can take in other recipes' output files as input. 11 | 12 | For example, here is a workflow whose recipes run linearly: 13 | 14 | ![](../static/term1.png) 15 | 16 | In this workflow, there is a recipe `a` that outputs some file - let's say `a.txt`. Then, recipe `b` takes in that file, `a.txt`, as input, and outputs a file, `b.txt`. Then recipe `c` takes in `b.txt` as input and outputs a file, and so on, until recipe `f` outputs a file, `f.txt`. 17 | 18 | Workflows don't have to run linearly like above - as long as it's a [directed acyclic graph](https://en.wikipedia.org/wiki/Directed_acyclic_graph) (i.e., no loops), then `oak` will run it. 19 | 20 | Here's a more complex example: 21 | 22 | ![](../static/term2.png) 23 | 24 | Notice how there are 3 distinct "outputs" to this workflow - `q`, `z`, and `h`. This is completely legal - not all recipes have to be connected like in the example above. 25 | 26 | ### Comparing to Observable notebooks 27 | 28 | `oak` is built on the same runtime and parser as [Observable notebooks](https://observablehq.com) - so many concepts are similar. 29 | 30 | An entire Observable notebook is similar to a single `Oakfile` - in fact, an Observable notebook is a [module in the Observable runtime](https://github.com/observablehq/runtime#modules), and an Oakfile is also just a module. 31 | 32 | A cell in an Observable notebook is similar to a recipe in an `Oakfile`. You could define cells in an `Oakfile` that aren't recipes, but in most use cases, every cell in an Oakfile is a recipe. 33 | 34 | There are two main differences between an Observable notebook and an `Oakfile`. The first is that Observable notebooks are executed in your browser, while `Oakfiles` are executed in your computer's shell. There are security implications of this, so I'd highly recommend reading the [Security portion](./security) of these docs - ESPECIALLY if you're running someone else's code. 
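
To make the linear example above concrete, here is a minimal sketch of what such an Oakfile could look like (hypothetical file names and shell commands; `Task` and `shell` are used the same way in this repo's example Oakfiles):

```js
// recipe "a": writes its target file, a.txt
a = new Task({
  target: "a.txt",
  run: target => shell`echo "hello" > ${target}`
});

// recipe "b": referencing `a` in the command makes a.txt an input,
// so b is re-run when a's output changes
b = new Task({
  target: "b.txt",
  run: target => shell`tr a-z A-Z < ${a} > ${target}`
});
```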
35 | 36 | The second difference is that Observable notebooks use [a standard library](https://github.com/observablehq/stdlib) that's built for web browsers, while `oak` uses a smaller standard library that's built for node.js. See the [API Reference](./reference) to learn more about `oak`'s standard library. 37 | 38 | ### Explaining with a cooking metaphor 39 | -------------------------------------------------------------------------------- /examples/ghcdn/readme.txt: -------------------------------------------------------------------------------- 1 | GHCN-D is a dataset that contains daily observations over global land areas. 2 | Like its monthly counterpart, GHCN-Daily is a composite of climate records from 3 | numerous sources that were merged together and subjected to a common suite of quality 4 | assurance reviews. The archive includes the following meteorological elements: 5 | 6 | * Daily maximum temperature 7 | * Daily minimum temperature 8 | * Temperature at the time of observation 9 | * Precipitation (i.e., rain, melted snow) 10 | * Snowfall 11 | * Snow depth 12 | * Other elements where available 13 | 14 | The format of the GHCN Daily data is different from NCDC's DSI 3200 dataset and may 15 | necessitate some changes for users accustomed to receiving monthly updates of 16 | DSI 3200. The format documentation of the GHCN Daily period of record station 17 | files and list of country codes can be found in the GHCN Daily "readme.txt" file 18 | located on the ftp server (http://www1.ncdc.noaa.gov/pub/data/ghcn/daily/readme.txt or 19 | ftp://ftp.ncdc.noaa.gov/pub/data/ghcn/daily/readme.txt). 20 | 21 | This by_year directory contains an alternate form of the GHCN Daily dataset. In this 22 | directory, the period of record station files are parsed into 23 | yearly files that contain all available GHCN Daily station data for that year 24 | plus a time of observation field (where available--primarily for U.S. Cooperative 25 | Observers). The observation times for U.S. Cooperative Observer data 26 | come from the station histories archived in NCDC's Multinetwork Metadata System (MMS). 27 | The by_year files are updated daily to be in sync with updates to the GHCN Daily dataset. 28 | The yearly files are formatted so that every observation 29 | (i.e., station/year/month/day/element/observation time) is represented by a single row 30 | with the following fields: 31 | 32 | station identifier (GHCN Daily Identification Number) 33 | date (yyyymmdd; where yyyy=year; mm=month; and, dd=day) 34 | observation type (see ftp://ftp.ncdc.noaa.gov/pub/data/ghcn/daily/readme.txt for definitions) 35 | observation value (see ftp://ftp.ncdc.noaa.gov/pub/data/ghcn/daily/readme.txt for units) 36 | observation time (if available, as hhmm where hh=hour and mm=minutes in local time) 37 | 38 | The fields are comma delimited. 39 | 40 | Further documentation details are provided in the text file ghcn-daily_format.rtf in this 41 | ftp://ftp.ncdc.noaa.gov/pub/data/ghcn/daily/by_year/ directory. 42 | 43 | Users may find data files located on our ftp server at ftp://ftp.ncdc.noaa.gov/pub/data/ghcn/daily/all/. 44 | NOTE: 45 | There is no observation time contained in period of record station files. 46 | 47 | GHCN Daily data are currently available to ALL users at no charge. 48 | All users will continue to have access to directories for ftp/ghcn/ and ftp3/3200 & 3210/ data at no charge. 
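For illustration, a single row in one of these by_year files, following the field order above, might look like this (hypothetical station and values):

USC00045721,20190214,PRCP,30,0700

i.e. the station with GHCN Daily ID USC00045721 reported a precipitation (PRCP) observation with value 30 on February 14, 2019, observed at 07:00 local time.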
49 | 50 | For detailed information on this dataset visit the GHCN Daily web page at http://www.ncdc.noaa.gov/oa/climate/ghcn-daily/ 51 | 52 | Please email questions/concerns to nndc.weborder@noaa.gov 53 | -------------------------------------------------------------------------------- /examples/ucsd-parking/aggregate_lots.py: -------------------------------------------------------------------------------- 1 | from os import path 2 | import argparse 3 | import xlrd 4 | import pandas 5 | from pprint import pprint as pp 6 | 7 | 8 | def get_lot_entries(qtr, ws, row_i, num_types): 9 | lot_name = ws.cell_value(rowx=row_i, colx=0) 10 | entries = [] 11 | if not lot_name: 12 | return entries 13 | for i in range(num_types): 14 | row = ws.row(row_i+i) 15 | row = map(lambda x: x.value, row) 16 | 17 | for j in range(2, 13): 18 | if not row[j] or type(row[j]) is str or type(row[j]) is unicode: 19 | row[j] = 0 20 | else: 21 | row[j] = int(row[j]) 22 | 23 | entries.append({ 24 | 'quarter': qtr, 25 | 'lot': lot_name, 26 | 'space_type': row[1], 27 | 28 | 'num_spots': row[2], 29 | 'time_counts': row[3:13], 30 | '8am_empty_count': row[3], 31 | '9am_empty_count': row[4], 32 | '10am_empty_count': row[5], 33 | '11am_empty_count': row[6], 34 | '12pm_empty_count': row[7], 35 | '1pm_empty_count': row[8], 36 | '2pm_empty_count': row[9], 37 | '3pm_empty_count': row[10], 38 | '4pm_empty_count': row[11], 39 | '5pm_empty_count': row[12], 40 | }) 41 | return entries 42 | 43 | 44 | def bylot_df(qtr, ws): 45 | entries = [] 46 | 47 | row_i = 6 # starting one 48 | row_limit = ws.nrows 49 | 50 | num_types = 0 51 | while ws.cell_value(rowx=row_i+num_types, colx=1) != 'Total': 52 | num_types += 1 53 | 54 | while row_i < row_limit: 55 | lot_name = ws.cell_value(rowx=row_i, colx=0) 56 | lot_entries = get_lot_entries(qtr, ws, row_i, num_types) 57 | entries = entries + lot_entries 58 | row_i += num_types + 1 59 | return pandas.DataFrame(entries) 60 | 61 | 62 | def parse_args(): 63 | parser = argparse.ArgumentParser(description='Process CLI arguments...') 64 | parser.add_argument('--workbooks') 65 | parser.add_argument('--output-csv') 66 | return parser.parse_args() 67 | 68 | 69 | def main(): 70 | args = parse_args() 71 | dir_path = path.dirname(path.realpath(__file__)) 72 | 73 | WORKBOOKS_FILE = path.join(dir_path, args.workbooks) 74 | OUTPUT_CSV = path.join(dir_path, args.output_csv) 75 | 76 | frames = [] 77 | filenames = open(WORKBOOKS_FILE, 'r').read().split('\n') 78 | for filename in filenames: 79 | print('Converting %s...' 
% (filename)) 80 | qtr = filename[:4] 81 | with open(filename, 'r') as f: 82 | wb_data = f.read() 83 | f.close() 84 | wb = xlrd.open_workbook(file_contents=wb_data) # xlrd.book.Book object 85 | try: 86 | ws_lot = wb.sheet_by_name('By Lot') 87 | except: 88 | ws_lot = wb.sheet_by_name('By Lot ') # SU16, wtf 89 | df = bylot_df(qtr, ws_lot) 90 | frames.append(df) 91 | 92 | df = pandas.concat(frames) 93 | df.index.name = 'id' 94 | df.to_csv(OUTPUT_CSV) 95 | 96 | 97 | if __name__ == '__main__': 98 | main() 99 | -------------------------------------------------------------------------------- /tests/run-targets-only/test.ts: -------------------------------------------------------------------------------- 1 | import test from "tape"; 2 | import { removeSync } from "fs-extra"; 3 | import { oak_run } from "../../src/core/run"; 4 | import { envFile, touch, getTree } from "../utils"; 5 | 6 | const env = envFile(__dirname); 7 | 8 | function cleanUp() { 9 | removeSync(env("oak_data")); 10 | removeSync(env(".oak")); 11 | } 12 | 13 | const outs: string[] = [ 14 | "a", 15 | "b", 16 | "c", 17 | "d", 18 | "e", 19 | "f", 20 | "g", 21 | "h", 22 | "x", 23 | "y", 24 | "z", 25 | "m" 26 | ].map(s => `oak_data/${s}`); 27 | 28 | test.onFinish(() => { 29 | cleanUp(); 30 | }); 31 | 32 | cleanUp(); 33 | 34 | /* 35 | a b c x y m 36 | | |/ |/ 37 | d e z 38 | | | 39 | f g 40 | | / 41 | h 42 | */ 43 | test("run-targets-only", async t => { 44 | // Tree 1: only target a 45 | await oak_run({ filename: env("Oakfile"), targets: ["a"] }); 46 | const t1 = await getTree(outs, env); 47 | 48 | t.equal(t1.get("oak_data/a").content, "a"); 49 | // all other recipes should not have been created 50 | outs.map(out => { 51 | if (out != "oak_data/a") t.equal(t1.get(out).stat, null); 52 | }); 53 | 54 | // Tree 2: target d, so a stays same and everything else still empty 55 | await oak_run({ filename: env("Oakfile"), targets: ["d"] }); 56 | const t2 = await getTree(outs, env); 57 | 58 | t.equal( 59 | t1.get("oak_data/a").stat.mtime.getTime(), 60 | t2.get("oak_data/a").stat.mtime.getTime(), 61 | "a should not update" 62 | ); 63 | t.equal(t2.get("oak_data/d").content, "a"); 64 | t.equal(t2.get("oak_data/b").stat, null); 65 | t.true(t2.get("oak_data/a").stat.mtime < t2.get("oak_data/d").stat.mtime); 66 | 67 | // Tree 3: target h/x, so adfh built, x built 68 | await oak_run({ filename: env("Oakfile"), targets: ["h", "x"] }); 69 | const t3 = await getTree(outs, env); 70 | t.equal(t3.get("oak_data/h").content, "abc"); 71 | t.equal(t3.get("oak_data/x").content, "x"); 72 | 73 | // Tree 4: target z/m, so xy built, m built 74 | await oak_run({ filename: env("Oakfile"), targets: ["z", "m"] }); 75 | const t4 = await getTree(outs, env); 76 | t.equal(t4.get("oak_data/z").content, "xy"); 77 | t.equal(t4.get("oak_data/m").content, "m"); 78 | t.true(t4.get("oak_data/x").stat.mtime < t4.get("oak_data/z").stat.mtime); 79 | 80 | // Tree 5: only target h, so adfbceg built 81 | await oak_run({ filename: env("Oakfile"), targets: ["h"] }); 82 | const t5 = await getTree(outs, env); 83 | t.equal( 84 | t4.get("oak_data/h").stat.mtime.getTime(), 85 | t5.get("oak_data/h").stat.mtime.getTime() 86 | ); 87 | 88 | // Tree 6: touch a, so dfh updates 89 | await touch( 90 | env("oak_data/a"), 91 | t5.get("oak_data/h").stat.atime, 92 | t5.get("oak_data/h").stat.mtime 93 | ); 94 | await oak_run({ filename: env("Oakfile"), targets: ["h"] }); 95 | const t6 = await getTree(outs, env); 96 | t.true( 97 | t5.get("oak_data/a").stat.mtime.getTime() < 98 | 
t6.get("oak_data/a").stat.mtime.getTime() 99 | ); 100 | t.true( 101 | t5.get("oak_data/f").stat.mtime.getTime() < 102 | t6.get("oak_data/f").stat.mtime.getTime() 103 | ); 104 | t.true( 105 | t5.get("oak_data/h").stat.mtime.getTime() < 106 | t6.get("oak_data/h").stat.mtime.getTime() 107 | ); 108 | 109 | t.end(); 110 | }); 111 | -------------------------------------------------------------------------------- /examples/pudding-hypen-names/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "starter", 3 | "version": "3.0.0", 4 | "description": "Starter template for Pudding stories", 5 | "author": "Russell Goldenberg", 6 | "license": "MIT", 7 | "engines": { 8 | "node": ">=8.0.0" 9 | }, 10 | "dependencies": { 11 | "cheerio": "^1.0.0-rc.2", 12 | "d3": "^5.9.1", 13 | "fs": "0.0.1-security", 14 | "lodash": "^4.17.11", 15 | "lodash.debounce": "^4.0.8", 16 | "nouislider": "^12.1.0", 17 | "promise-polyfill": "^8.1.0", 18 | "superagent": "^4.1.0", 19 | "whatwg-fetch": "3.0.0" 20 | }, 21 | "devDependencies": { 22 | "@babel/core": "7.2.2", 23 | "@babel/plugin-proposal-object-rest-spread": "7.2.0", 24 | "@babel/preset-env": "7.2.3", 25 | "@babel/register": "7.0.0", 26 | "archieml": "^0.4.2", 27 | "babel-loader": "8.0.5", 28 | "browser-sync": "^2.26.3", 29 | "d3-dsv": "1.0.10", 30 | "del": "^3.0.0", 31 | "eslint": "^5.12.0", 32 | "eslint-config-airbnb-base": "^13.1.0", 33 | "eslint-config-prettier": "^3.4.0", 34 | "eslint-plugin-import": "^2.14.0", 35 | "eslint-plugin-prettier": "^3.0.1", 36 | "gulp": "^3.9.1", 37 | "gulp-autoprefixer": "^6.0.0", 38 | "gulp-clean-css": "^4.0.0", 39 | "gulp-combine-mq": "^0.4.0", 40 | "gulp-file-include": "^2.0.1", 41 | "gulp-hb": "^7.0.1", 42 | "gulp-htmlmin": "^5.0.1", 43 | "gulp-notify": "^3.2.0", 44 | "gulp-plumber": "^1.2.1", 45 | "gulp-rename": "^1.4.0", 46 | "gulp-replace": "^1.0.0", 47 | "gulp-smoosher": "^0.0.9", 48 | "gulp-sourcemaps": "^2.6.4", 49 | "gulp-stylus": "^2.7.0", 50 | "gulp-util": "^3.0.7", 51 | "prettier": "^1.15.3", 52 | "request": "^2.88.0", 53 | "require-dir": "^1.2.0", 54 | "run-sequence": "^2.2.0", 55 | "source-map": "^0.7.3", 56 | "webpack": "^4.28.4", 57 | "webpack-stream": "^5.2.1" 58 | }, 59 | "main": "gulpfile.js", 60 | "directories": { 61 | "doc": "docs" 62 | }, 63 | "scripts": { 64 | "wnba-get-names-html": "node process/wnba/s1_get-names-html.js", 65 | "wnba-get-names-list": "node process/wnba/s2_get-names-list.js", 66 | "nba-get-names-html": "node process/nba/s1_get-names-html.js", 67 | "nba-get-names-list": "node process/nba/s2_get-names-list.js", 68 | "nfl-get-names-html": "node process/nfl/s1_get-names-html.js", 69 | "nfl-get-names-list": "node process/nfl/s2_get-names-list.js", 70 | "mlb-get-names-html": "node process/mlb/s1_get-names-html.js", 71 | "mlb-get-names-list": "node process/mlb/s2_get-names-list.js", 72 | "nhl-get-names-html": "node process/nhl/s1_get-names-html.js", 73 | "nhl-get-names-list": "node process/nhl/s2_get-names-list.js", 74 | "mls-get-names-list": "node process/mls/s1_get-names-list.js", 75 | "mls-format-years": "node process/mls/s2_format-years.js", 76 | "nwsl-get-names-html": "node process/nwsl/s1_get-names-html.js", 77 | "nwsl-get-names-list": "node process/nwsl/s2_get-names-list.js", 78 | "nwsl-format-years": "node process/nwsl/s3_format-years.js", 79 | "congress-get-names-html": "node process/congress/s1_get-names-html.js", 80 | "congress-get-names-list": "node process/congress/s2_get-names-list.js", 81 | "combine-all-names": "node 
process/combine-all-names.js" 82 | }, 83 | "repository": { 84 | "type": "git", 85 | "url": "git+https://github.com/polygraph-cool/starter.git" 86 | }, 87 | "keywords": [], 88 | "bugs": { 89 | "url": "https://github.com/polygraph-cool/starter/issues" 90 | }, 91 | "homepage": "https://github.com/polygraph-cool/starter#readme" 92 | } 93 | -------------------------------------------------------------------------------- /examples/school-accidents/clean_accidents.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "from datetime import datetime\n", 11 | "from pytz import timezone" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "metadata": { 18 | "tags": [ 19 | "parameters" 20 | ] 21 | }, 22 | "outputs": [], 23 | "source": [ 24 | "raw_accidents_path = \"./oak_data/raw_accidents.csv\"\n", 25 | "accidents_path = \"./oak_data/accidents.csv\"" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 3, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "df = pd.read_csv(raw_accidents_path, dtype={\n", 35 | " \"Time Occurred\": str\n", 36 | "})" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 4, 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "tz = timezone('US/Pacific')\n", 46 | "\n", 47 | "def string_to_unix(row):\n", 48 | " d, t = row['Date Occurred'], row['Time Occurred']\n", 49 | " s = \"{} {}\".format(d, t)\n", 50 | " return tz.localize(datetime.strptime(s, '%m/%d/%Y %H%M'))" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 5, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "df['occured'] = df.apply(string_to_unix, axis=1)" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 6, 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "df = df[df['occured'].apply(lambda d: d.year == 2019)]" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 7, 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "def extract_location(location):\n", 78 | " s = location.strip()\n", 79 | " assert s[0] == '('\n", 80 | " assert s[-1] == ')'\n", 81 | " assert s.find(',') > -1\n", 82 | " lat = float(s[1:s.find(',')].strip())\n", 83 | " lon = float(s[s.find(',')+1:-1].strip())\n", 84 | " return pd.Series((lat, lon))" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": 8, 90 | "metadata": {}, 91 | "outputs": [], 92 | "source": [ 93 | "df[['latitude', 'longitude']] = df['Location'].apply(extract_location)" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 9, 99 | "metadata": {}, 100 | "outputs": [], 101 | "source": [ 102 | "df = df[(df['latitude'] != 0) & (df['longitude'] != 0)]" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": 10, 108 | "metadata": {}, 109 | "outputs": [], 110 | "source": [ 111 | "df.to_csv(accidents_path)" 112 | ] 113 | } 114 | ], 115 | "metadata": { 116 | "celltoolbar": "Tags", 117 | "kernelspec": { 118 | "display_name": "Python 3", 119 | "language": "python", 120 | "name": "python3" 121 | }, 122 | "language_info": { 123 | "codemirror_mode": { 124 | "name": "ipython", 125 | "version": 3 126 | }, 127 | "file_extension": ".py", 128 | "mimetype": "text/x-python", 129 | "name": "python", 130 | "nbconvert_exporter": "python", 131 | "pygments_lexer": 
"ipython3", 132 | "version": "3.6.10" 133 | } 134 | }, 135 | "nbformat": 4, 136 | "nbformat_minor": 4 137 | } 138 | -------------------------------------------------------------------------------- /tests/sqlite/test.ts: -------------------------------------------------------------------------------- 1 | import test from "tape"; 2 | import { removeSync, writeFileSync } from "fs-extra"; 3 | import { envFile, open } from "../utils"; 4 | import { oak_run } from "../../src/core/run"; 5 | 6 | const env = envFile(__dirname); 7 | 8 | function cleanUp() { 9 | removeSync(env.data("")); 10 | removeSync(env(".oak")); 11 | removeSync(env("Oakfile")); 12 | } 13 | 14 | test.onFinish(() => { 15 | cleanUp(); 16 | }); 17 | 18 | cleanUp(); 19 | function writeOakfile(contents: string) { 20 | writeFileSync(env("Oakfile"), contents, "utf8"); 21 | } 22 | 23 | test("sqlite", async t => { 24 | // Test #1: Regular Oakfile. 25 | writeOakfile(` 26 | text = "originalText" 27 | 28 | a = new Task({ 29 | target: "a", 30 | run: a => shell\`echo "Running a..."; \ 31 | echo -n "\${text}" > \${a}\` 32 | }) 33 | 34 | b = new Task({ 35 | target: "b", 36 | run: b => shell\`echo "Running b..."; \ 37 | echo -n "b" > \${b}\` 38 | }) 39 | 40 | c = new Task({ 41 | target: "c", 42 | run: c => shell\`echo "Running c..."; \ 43 | cat \${a} \${b} > \${c}\` 44 | }) 45 | `); 46 | 47 | await oak_run({ filename: env("Oakfile"), targets: [] }); 48 | 49 | const [a1, b1, c1] = await Promise.all([ 50 | open(env.data("a")), 51 | open(env.data("b")), 52 | open(env.data("c")) 53 | ]); 54 | 55 | t.true(a1.content === "originalText"); 56 | t.true(b1.content === "b"); 57 | t.true(c1.content === "originalTextb"); 58 | t.true(a1.stat.mtime < c1.stat.mtime); 59 | t.true(b1.stat.mtime < c1.stat.mtime); 60 | 61 | // Test 2: Change the contents of b ("b" => "newB"). 62 | // b must change, c must change, a stays the same. 63 | 64 | writeOakfile(` 65 | text = "originalText" 66 | 67 | a = new Task({ 68 | target: "a", 69 | run: a => shell\`echo "Running a..."; \ 70 | echo -n "\${text}" > \${a}\` 71 | }) 72 | 73 | b = new Task({ 74 | target: "b", 75 | run: b => shell\`echo "Running b..."; \ 76 | echo -n "newB" > \${b}\` 77 | }) 78 | 79 | c = new Task({ 80 | target: "c", 81 | run: c => shell\`echo "Running c..."; \ 82 | cat \${a} \${b} > \${c}\` 83 | }) 84 | `); 85 | 86 | await oak_run({ filename: env("Oakfile"), targets: [] }); 87 | 88 | const [a2, b2, c2] = await Promise.all([ 89 | open(env.data("a")), 90 | open(env.data("b")), 91 | open(env.data("c")) 92 | ]); 93 | 94 | t.true(a2.content === "originalText"); 95 | t.true(b2.content === "newB"); 96 | t.true(c2.content === "originalTextnewB"); 97 | t.true(a2.stat.mtime.getTime() === a1.stat.mtime.getTime()); 98 | t.true(b2.stat.mtime > b1.stat.mtime); 99 | t.true(b2.stat.mtime < c2.stat.mtime); 100 | t.true(c2.stat.mtime > c1.stat.mtime); 101 | t.true(c2.stat.mtime > a1.stat.mtime); 102 | 103 | // Test #3: text changes, so a changes, not b, and c changes. 
104 | 105 | writeOakfile(` 106 | text = "newText" 107 | 108 | a = new Task({ 109 | target: "a", 110 | run: a => shell\`echo "Running a..."; \ 111 | echo -n "\${text}" > \${a}\` 112 | }) 113 | 114 | b = new Task({ 115 | target: "b", 116 | run: b => shell\`echo "Running b..."; \ 117 | echo -n "newB" > \${b}\` 118 | }) 119 | 120 | c = new Task({ 121 | target: "c", 122 | run: c => shell\`echo "Running c..."; \ 123 | cat \${a} \${b} > \${c}\` 124 | }) 125 | `); 126 | 127 | await oak_run({ filename: env("Oakfile"), targets: [] }); 128 | 129 | const [a3, b3, c3] = await Promise.all([ 130 | open(env.data("a")), 131 | open(env.data("b")), 132 | open(env.data("c")) 133 | ]); 134 | 135 | t.true(a3.content === "newText"); 136 | t.true(b3.content === "newB"); 137 | t.true(c3.content === "newTextnewB"); 138 | t.true(a3.stat.mtime > a2.stat.mtime); 139 | t.true(b3.stat.mtime.getTime() === b2.stat.mtime.getTime()); 140 | t.true(b3.stat.mtime < a3.stat.mtime); 141 | t.true(c3.stat.mtime > c2.stat.mtime); 142 | t.true(c3.stat.mtime > a3.stat.mtime); 143 | t.end(); 144 | }); 145 | -------------------------------------------------------------------------------- /examples/native-lands-colleges/query.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import os, json 4 | from shapely.geometry import shape, Point 5 | from tqdm import tqdm 6 | 7 | def query(): 8 | print('opening file...') 9 | 10 | curdir = os.path.dirname(__file__) 11 | df = pd.read_csv(open(os.path.join(curdir, '../input/colleges/MERGED2015_16_PP.csv'))) 12 | 13 | print('file opened') 14 | schools = df[ (df['CONTROL'] == 1) | (df['CONTROL'] == 2) ] # only public/private non profits 15 | schools = schools[ schools['MAIN'] == 1 ] # only main campuses 16 | schools = schools[ schools['PREDDEG'] == 3 ] # only look at predominately bachelors serving institutions 17 | schools = schools[ schools['DISTANCEONLY'] == 0 ] # Only in-person colleges 18 | schools = schools[ schools['CURROPER'] == 1 ] # Only currently operating 19 | 20 | 21 | # Dropping NaN values for certain columns 22 | schools = schools[ (~np.isnan(schools['LATITUDE'])) ] 23 | schools = schools[ (~np.isnan(schools['LONGITUDE'])) ] 24 | 25 | 26 | #Personal Preference 27 | schools = schools[ schools['UGDS'] > 1800 ] # reduce # of schools 28 | 29 | cols = [ 30 | 'UNITID', # id 31 | 'INSTNM', # name 32 | 'CITY', # city 33 | 'STABBR', # state 34 | 'ZIP', # zip 35 | 'INSTURL',# website 36 | 'UGDS', # size 37 | 'LATITUDE', 38 | 'LONGITUDE', 39 | 'ADM_RATE', # admit rate 40 | 'HIGHDEG', #highest degree offered 41 | 'CONTROL', # public/private 42 | 'CCBASIC', # Carnegie Classification -- basic 43 | 'CCUGPROF', # Carnegie Classification -- undergraduate profile 44 | 'CCSIZSET', # Carnegie Classification -- size and setting 45 | 'TRIBAL', # Flag for tribal college and university 46 | 'NANTI', #Flag for Native American non-tribal institution 47 | 'UGDS_AIAN', # Total share of enrollment of undergraduate degree-seeking students who are American Indian/Alaska Native 48 | 'UG_AIANOLD', #Total share of enrollment of undergraduate students who are Asian/Pacific Islander 49 | 50 | ] 51 | 52 | schools = schools[cols] 53 | schools = schools.sort_values(['UGDS']) 54 | 55 | curdir = os.path.dirname(__file__) 56 | gj = json.load(open(os.path.join(curdir, '../input/nativelands/indigenousTerritories.geojson'))) 57 | 58 | territory_series = pd.Series(index=schools.index) 59 | 60 | territories = gj.get('features') 61 | 
territories_polygons = [] 62 | 63 | for i, t in enumerate(territories): 64 | polygon = shape(t.get('geometry')) 65 | territories_polygons.append({ 66 | 'polygon':polygon, 67 | 'properties': t.get('properties'), 68 | 'id':t.get('id'), 69 | 'index':i, 70 | }) 71 | t['properties']['colleges'] = [] 72 | 73 | 74 | with tqdm(total=len(schools)) as pbar: 75 | for i, s in schools.iterrows(): 76 | p = Point(s['LONGITUDE'], s['LATITUDE']) 77 | territories_match = [] 78 | 79 | for t_i, t in enumerate(territories_polygons): 80 | if t['polygon'].contains(p): 81 | territories_match.append({ 82 | 'id':t.get('id'), 83 | 'name': t.get('properties').get('Name') 84 | }) 85 | gj.get('features')[t_i]['properties']['colleges'].append(s['UNITID']) 86 | territory_series[s.name] = json.dumps(territories_match) 87 | 88 | pbar.update(1) 89 | 90 | schools['territories'] = territory_series 91 | 92 | #schools.to_csv('schools.csv') 93 | schools.to_csv('../display/public/schools.csv') 94 | json.dump(gj, open('../display/public/territories.geojson', 'w+')) 95 | 96 | def main(): 97 | query() 98 | 99 | if __name__ == '__main__': 100 | main() 101 | 102 | 103 | -------------------------------------------------------------------------------- /examples/ghcdn/Pipfile.lock: -------------------------------------------------------------------------------- 1 | { 2 | "_meta": { 3 | "hash": { 4 | "sha256": "45f209d34ad317d6cd7b0ccba1d949ab165213ce748feec1b64c6fa8218943a4" 5 | }, 6 | "pipfile-spec": 6, 7 | "requires": { 8 | "python_version": "3.6" 9 | }, 10 | "sources": [ 11 | { 12 | "name": "pypi", 13 | "url": "https://pypi.org/simple", 14 | "verify_ssl": true 15 | } 16 | ] 17 | }, 18 | "default": { 19 | "numpy": { 20 | "hashes": [ 21 | "sha256:1786a08236f2c92ae0e70423c45e1e62788ed33028f94ca99c4df03f5be6b3c6", 22 | "sha256:17aa7a81fe7599a10f2b7d95856dc5cf84a4eefa45bc96123cbbc3ebc568994e", 23 | "sha256:20b26aaa5b3da029942cdcce719b363dbe58696ad182aff0e5dcb1687ec946dc", 24 | "sha256:2d75908ab3ced4223ccba595b48e538afa5ecc37405923d1fea6906d7c3a50bc", 25 | "sha256:39d2c685af15d3ce682c99ce5925cc66efc824652e10990d2462dfe9b8918c6a", 26 | "sha256:56bc8ded6fcd9adea90f65377438f9fea8c05fcf7c5ba766bef258d0da1554aa", 27 | "sha256:590355aeade1a2eaba17617c19edccb7db8d78760175256e3cf94590a1a964f3", 28 | "sha256:70a840a26f4e61defa7bdf811d7498a284ced303dfbc35acb7be12a39b2aa121", 29 | "sha256:77c3bfe65d8560487052ad55c6998a04b654c2fbc36d546aef2b2e511e760971", 30 | "sha256:9537eecf179f566fd1c160a2e912ca0b8e02d773af0a7a1120ad4f7507cd0d26", 31 | "sha256:9acdf933c1fd263c513a2df3dceecea6f3ff4419d80bf238510976bf9bcb26cd", 32 | "sha256:ae0975f42ab1f28364dcda3dde3cf6c1ddab3e1d4b2909da0cb0191fa9ca0480", 33 | "sha256:b3af02ecc999c8003e538e60c89a2b37646b39b688d4e44d7373e11c2debabec", 34 | "sha256:b6ff59cee96b454516e47e7721098e6ceebef435e3e21ac2d6c3b8b02628eb77", 35 | "sha256:b765ed3930b92812aa698a455847141869ef755a87e099fddd4ccf9d81fffb57", 36 | "sha256:c98c5ffd7d41611407a1103ae11c8b634ad6a43606eca3e2a5a269e5d6e8eb07", 37 | "sha256:cf7eb6b1025d3e169989416b1adcd676624c2dbed9e3bcb7137f51bfc8cc2572", 38 | "sha256:d92350c22b150c1cae7ebb0ee8b5670cc84848f6359cf6b5d8f86617098a9b73", 39 | "sha256:e422c3152921cece8b6a2fb6b0b4d73b6579bd20ae075e7d15143e711f3ca2ca", 40 | "sha256:e840f552a509e3380b0f0ec977e8124d0dc34dc0e68289ca28f4d7c1d0d79474", 41 | "sha256:f3d0a94ad151870978fb93538e95411c83899c9dc63e6fb65542f769568ecfa5" 42 | ], 43 | "version": "==1.18.1" 44 | }, 45 | "pandas": { 46 | "hashes": [ 47 | "sha256:04fe02d492d917bbdf314f63517616c1cc7ac7c25495f322c7df5745583bf548", 48 
| "sha256:137afc43ce7bd19b129dd0211177d03307080a728072e0a474de113ffec7f3c9", 49 | "sha256:1a96b3e5172f194036d384fd9e853cbf94c42ec13bfebceb1eb0175c96f4e5d3", 50 | "sha256:37d2b9f7301177e7ba2de1ab8be929a0e2625821d1d21de5f2f2eddfa16742b4", 51 | "sha256:3c76643abfe83f4f3a107d06bea64d4cf702afc97a7f3a3c54275f48c7378c54", 52 | "sha256:4269c698d3f76889520b9e022702c975b5b19a63705a2e098694f5f8719c7287", 53 | "sha256:4d4af03db48a9b292f700c4d5df52645e5a59046800594c46e53b0518ecf3ade", 54 | "sha256:7034fd811df432465fe2fec64637db84600b5f1d0e9d1123195360e2f9bf4b7d", 55 | "sha256:76334ba36aa42f93b6b47b79cbc32187d3a178a4ab1c3a478c8f4198bcd93a73", 56 | "sha256:852cac070c0928a2374854df312ba655533ff324bd0edc9b36d89adbc7b90263", 57 | "sha256:9464f4ff95fd8f4c4a5245819e353052a0c501dd2fb027b294b005ed25f4d992", 58 | "sha256:dac3bf7495c7ce6a72dff2158c8ead0f377832491a672145829ac06d64782192", 59 | "sha256:e0e752699b4be387783506d34f12bef063b76ce1695aabfb0cd15bde82a3a5a7", 60 | "sha256:e462ca4a59daea2ba73ac87186d638d7a43a86ec063705cf9cd215b0fafa8c0e" 61 | ], 62 | "index": "pypi", 63 | "version": "==1.0.2" 64 | }, 65 | "python-dateutil": { 66 | "hashes": [ 67 | "sha256:73ebfe9dbf22e832286dafa60473e4cd239f8592f699aa5adaf10050e6e1823c", 68 | "sha256:75bb3f31ea686f1197762692a9ee6a7550b59fc6ca3a1f4b5d7e32fb98e2da2a" 69 | ], 70 | "version": "==2.8.1" 71 | }, 72 | "pytz": { 73 | "hashes": [ 74 | "sha256:1c557d7d0e871de1f5ccd5833f60fb2550652da6be2693c1e02300743d21500d", 75 | "sha256:b02c06db6cf09c12dd25137e563b31700d3b80fcc4ad23abb7a315f2789819be" 76 | ], 77 | "version": "==2019.3" 78 | }, 79 | "six": { 80 | "hashes": [ 81 | "sha256:236bdbdce46e6e6a3d61a337c0f8b763ca1e8717c03b369e87a7ec7ce1319c0a", 82 | "sha256:8f3cd2e254d8f793e7f3d6d9df77b92252b52637291d0f0da013c76ea2724b6c" 83 | ], 84 | "version": "==1.14.0" 85 | } 86 | }, 87 | "develop": {} 88 | } 89 | -------------------------------------------------------------------------------- /src/cli.ts: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | import { logsCommand } from "./commands/logs"; 4 | import { pathCommand } from "./commands/path"; 5 | import { runCommand } from "./commands/run"; 6 | import { versionCommand } from "./commands/version"; 7 | 8 | import { 9 | CommandLineStringParameter, 10 | CommandLineStringListParameter, 11 | CommandLineAction, 12 | CommandLineParser, 13 | CommandLineFlagParameter, 14 | } from "@rushstack/ts-command-line"; 15 | 16 | class LogsAction extends CommandLineAction { 17 | private _filename: CommandLineStringParameter; 18 | private _targets: CommandLineStringListParameter; 19 | public constructor() { 20 | super({ 21 | actionName: "logs", 22 | summary: "Show the logs from a previous oak run.", 23 | documentation: "TODO", 24 | }); 25 | } 26 | protected async onExecute(): Promise { 27 | await logsCommand({ 28 | filename: this._filename.value, 29 | targets: this._targets.values, 30 | }); 31 | return; 32 | } 33 | 34 | protected onDefineParameters(): void { 35 | this._filename = this.defineStringParameter({ 36 | argumentName: "FILENAME", 37 | parameterLongName: "--file", 38 | parameterShortName: "-f", 39 | description: "Path to Oakfile.", 40 | defaultValue: "./Oakfile", 41 | }); 42 | this._targets = this.defineStringListParameter({ 43 | argumentName: "TARGET", 44 | parameterLongName: "--target", 45 | parameterShortName: "-t", 46 | description: "Task name associated with the log.", 47 | required: true, 48 | }); 49 | } 50 | } 51 | 52 | class PathAction extends CommandLineAction { 53 | 
private _filename: CommandLineStringParameter;
54 |   private _targets: CommandLineStringListParameter;
55 |   public constructor() {
56 |     super({
57 |       actionName: "path",
58 |       summary: "Get the target's path of an Oak Task.",
59 |       documentation: "TODO",
60 |     });
61 |   }
62 |   protected async onExecute(): Promise<void> {
63 |     await pathCommand({
64 |       filename: this._filename.value,
65 |       targets: this._targets.values,
66 |     });
67 |     return;
68 |   }
69 | 
70 |   protected onDefineParameters(): void {
71 |     this._filename = this.defineStringParameter({
72 |       argumentName: "FILENAME",
73 |       parameterLongName: "--file",
74 |       parameterShortName: "-f",
75 |       description: "Path to Oakfile.",
76 |       defaultValue: "./Oakfile",
77 |     });
78 |     this._targets = this.defineStringListParameter({
79 |       argumentName: "TARGET",
80 |       parameterLongName: "--target",
81 |       parameterShortName: "-t",
82 |       description: "Task name to resolve the target path for.",
83 |       required: true,
84 |     });
85 |   }
86 | }
87 | 
88 | class RunAction extends CommandLineAction {
89 |   private _filename: CommandLineStringParameter;
90 |   private _redefines: CommandLineStringListParameter;
91 |   private _targets: CommandLineStringListParameter;
92 | 
93 |   public constructor() {
94 |     super({
95 |       actionName: "run",
96 |       summary: "Run an Oakfile.",
97 |       documentation: "TODO",
98 |     });
99 |   }
100 |   protected async onExecute(): Promise<void> {
101 |     await runCommand({
102 |       filename: this._filename.value,
103 |       targets: this._targets.values,
104 |       redefines: this._redefines.values,
105 |     });
106 |     return;
107 |   }
108 | 
109 |   protected onDefineParameters(): void {
110 |     this._filename = this.defineStringParameter({
111 |       argumentName: "FILENAME",
112 |       parameterLongName: "--file",
113 |       parameterShortName: "-f",
114 |       description: "Path to Oakfile.",
115 |       defaultValue: "./Oakfile",
116 |     });
117 |     this._redefines = this.defineStringListParameter({
118 |       argumentName: "CELLDEFINITION",
119 |       parameterLongName: "--redefine",
120 |       description: "Code that redefines a cell in the Oakfile.",
121 |     });
122 |     this._targets = this.defineStringListParameter({
123 |       argumentName: "TARGETS",
124 |       parameterLongName: "--targets",
125 |       parameterShortName: "-t",
126 |       description: "List of target names to resolve.",
127 |     });
128 |   }
129 | }
130 | 
131 | 
132 | class VersionAction extends CommandLineAction {
133 |   public constructor() {
134 |     super({
135 |       actionName: "version",
136 |       summary: "Print version of oak.",
137 |       documentation: "TODO",
138 |     });
139 |   }
140 |   protected onDefineParameters(): void {}
141 |   protected async onExecute(): Promise<void> {
142 |     versionCommand();
143 |   }
144 | }
145 | class OakCommandLine extends CommandLineParser {
146 |   public constructor() {
147 |     super({
148 |       toolFilename: "oak",
149 |       toolDescription: "CLI for oak.",
150 |     });
151 | 
152 |     this.addAction(new LogsAction());
153 |     this.addAction(new PathAction());
154 |     this.addAction(new RunAction());
155 |     this.addAction(new VersionAction());
156 |   }
157 | 
158 |   protected onDefineParameters(): void {}
159 | 
160 |   protected async onExecute(): Promise<void> {
161 |     await super.onExecute();
162 |     return;
163 |   }
164 | }
165 | (async function main(): Promise<void> {
166 |   const cli: OakCommandLine = new OakCommandLine();
167 |   await cli.execute();
168 |   return;
169 | })();
170 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # oak
2 | 
3 | A tool for reproducible, customizable data workflows.
4 | 
5 | **Note** - `oak` is still under development - Watch this repo or [follow me on twitter](https://twitter.com/agarcia_me) to keep updated!
6 | 
7 | `oak` works well with projects that have a chain (or tree) of scripts that process files. For example, you may have a series of python/R/shell scripts that each take a file as input, process it in some way, then output a new file (which is then used as the input to the next script). This is an ETL (Extract, Transform, Load) workflow - and `oak` makes it easier to define, develop, and re-use these workflows.
8 | 
9 | ## Benefits of `oak`
10 | 
11 | ### Reproducibility
12 | 
13 | 
14 | 
15 | ### Language Agnostic
16 | 
17 | While `oak` is written in TypeScript, you can use the tool to kick off any process in any language - for example, this is an `Oakfile` that uses Python, R, and Node.js:
18 | 
19 | ```javascript
20 | scraped_data = new Task({
21 |   target: "scraped_data.csv",
22 |   run: scraped_data => shell`python scrape.py > ${scraped_data}`
23 | });
24 | 
25 | analyzed_data = new Task({
26 |   target: "analysis.csv",
27 |   run: analyzed_data =>
28 |     shell`Rscript analysis.R --input=${scraped_data} > ${analyzed_data}`
29 | });
30 | 
31 | graphic = new Task({
32 |   target: "graphic.jpg",
33 |   run: graphic =>
34 |     shell`node generate-graphic.js --input=${analyzed_data} > ${graphic}`
35 | });
36 | ```
37 | 
38 | In this `Oakfile`, the script `scrape.py` does some form of web scraping, formats the result as CSV, and outputs it into `scraped_data.csv`. Then `analysis.R` is run, using `scraped_data.csv` as input, and outputs into `analysis.csv`. Finally, `generate-graphic.js` is called, taking `analysis.csv` as input and outputting into `graphic.jpg`.
39 | 
40 | In order to do all of the above, all you have to run is `oak run` (see the sketch below). Assuming your code works and all dependencies are installed, it just works!
41 | 
42 | You're able to call any language, tool, or program that's accessible through the command line - which is basically everything!
43 | 
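Concretely, here is a rough sketch of the commands involved - flag names as defined by the CLI in `src/cli.ts`, with the hypothetical `graphic` task from the example above:

```
# run every out-of-date task in ./Oakfile
oak run

# only build `graphic` (and any tasks it depends on)
oak run --targets graphic

# print where `graphic`'s target file lives, and show the logs from its last run
oak path --target graphic
oak logs --target graphic
```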
44 | ### Better Documentation
45 | 
46 | With one command, `oak print --output=png`, you can create a graphic that explains your entire data workflow:
47 | 
48 | ![](./assets/example.png)
49 | 
50 | This workflow downloads a dataset from the California government containing data about all K-12 schools in the state (`cde_text`), then filters it down to public schools, charter schools, and public/charter schools in Los Angeles County, and then generates `.geojson` files for all of those combinations.
51 | 
52 | _Note_ - this feature is still being worked on, and this specific example is kind of a mess...
53 | 
54 | ## Examples of when to use `oak`
55 | 
56 | - You have a series of python scripts that convert raw PDFs into CSVs, which you then generate graphs for
57 | - You do some light scraping of a website, generate a dataset, then clean that dataset with R
58 | - You have a giant sqlite database, and a series of shell/python/perl scripts that eventually generate a small, cleaned CSV file.
59 | - You have code from 4 years ago that parsed election results and generated maps, and you want to re-use that workflow in the next election (with little headache)
60 | 
61 | ## When to NOT use `oak`
62 | 
63 | - In a high-performance production setting - `oak` is meant to be run by developers manually, not called by some production server many times per minute
64 | - For scheduling or monitoring workflows - see [Airflow](https://airflow.apache.org/index.html)
65 | 
66 | ## Why not `make`?
67 | 
68 | `make` is another CLI tool that comes pre-installed in many linux/mac environments and works similarly to `oak` - you write a `Makefile` that creates a DAG of file dependencies and, when run, only rebuilds targets that are out of date.
69 | 
70 | `oak` is meant to be an easier-to-use, more extensible version of `make`, which can be very frustrating to use for data analysis. For example:
71 | 
72 | - Doesn't work on Windows without hacky ports
73 | - "Correct" syntax depends on which `make` tool you use
74 | - The teensiest syntax error gives you the most cryptic error messages possible
75 | - Something as simple as defining a variable is a confusing process
76 | - Documentation is decades old
77 | - Examples online are primarily meant for compiling code
78 | - Importing a `Makefile` from another `Makefile` to borrow code is impossible (or very complicated)
79 | 
80 | `oak` tries to avoid these problems. `make` is great if you're a low-level Linux programmer trying to compile a codebase of millions of files, but if you just want to better define your data workflow, it can be overkill.
81 | 
82 | ## Why not Airflow/Mara/Bonobo/some other ETL framework?
83 | 
84 | TODO add more context/links
85 | 
86 | ## Installing
87 | 
88 | ```
89 | npm install -g @alex.garcia/oak
90 | ```
91 | 
92 | ## Usage
93 | 
94 | - `oak init` - create an `Oakfile` in the current directory
95 | - `oak run` - run the `Oakfile`, re-running only tasks that are out of date
96 | - `oak print` - print out the dependencies of tasks in the `Oakfile`. Can also use `--output dot` to print Graphviz dot notation
97 | 
98 | ## `oak` Behind the scenes
99 | 
100 | `oak` is built on the ObservableHQ [runtime](https://github.com/observablehq/runtime) and [parser](https://github.com/observablehq/parser). `oak` runs a given `Oakfile`, which users write in JavaScript with syntax similar to Observable notebooks.
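Since each `Oakfile` assignment is an Observable-style cell, definitions can appear in any order - `oak` resolves references by name and sorts the dependency graph before running anything. A minimal sketch (hypothetical file names, using the same `Task`/`shell` conventions as the examples in this repo):

```javascript
// `count` references `words` before `words` is defined; the runtime
// resolves the cells by name, so this always runs words -> count.
count = new Task({
  target: "count.txt",
  run: count => shell`wc -l < ${words} > ${count}`
});

words = new Task({
  target: "words.txt",
  run: words => shell`sort input.txt > ${words}`
});
```

On a repeat `oak run`, `count` is only re-executed if its dependencies changed - the freshness checks in `src/decorator.ts` compare each task's recorded dependency and target signatures before deciding whether to re-run it.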
101 | -------------------------------------------------------------------------------- /src/decorator.ts: -------------------------------------------------------------------------------- 1 | import { CellSignature, getSignature, hashString } from "./utils"; 2 | import { OakDB } from "./db"; 3 | import Task from "./Task"; 4 | import { dirname, join } from "path"; 5 | 6 | export type TaskHookDecoratorArguments = { 7 | cellFunction: (...any) => any; 8 | cellName: string; 9 | cellReferences: string[]; 10 | cellSignature: CellSignature; 11 | baseModuleDir: string; 12 | oakfilePath: string; 13 | importId: string; 14 | }; 15 | type TaskHookCellArguments = any[]; 16 | export type TaskHookTaskContext = { 17 | dependenciesSignature: string; 18 | currentTargetSignature: string; 19 | }; 20 | type TaskHookArguments = [ 21 | any, 22 | TaskHookDecoratorArguments, 23 | TaskHookCellArguments, 24 | TaskHookTaskContext 25 | ]; 26 | 27 | type TaskHook = (...args: TaskHookArguments) => any; 28 | 29 | export default function decorator( 30 | hooks: { 31 | onTaskUpToDate: TaskHook; 32 | onTaskCellDefinitionChanged: TaskHook; 33 | onTaskDependencyChanged: TaskHook; 34 | onTaskTargetChanged: TaskHook; 35 | onTaskTargetMissing: TaskHook; 36 | }, 37 | oakDB: OakDB, 38 | customFreshHook?: { 39 | check: (...args: TaskHookArguments) => boolean; 40 | value: (...args: TaskHookArguments) => any; 41 | } 42 | ) { 43 | return function( 44 | cellFunction: (...any) => any, 45 | cellName: string, 46 | cellReferences: string[], 47 | cellSignature: CellSignature, 48 | baseModuleDir: string, 49 | oakfilePath: string, 50 | importId: string 51 | ): (...any) => any { 52 | return async function(...dependencies) { 53 | let currCell = await cellFunction(...dependencies); 54 | if (!(currCell instanceof Task)) { 55 | return currCell; 56 | } 57 | const lastTaskExection = await oakDB.getLastRelatedTaskExection( 58 | cellSignature.ancestorHash 59 | ); 60 | 61 | await currCell.updateBasePath(baseModuleDir); 62 | 63 | // dont try and get Task for cell dependencies like `Task` or `shell` 64 | const taskDependencies = dependencies.filter( 65 | dependency => dependency instanceof Task 66 | ); 67 | 68 | const dependenciesSignatures: string[] = await Promise.all([ 69 | ...taskDependencies 70 | .map(t => t.target) 71 | .map(async path => { 72 | const sig = await getSignature(path); 73 | if (!sig) return ""; 74 | /*throw Error( 75 | `Problem getting signature for ${path}. Does the file exist?` 76 | );*/ 77 | return sig; 78 | }), 79 | ...currCell.watch.map(async path => { 80 | const sig = await getSignature(join(dirname(oakfilePath), path)); 81 | if (!sig) return ""; 82 | /*throw Error( 83 | `Problem getting signature for ${path}. Does the file exist?` 84 | );*/ 85 | return sig; 86 | }), 87 | ]); 88 | const dependenciesSignature = hashString(dependenciesSignatures.join("")); 89 | const currentTargetSignature = await getSignature(currCell.target); 90 | 91 | const decoratorArgs = { 92 | cellFunction, 93 | cellName, 94 | cellReferences, 95 | cellSignature, 96 | baseModuleDir, 97 | oakfilePath, 98 | importId, 99 | }; 100 | const taskContext = { 101 | dependenciesSignature, 102 | currentTargetSignature 103 | }; 104 | 105 | if ( 106 | customFreshHook && 107 | customFreshHook.check( 108 | currCell, 109 | decoratorArgs, 110 | dependencies, 111 | taskContext 112 | ) 113 | ) { 114 | return await customFreshHook.value( 115 | currCell, 116 | decoratorArgs, 117 | dependencies, 118 | taskContext 119 | ); 120 | } 121 | // no output target. 
Don't do if the task has no target. 122 | if (currCell.target && currentTargetSignature === null) { 123 | return await hooks.onTaskTargetMissing( 124 | currCell, 125 | decoratorArgs, 126 | dependencies, 127 | taskContext 128 | ); 129 | } 130 | 131 | // cell definition changed 132 | if (!lastTaskExection) { 133 | return await hooks.onTaskCellDefinitionChanged( 134 | currCell, 135 | decoratorArgs, 136 | dependencies, 137 | taskContext 138 | ); 139 | } 140 | 141 | // out of date dependency 142 | if (lastTaskExection.dependenciesSignature !== dependenciesSignature) { 143 | return await hooks.onTaskDependencyChanged( 144 | currCell, 145 | decoratorArgs, 146 | dependencies, 147 | taskContext 148 | ); 149 | } 150 | 151 | // target has changed. Ignorable 152 | if ( 153 | !currCell.freshIgnoreTarget && 154 | lastTaskExection.targetSignature !== currentTargetSignature 155 | ) { 156 | return await hooks.onTaskTargetChanged( 157 | currCell, 158 | decoratorArgs, 159 | dependencies, 160 | taskContext 161 | ); 162 | } 163 | 164 | return await hooks.onTaskUpToDate( 165 | currCell, 166 | decoratorArgs, 167 | dependencies, 168 | taskContext 169 | ); 170 | }; 171 | }; 172 | } 173 | -------------------------------------------------------------------------------- /site/pages/index.js: -------------------------------------------------------------------------------- 1 | import Oakfile from "../components/Oakfile"; 2 | import Head from "next/head"; 3 | 4 | const Home = () => ( 5 |
    {/* Lines 6-206 of this file were mangled during extraction: the JSX markup was
        stripped, leaving only its text content. The recoverable copy:
          - <title>: "Oak - Powerful, reproducible data pipelines"
          - headline: "Oak"
          - taglines: "A powerful new way to make reproducible data pipelines.",
            "A Makefile-like runtime, built with the Observable notebook runtime.",
            "Reproduce your analysis. Reuse your pipelines."
        The remaining markup, styles, and scripts could not be recovered. */}
    207 | ); 208 | 209 | export default Home; 210 | -------------------------------------------------------------------------------- /examples/native-lands-colleges/Pipfile.lock: -------------------------------------------------------------------------------- 1 | { 2 | "_meta": { 3 | "hash": { 4 | "sha256": "917aab58a7a1ca1bf6c4a0a97e354094b8b3fdb911640ae42254c198791e2ca4" 5 | }, 6 | "pipfile-spec": 6, 7 | "requires": { 8 | "python_version": "3.6" 9 | }, 10 | "sources": [ 11 | { 12 | "name": "pypi", 13 | "url": "https://pypi.org/simple", 14 | "verify_ssl": true 15 | } 16 | ] 17 | }, 18 | "default": { 19 | "numpy": { 20 | "hashes": [ 21 | "sha256:03e311b0a4c9f5755da7d52161280c6a78406c7be5c5cc7facfbcebb641efb7e", 22 | "sha256:0cdd229a53d2720d21175012ab0599665f8c9588b3b8ffa6095dd7b90f0691dd", 23 | "sha256:312bb18e95218bedc3563f26fcc9c1c6bfaaf9d453d15942c0839acdd7e4c473", 24 | "sha256:464b1c48baf49e8505b1bb754c47a013d2c305c5b14269b5c85ea0625b6a988a", 25 | "sha256:5adfde7bd3ee4864536e230bcab1c673f866736698724d5d28c11a4d63672658", 26 | "sha256:7724e9e31ee72389d522b88c0d4201f24edc34277999701ccd4a5392e7d8af61", 27 | "sha256:8d36f7c53ae741e23f54793ffefb2912340b800476eb0a831c6eb602e204c5c4", 28 | "sha256:910d2272403c2ea8a52d9159827dc9f7c27fb4b263749dca884e2e4a8af3b302", 29 | "sha256:951fefe2fb73f84c620bec4e001e80a80ddaa1b84dce244ded7f1e0cbe0ed34a", 30 | "sha256:9588c6b4157f493edeb9378788dcd02cb9e6a6aeaa518b511a1c79d06cbd8094", 31 | "sha256:9ce8300950f2f1d29d0e49c28ebfff0d2f1e2a7444830fbb0b913c7c08f31511", 32 | "sha256:be39cca66cc6806652da97103605c7b65ee4442c638f04ff064a7efd9a81d50a", 33 | "sha256:c3ab2d835b95ccb59d11dfcd56eb0480daea57cdf95d686d22eff35584bc4554", 34 | "sha256:eb0fc4a492cb896346c9e2c7a22eae3e766d407df3eb20f4ce027f23f76e4c54", 35 | "sha256:ec0c56eae6cee6299f41e780a0280318a93db519bbb2906103c43f3e2be1206c", 36 | "sha256:f4e4612de60a4f1c4d06c8c2857cdcb2b8b5289189a12053f37d3f41f06c60d0" 37 | ], 38 | "version": "==1.17.0" 39 | }, 40 | "pandas": { 41 | "hashes": [ 42 | "sha256:074a032f99bb55d178b93bd98999c971542f19317829af08c99504febd9e9b8b", 43 | "sha256:20f1728182b49575c2f6f681b3e2af5fac9e84abdf29488e76d569a7969b362e", 44 | "sha256:2745ba6e16c34d13d765c3657bb64fa20a0e2daf503e6216a36ed61770066179", 45 | "sha256:32c44e5b628c48ba17703f734d59f369d4cdcb4239ef26047d6c8a8bfda29a6b", 46 | "sha256:3b9f7dcee6744d9dcdd53bce19b91d20b4311bf904303fa00ef58e7df398e901", 47 | "sha256:544f2033250980fb6f069ce4a960e5f64d99b8165d01dc39afd0b244eeeef7d7", 48 | "sha256:58f9ef68975b9f00ba96755d5702afdf039dea9acef6a0cfd8ddcde32918a79c", 49 | "sha256:9023972a92073a495eba1380824b197ad1737550fe1c4ef8322e65fe58662888", 50 | "sha256:914341ad2d5b1ea522798efa4016430b66107d05781dbfe7cf05eba8f37df995", 51 | "sha256:9d151bfb0e751e2c987f931c57792871c8d7ff292bcdfcaa7233012c367940ee", 52 | "sha256:b932b127da810fef57d427260dde1ad54542c136c44b227a1e367551bb1a684b", 53 | "sha256:cfb862aa37f4dd5be0730731fdb8185ac935aba8b51bf3bd035658111c9ee1c9", 54 | "sha256:de7ecb4b120e98b91e8a2a21f186571266a8d1faa31d92421e979c7ca67d8e5c", 55 | "sha256:df7e1933a0b83920769611c5d6b9a1bf301e3fa6a544641c6678c67621fe9843" 56 | ], 57 | "index": "pypi", 58 | "version": "==0.25.0" 59 | }, 60 | "python-dateutil": { 61 | "hashes": [ 62 | "sha256:7e6584c74aeed623791615e26efd690f29817a27c73085b78e4bad02493df2fb", 63 | "sha256:c89805f6f4d64db21ed966fda138f8a5ed7a4fdbc1a8ee329ce1b74e3c74da9e" 64 | ], 65 | "version": "==2.8.0" 66 | }, 67 | "pytz": { 68 | "hashes": [ 69 | "sha256:26c0b32e437e54a18161324a2fca3c4b9846b74a8dccddd843113109e1116b32", 70 | 
"sha256:c894d57500a4cd2d5c71114aaab77dbab5eabd9022308ce5ac9bb93a60a6f0c7" 71 | ], 72 | "version": "==2019.2" 73 | }, 74 | "shapely": { 75 | "hashes": [ 76 | "sha256:0378964902f89b8dbc332e5bdfa08e0bc2f7ab39fecaeb17fbb2a7699a44fe71", 77 | "sha256:34e7c6f41fb27906ccdf2514ee44a5774b90b39a256b6511a6a57d11ffe64999", 78 | "sha256:3ca69d4b12e2b05b549465822744b6a3a1095d8488cc27b2728a06d3c07d0eee", 79 | "sha256:3e9388f29bd81fcd4fa5c35125e1fbd4975ee36971a87a90c093f032d0e9de24", 80 | "sha256:3ef28e3f20a1c37f5b99ea8cf8dcb58e2f1a8762d65ed2d21fd92bf1d4811182", 81 | "sha256:523c94403047eb6cacd7fc1863ebef06e26c04d8a4e7f8f182d49cd206fe787e", 82 | "sha256:5d22a1a705c2f70f61ccadc696e33d922c1a92e00df8e1d58a6ade14dd7e3b4f", 83 | "sha256:714b6680215554731389a1bbdae4cec61741aa4726921fa2b2b96a6f578a2534", 84 | "sha256:7dfe1528650c3f0dc82f41a74cf4f72018288db9bfb75dcd08f6f04233ec7e78", 85 | "sha256:ba58b21b9cf3c33725f7f530febff9ed6a6846f9d0bf8a120fc74683ff919f89", 86 | "sha256:c4b87bb61fc3de59fc1f85e71a79b0c709dc68364d9584473697aad4aa13240f", 87 | "sha256:ebb4d2bee7fac3f6c891fcdafaa17f72ab9c6480f6d00de0b2dc9a5137dfe342" 88 | ], 89 | "index": "pypi", 90 | "version": "==1.6.4.post2" 91 | }, 92 | "six": { 93 | "hashes": [ 94 | "sha256:3350809f0555b11f552448330d0b52d5f24c91a322ea4a15ef22629740f3761c", 95 | "sha256:d16a0141ec1a18405cd4ce8b4613101da75da0e9a7aec5bdd4fa804d0e0eba73" 96 | ], 97 | "version": "==1.12.0" 98 | }, 99 | "tqdm": { 100 | "hashes": [ 101 | "sha256:1dc82f87a8726602fa7177a091b5e8691d6523138a8f7acd08e58088f51e389f", 102 | "sha256:47220a4f2aeebbc74b0ab317584264ea44c745e1fd5ff316b675cd0aff8afad8" 103 | ], 104 | "index": "pypi", 105 | "version": "==4.33.0" 106 | } 107 | }, 108 | "develop": {} 109 | } 110 | -------------------------------------------------------------------------------- /src/commands/run/ui.tsx: -------------------------------------------------------------------------------- 1 | import React, { Component } from "react"; 2 | import { render, Box, Static, Text } from "ink"; 3 | import Spinner from "ink-spinner"; 4 | import { EventEmitter } from "events"; 5 | import { OrderedMap, List } from "immutable"; 6 | 7 | type AppCell = { 8 | status: "p" | "f" | "r" | "o"; 9 | logfile?: string; 10 | logLines: string[]; 11 | task?: boolean; 12 | fresh?: boolean; 13 | executing?: boolean; 14 | target?: string; 15 | taskStatusWhy?: string; 16 | taskRunStatus?: string; 17 | taskRunExitcode?: string; 18 | }; 19 | 20 | function AppCellLineIcon(props) { 21 | const { cell } = props; 22 | if (cell.fresh) return {"\u{2192}"}; 23 | switch (cell.status) { 24 | case "p": 25 | return ( 26 | 27 | 28 | 29 | ); 30 | case "o": 31 | return {"\u{2022}"}; 32 | case "f": 33 | return {"✔️"}; 34 | case "r": 35 | return {"✖"}; 36 | } 37 | } 38 | 39 | function AppCellLineText(props: { cell: AppCell }) { 40 | const { cell } = props; 41 | let label, dimLabel; 42 | if (cell.task && cell.fresh) label = " - Target fresh, skipped."; 43 | else if (cell.task && cell.status === "f") label = " - Task run complete!"; 44 | else if (cell.task && cell.status === "r") label = " - Task run failed"; 45 | else if (cell.task && cell.executing) label = " - Task running..."; 46 | else label = ""; 47 | 48 | if (cell.task && cell.taskRunExitcode) 49 | dimLabel = `Proccess failed with ${cell.taskRunExitcode} `; 50 | else if (cell.taskStatusWhy) dimLabel = `${cell.taskStatusWhy}`; 51 | 52 | return ( 53 | 54 | {label} {dimLabel ? 
{dimLabel} : null} 55 | 56 | ); 57 | } 58 | 59 | function cellColor(cell: AppCell) { 60 | if (cell.task && cell.fresh) return "yellow"; 61 | if (cell.task && cell.executing) return "magenta"; 62 | if (cell.status === "p") return "blueBright"; 63 | if (cell.status === "f") return "greenBright"; 64 | if (cell.status === "r") return "red"; 65 | return "grey"; 66 | } 67 | function AppCellLine(props: { cell: AppCell; name: string }) { 68 | const { cell, name } = props; 69 | const color = cellColor(cell); 70 | return ( 71 | 72 | 73 | 74 | 75 | 76 | {` `} 77 | 78 | {name} 79 | 80 | 81 | 82 | 83 | {cell.target && !cell.fresh ? ( 84 | 85 | 86 | {cell.target ? ( 87 | 88 | target: {cell.target} 89 | 90 | ) : null} 91 | {cell.logfile ? ( 92 | 93 | logs: {cell.logfile} 94 | 95 | ) : null} 96 | 97 | 98 | ) : null} 99 | 100 | {cell.status === "p" && } 101 | 102 | ); 103 | } 104 | 105 | function Logs({ lines, borderColor = "" }) { 106 | if (lines.length === 0) return null; 107 | return ( 108 | 115 | {lines.map((line, i) => ( 116 | 117 | {line || ""} 118 | 119 | ))} 120 | 121 | ); 122 | } 123 | class App extends Component { 124 | props: { runEvents: EventEmitter; runHash: string }; 125 | state: { 126 | cells: OrderedMap; 127 | messages: List; 128 | }; 129 | constructor(props) { 130 | super(props); 131 | this.state = { 132 | cells: OrderedMap(), 133 | messages: List(), 134 | }; 135 | } 136 | _onInspector(data: any) { 137 | const cellName = data.cell; 138 | const { cells } = this.state; 139 | const newCell: AppCell = Object.assign(cells.get(cellName) ?? {}, { 140 | status: data.status.charAt(0), 141 | logLines: [], 142 | }); 143 | this.setState({ cells: cells.set(cellName, newCell) }); 144 | } 145 | _onTaskStatus(data: any) { 146 | const cellName = data.task; 147 | const { cells } = this.state; 148 | const newCell: AppCell = Object.assign(cells.get(cellName), { 149 | task: true, 150 | executing: false, 151 | target: data.target, 152 | fresh: data.status === "fresh", 153 | taskStatusWhy: data.why, 154 | }); 155 | this.setState({ cells: cells.set(cellName, newCell) }); 156 | } 157 | _onTaskRun(data: any) { 158 | const cellName = data.task; 159 | const { cells } = this.state; 160 | const newCell: AppCell = Object.assign(cells.get(cellName), { 161 | executing: data.event === "start", 162 | logfile: data.logfile, 163 | taskRunStatus: data.status, 164 | taskRunExitcode: data.exitcode, 165 | //why: data.why 166 | }); 167 | this.setState({ cells: cells.set(cellName, newCell) }); 168 | } 169 | componentDidMount() { 170 | this.props.runEvents.on("log", data => { 171 | this.setState({ messages: this.state.messages.push(data) }); 172 | switch (data.type) { 173 | case "inspector": 174 | return this._onInspector(data); 175 | case "task-status": 176 | return this._onTaskStatus(data); 177 | case "task-run": 178 | return this._onTaskRun(data); 179 | } 180 | }); 181 | } 182 | componentWillUnmount() { 183 | this.props.runEvents.removeAllListeners("log"); 184 | } 185 | render() { 186 | const tasks = Array.from(this.state.cells).filter( 187 | ([name, cell]) => cell.task 188 | ); 189 | const debug = true; 190 | return ( 191 | 192 | {debug ? ( 193 | 194 | {(item, i) => ( 195 | {JSON.stringify(item)} 196 | )} 197 | 198 | ) : null} 199 | { 200 | 201 | 202 | Running Oakfile at [TODO] {this.props.runHash} 203 | 204 | 205 | } 206 | {tasks.length ? 
( 207 | 208 | 209 | Tasks 210 | 211 | 212 | ) : null} 213 | {tasks.map(([name, cell]) => ( 214 | 215 | ))} 216 | 217 | ); 218 | } 219 | } 220 | 221 | export function runInkApp(runEvents: EventEmitter, runHash: string) { 222 | return render(); 223 | } 224 | -------------------------------------------------------------------------------- /examples/mbostock-carto/Oakfile: -------------------------------------------------------------------------------- 1 | fips = "48"; 2 | year = "2014"; 3 | projection = 4 | "d3.geoConicEqualArea().parallels([34, 40.5]).rotate([120, 0]).fitSize([960, 960], d)"; 5 | 6 | raw_state_zip = new Task({ 7 | target: "raw_state.zip", 8 | run: raw_state_zip => 9 | shell`wget -O ${raw_state_zip} 'http://www2.census.gov/geo/tiger/GENZ2014/shp/cb_${year}_${fips}_tract_500k.zip'` 10 | }); 11 | 12 | raw_state_dir = new Task({ 13 | target: "raw_state", 14 | run: raw_state_dir => 15 | shell`mkdir -p ${raw_state_dir} && unzip -o -d ${raw_state_dir} ${raw_state_zip}` 16 | }); 17 | 18 | state_json = new Task({ 19 | target: "state.json", 20 | run: state_json => 21 | shell`shp2json ${raw_state_dir}/cb_${year}_${fips}_tract_500k.shp -o ${state_json}` 22 | }); 23 | 24 | state_albers = new Task({ 25 | target: "state-albers.json", 26 | run: state_albers => 27 | shell`geoproject --out ${state_albers} '${projection}' ${state_json}` 28 | }); 29 | 30 | state_albers_svg = new Task({ 31 | target: "state-albers.svg", 32 | run: state_albers_svg => 33 | shell`geo2svg --out ${state_albers_svg} -w 960 -h 960 ${state_albers}` 34 | }); 35 | 36 | state_albers_ndjson = new Task({ 37 | target: "state-albers.ndjson", 38 | run: state_albers_ndjson => 39 | shell`ndjson-split 'd.features' < ${state_albers} > ${state_albers_ndjson}` 40 | }); 41 | 42 | state_albers_id = new Task({ 43 | target: "state-albers-id.ndjson", 44 | run: state_albers_id => 45 | shell`ndjson-map 'd.id = d.properties.GEOID.slice(2), d' < ${state_albers_ndjson} > ${state_albers_id}` 46 | }); 47 | 48 | census_pop_tract_raw = new Task({ 49 | target: "census_pop_tract.json", 50 | run: census_tract_raw => 51 | shell`wget -O ${census_tract_raw} "https://api.census.gov/data/2018/acs/acs5?get=B01003_001E&for=tract:*&in=state:${fips}"` 52 | }); 53 | 54 | census_pop_tract_ndjson = new Task({ 55 | target: "census_pop_tract.ndjson", 56 | run: census_pop_tract_ndjson => 57 | shell`ndjson-cat ${census_pop_tract_raw} \ 58 | | ndjson-split 'd.slice(1)' \ 59 | | ndjson-map '{id: d[2] + d[3], B01003: +d[0]}' \ 60 | > ${census_pop_tract_ndjson}` 61 | }); 62 | 63 | state_pop_join_ndjson = new Task({ 64 | target: "state-albers-census-pop-join.ndjson", 65 | run: state_pop_join_ndjson => 66 | shell`ndjson-join 'd.id' ${state_albers_id} ${census_pop_tract_ndjson} > ${state_pop_join_ndjson}` 67 | }); 68 | 69 | state_density_ndjson = new Task({ 70 | target: "state-albers-density.ndjson", 71 | run: state_density_ndjson => 72 | shell`ndjson-map 'd[0].properties = {density: Math.floor(d[1].B01003 / d[0].properties.ALAND * 2589975.2356)}, d[0]' \ 73 | < ${state_pop_join_ndjson} > ${state_density_ndjson} ` 74 | }); 75 | 76 | state_density_json = new Task({ 77 | target: "state-albers-density.json", 78 | run: state_density_json => 79 | shell`ndjson-reduce < ${state_density_ndjson} | ndjson-map '{type: "FeatureCollection", features:d}' > ${state_density_json}` 80 | }); 81 | 82 | state_density_color_ndjson = new Task({ 83 | target: "state-albers-color.ndjson", 84 | run: state_density_color_ndjson => shell`ndjson-map -r d3 '(d.properties.fill = 
d3.scaleSequential(d3.interpolateViridis).domain([0, 4000])(d.properties.density), d)' \ 85 | < ${state_density_ndjson} \ 86 | > ${state_density_color_ndjson}` 87 | }); 88 | 89 | state_density_color_svg = new Task({ 90 | target: "state-albers-color.svg", 91 | run: state_density_color_svg => shell`geo2svg -n --stroke none -p 1 -w 960 -h 960 \ 92 | < ${state_density_color_ndjson} \ 93 | > ${state_density_color_svg}` 94 | }); 95 | 96 | state_tracts_topo = new Task({ 97 | target: "state-tracts-topo.json", 98 | run: state_tracts_topo => 99 | shell`geo2topo -n --out ${state_tracts_topo} tracts=${state_density_ndjson}` 100 | }); 101 | 102 | state_tracts_simple_topo = new Task({ 103 | target: "state-tracts-simple-topo.json", 104 | run: state_tracts_simple_topo => 105 | shell`toposimplify -p 1 -f --out ${state_tracts_simple_topo} ${state_tracts_topo}` 106 | }); 107 | 108 | state_tracts_quantized_topo = new Task({ 109 | target: "state-tracts-quantized-topo.json", 110 | run: state_tracts_quantized_topo => 111 | shell`topoquantize --out ${state_tracts_quantized_topo} 1e5 ${state_tracts_simple_topo}` 112 | }); 113 | 114 | state_county_merge_topo = new Task({ 115 | target: "state-merge-topo.json", 116 | run: state_county_merge_topo => 117 | shell`topomerge -k 'd.id.slice(0,3)' --out ${state_county_merge_topo} counties=tracts ${state_tracts_quantized_topo}` 118 | }); 119 | 120 | state_topo = new Task({ 121 | target: "state-topo.json", 122 | run: state_topo => 123 | shell`topomerge --mesh -f 'a !== b' --out ${state_topo} counties=counties ${state_county_merge_topo}` 124 | }); 125 | 126 | state_density_color_sqrt_svg = new Task({ 127 | target: "state-albers-color-sqrt.svg", 128 | run: state_density_color_sqrt_svg => shell`topo2geo --in ${state_topo} tracts=- \ 129 | | ndjson-map -r d3 'z = d3.scaleSequential(d3.interpolateViridis).domain([0, 100]), d.features.forEach(f => f.properties.fill = z(Math.sqrt(f.properties.density))), d' \ 130 | | ndjson-split 'd.features' \ 131 | | geo2svg -n --stroke none -p 1 -w 960 -h 960 --out ${state_density_color_sqrt_svg}` 132 | }); 133 | 134 | state_density_color_log_svg = new Task({ 135 | target: "state-albers-color-log.svg", 136 | run: state_density_color_log_svg => shell`topo2geo --in ${state_topo} tracts=- \ 137 | | ndjson-map -r d3 'z = d3.scaleLog().domain(d3.extent(d.features.filter(f => f.properties.density), f => f.properties.density)).interpolate(() => d3.interpolateViridis), d.features.forEach(f => f.properties.fill = z(f.properties.density)), d' \ 138 | | ndjson-split 'd.features' \ 139 | | geo2svg -n --stroke none -p 1 -w 960 -h 960 --out ${state_density_color_log_svg}` 140 | }); 141 | 142 | state_density_color_quantile_svg = new Task({ 143 | target: "state-albers-color-quantile.svg", 144 | run: state_density_color_quantile_svg => shell`topo2geo --in ${state_topo} tracts=- \ 145 | | ndjson-map -r d3 'z = d3.scaleQuantile().domain(d.features.map(f => f.properties.density)).range(d3.quantize(d3.interpolateViridis, 256)), d.features.forEach(f => f.properties.fill = z(f.properties.density)), d' \ 146 | | ndjson-split 'd.features' \ 147 | | geo2svg -n --stroke none -p 1 -w 960 -h 960 --out ${state_density_color_quantile_svg}` 148 | }); 149 | 150 | state_density_color_threshold_svg = new Task({ 151 | target: "state-albers-color-threshold.svg", 152 | run: state_density_color_threshold_svg => shell`topo2geo --in ${state_topo} tracts=- \ 153 | | ndjson-map -r d3 -r d3-scale-chromatic 'z = d3.scaleThreshold().domain([1, 10, 50, 200, 500, 1000, 2000, 
4000]).range(d3.schemeOrRd[9]), d.features.forEach(f => f.properties.fill = z(f.properties.density)), d' \ 154 | | ndjson-split 'd.features' \ 155 | | geo2svg -n --stroke none -p 1 -w 960 -h 960 --out ${state_density_color_threshold_svg}` 156 | }); 157 | 158 | state_svg = new Task({ 159 | target: "state.svg", 160 | run: state_svg => shell`(topo2geo --in ${state_topo} tracts=- \ 161 | | ndjson-map -r d3 -r d3-scale-chromatic 'z = d3.scaleThreshold().domain([1, 10, 50, 200, 500, 1000, 2000, 4000]).range(d3.schemeOrRd[9]), d.features.forEach(f => f.properties.fill = z(f.properties.density)), d' \ 162 | | ndjson-split 'd.features'; \ 163 | topo2geo --in ${state_topo} counties=- \ 164 | | ndjson-map 'd.properties = {"stroke": "#000", "stroke-opacity": 0.3}, d')\ 165 | | geo2svg -n --stroke none -p 1 -w 960 -h 960 --out ${state_svg}` 166 | }); 167 | --------------------------------------------------------------------------------