├── src ├── globals.d.ts ├── index.ts ├── pandas.ts ├── Decoder.ts ├── PythonShell.ts └── py.ts ├── tests ├── config.ts ├── tsconfig.json ├── PythonShell.test.ts ├── pandas.test.ts └── py.test.ts ├── .eslintignore ├── .prettierignore ├── tsconfig.json ├── .babelrc.js ├── .circleci └── config.yml ├── .eslintrc.json ├── LICENSE ├── .gitignore ├── package.json └── README.md /src/globals.d.ts: -------------------------------------------------------------------------------- 1 | type Dict = Record; 2 | -------------------------------------------------------------------------------- /tests/config.ts: -------------------------------------------------------------------------------- 1 | export const PYTHON_PATH = process.env.HOPI_TEST_PYTHON_PATH || 'python'; 2 | -------------------------------------------------------------------------------- /.eslintignore: -------------------------------------------------------------------------------- 1 | **/coverage/** 2 | **/lib/** 3 | **/fixtures/** 4 | **/flow-typed/** 5 | **/node_modules/** 6 | **/CHANGELOG.md 7 | **/package.json 8 | -------------------------------------------------------------------------------- /.prettierignore: -------------------------------------------------------------------------------- 1 | **/coverage/** 2 | **/lib/** 3 | **/fixtures/** 4 | **/flow-typed/** 5 | **/node_modules/** 6 | **/CHANGELOG.md 7 | **/package.json 8 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "@4c/tsconfig/node", 3 | "compilerOptions": { 4 | "rootDir": "src" 5 | }, 6 | "include": ["src/**/*.ts"] 7 | } 8 | -------------------------------------------------------------------------------- /tests/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "@4c/tsconfig/node", 3 | "compilerOptions": { 4 | "types": ["jest", "node"] 5 
| }, 6 | "include": ["**/*.ts", "../src/**/*.d.ts"] 7 | } 8 | -------------------------------------------------------------------------------- /.babelrc.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | presets: [ 3 | [ 4 | '@4c', 5 | { 6 | target: 'node', 7 | }, 8 | ], 9 | '@babel/preset-typescript', 10 | ], 11 | }; 12 | -------------------------------------------------------------------------------- /src/index.ts: -------------------------------------------------------------------------------- 1 | import { Decoder, TypeDecoder } from './Decoder'; 2 | import PythonShell from './PythonShell'; 3 | import { createPythonEnv, kwargs } from './py'; 4 | 5 | export { createPythonEnv, Decoder, kwargs, PythonShell, TypeDecoder }; 6 | -------------------------------------------------------------------------------- /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | jobs: 4 | test: 5 | docker: 6 | - image: nikolaik/python-nodejs:python3.8-nodejs14 7 | steps: 8 | - checkout 9 | - run: pip install pandas 10 | - run: yarn 11 | - run: yarn test 12 | 13 | workflows: 14 | version: 2 15 | ci: 16 | jobs: 17 | - test 18 | -------------------------------------------------------------------------------- /.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": [ 3 | "4catalyzer", 4 | "4catalyzer-jest", 5 | "prettier", 6 | "4catalyzer-typescript" 7 | ], 8 | "plugins": ["prettier"], 9 | "env": { 10 | "node": true 11 | }, 12 | "rules": { 13 | "prettier/prettier": "error", 14 | "global-require": "off", 15 | "import/no-dynamic-require": "off" 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Giacomo Tagliabue 4 | 5 | 
Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | lib/ 2 | es/ 3 | 4 | # Logs 5 | logs 6 | *.log 7 | npm-debug.log* 8 | yarn-debug.log* 9 | yarn-error.log* 10 | 11 | # Runtime data 12 | pids 13 | *.pid 14 | *.seed 15 | *.pid.lock 16 | 17 | # Directory for instrumented libs generated by jscoverage/JSCover 18 | lib-cov 19 | 20 | # Coverage directory used by tools like istanbul 21 | coverage 22 | 23 | # nyc test coverage 24 | .nyc_output 25 | 26 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) 27 | .grunt 28 | 29 | # Bower dependency directory (https://bower.io/) 30 | bower_components 31 | 32 | # node-waf configuration 33 | .lock-wscript 34 | 35 | # Compiled binary addons (http://nodejs.org/api/addons.html) 36 | build/Release 37 | 38 | # Dependency directories 39 | node_modules/ 40 | jspm_packages/ 41 | 42 | # Typescript v1 declaration files 43 | typings/ 44 | 45 | # Optional npm cache directory 46 | .npm 47 | 48 | # Optional eslint cache 49 | .eslintcache 50 | 51 | # Optional REPL history 52 | .node_repl_history 53 | 54 | # Output of 'npm pack' 55 | *.tgz 56 | 57 | # Yarn Integrity file 58 | .yarn-integrity 59 | 60 | # dotenv environment variables file 61 | .env 62 | 63 | -------------------------------------------------------------------------------- /src/pandas.ts: -------------------------------------------------------------------------------- 1 | // eslint-disable-next-line import/no-extraneous-dependencies 2 | import { DateTime, Duration } from 'luxon'; 3 | 4 | import { Json, TypeDecoder } from './Decoder'; 5 | import PythonShell from './PythonShell'; 6 | 7 | const decoders: TypeDecoder[] = [ 8 | { 9 | typeName: 'numpy.int64', 10 | encode: 'lambda v: int(v)', 11 | decode: (v) => v, 12 | }, 13 | { 14 | typeName: 'numpy.float64', 15 | encode: 'lambda v: float(v)', 16 | decode: (v) => v, 17 | }, 
18 | { 19 | typeName: 'pandas.core.series.Series', 20 | encode: 'lambda v: v.to_list()', 21 | decode: (v, decode) => decode(v), 22 | }, 23 | { 24 | typeName: 'pandas.core.frame.DataFrame', 25 | encode: 'lambda v: [list(list(x) for x in v.values), list(v.columns)]', 26 | decode: (rawV, decode) => { 27 | const [values, columns] = rawV as [Json[][], string[]]; 28 | return values.map((v) => { 29 | const row: Dict = {}; 30 | v.forEach((vv, i) => { 31 | row[columns[i]] = decode(vv); 32 | }); 33 | return row; 34 | }); 35 | }, 36 | }, 37 | { 38 | typeName: 'pandas._libs.tslibs.timestamps.Timestamp', 39 | encode: 'lambda v: v.isoformat()', 40 | decode: (s: string) => DateTime.fromISO(s), 41 | }, 42 | { 43 | typeName: 'pandas._libs.tslibs.timedeltas.Timedelta', 44 | encode: 'lambda v: v.total_seconds() * 1000', 45 | decode: (s: number) => Duration.fromMillis(s), 46 | }, 47 | ]; 48 | 49 | export async function addPandasDecoders(shell: PythonShell) { 50 | await Promise.all(decoders.map(shell.addDecoder)); 51 | } 52 | -------------------------------------------------------------------------------- /src/Decoder.ts: -------------------------------------------------------------------------------- 1 | export interface TypeDecoder { 2 | /** The fully qualified name (module name + name) of the python type */ 3 | typeName: string; 4 | 5 | /** a stringified lambda function to convert the type to a serializable type */ 6 | encode: string; 7 | 8 | /** a function that converts the decoded json */ 9 | decode: (value: Json, fullDecode: Decoder['decode']) => any; 10 | } 11 | 12 | export type Json = 13 | | Json[] 14 | | { [idx: string]: Json } 15 | | string 16 | | number 17 | | boolean 18 | | null; 19 | 20 | function mapValues(o: Dict, mapper: (v: T1) => T2) { 21 | const ret: Dict = {}; 22 | 23 | Object.entries(o).forEach(([k, v]) => { 24 | ret[k] = mapper(v); 25 | }); 26 | 27 | return ret; 28 | } 29 | 30 | export class Decoder { 31 | decoders: Dict = {}; 32 | 33 | decode = (val: Json): any 
=> { 34 | if (Array.isArray(val)) { 35 | return val.map(this.decode); 36 | } 37 | if (val instanceof Object) { 38 | if ('%%hopi_v%%' in val && '%%hopi_t%%' in val) { 39 | const typeName = val['%%hopi_t%%'] as string; 40 | const rawValue = val['%%hopi_v%%']; 41 | const decode = this.decoders[typeName]; 42 | if (!decode) { 43 | throw new Error(`Decoder not registered for type ${typeName}`); 44 | } 45 | return decode(rawValue, this.decode); 46 | } 47 | 48 | return mapValues(val, this.decode); 49 | } 50 | return val; 51 | }; 52 | 53 | add(decoder: TypeDecoder) { 54 | this.decoders[decoder.typeName] = decoder.decode; 55 | } 56 | 57 | parseJson(text: string) { 58 | const parsed: Json = JSON.parse(text); 59 | return this.decode(parsed); 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "hopi", 3 | "version": "0.2.0-beta.1", 4 | "main": "lib/index.js", 5 | "repository": { 6 | "type": "git", 7 | "url": "https://github.com/itajaja/hopi.git" 8 | }, 9 | "author": "Giacomo Tagliabue", 10 | "license": "MIT", 11 | "scripts": { 12 | "build": "4c build src", 13 | "format": "4c format src './*'", 14 | "lint": "4c lint src './*'", 15 | "prepublishOnly": "yarn run build", 16 | "release": "4c release", 17 | "tdd": "jest --watch", 18 | "test": "yarn lint && yarn typecheck && jest", 19 | "testonly": "jest", 20 | "typecheck": "tsc --noEmit && tsc --noEmit -p tests" 21 | }, 22 | "publishConfig": { 23 | "access": "public" 24 | }, 25 | "prettier": "@4c/prettier-config", 26 | "husky": { 27 | "hooks": { 28 | "pre-commit": "lint-staged" 29 | } 30 | }, 31 | "lint-staged": { 32 | "*": [ 33 | "yarn 4c lint --fix", 34 | "git add" 35 | ] 36 | }, 37 | "jest": { 38 | "preset": "@4c/jest-preset", 39 | "testEnvironment": "node", 40 | "testMatch": [ 41 | "**/tests/*.test.ts" 42 | ] 43 | }, 44 | "release": { 45 | "publishDir": "lib" 46 | }, 47 
| "devDependencies": { 48 | "@4c/babel-preset": "^7.1.0", 49 | "@4c/cli": "^2.0.1", 50 | "@4c/jest-preset": "^1.4.5", 51 | "@4c/prettier-config": "^1.1.0", 52 | "@4c/tsconfig": "^0.3.1", 53 | "@babel/preset-typescript": "^7.8.3", 54 | "@types/jest": "^26.0.8", 55 | "@types/node": "*", 56 | "@typescript-eslint/eslint-plugin": "^2.19.2", 57 | "@typescript-eslint/parser": "^3.7.1", 58 | "eslint-config-4catalyzer": "^1.1.0", 59 | "eslint-config-4catalyzer-jest": "^2.0.4", 60 | "eslint-config-4catalyzer-typescript": "^1.1.6", 61 | "eslint-config-prettier": "^6.10.0", 62 | "eslint-plugin-import": "^2.20.1", 63 | "eslint-plugin-jest": "^23.7.0", 64 | "eslint-plugin-prettier": "^3.1.2", 65 | "husky": "^3.0.9", 66 | "jest": "^26.0.0", 67 | "lint-staged": "^9.4.2", 68 | "prettier": "^2.0.1", 69 | "typescript": "^3.7.5" 70 | }, 71 | "optionalDependencies": { 72 | "@types/luxon": "^1.24.3", 73 | "luxon": "^1.24.1" 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /tests/PythonShell.test.ts: -------------------------------------------------------------------------------- 1 | import { PythonShell } from '../src'; 2 | import { PYTHON_PATH } from './config'; 3 | 4 | let shell: PythonShell; 5 | 6 | beforeEach(() => { 7 | shell = new PythonShell({ pythonPath: PYTHON_PATH }); 8 | }); 9 | 10 | afterEach(() => { 11 | return shell.kill(); 12 | }); 13 | 14 | test('sendAndReceive', async () => { 15 | await shell.sendAndReceive('EXEC', 'a = 0'); 16 | 17 | const promises: Promise[] = []; 18 | for (let i = 0; i < 10000; i++) { 19 | promises.push(shell.sendAndReceive('EXEC', 'a = a+1')); 20 | } 21 | 22 | await Promise.all(promises); 23 | 24 | const result = await shell.sendAndReceive('EVAL', 'a'); 25 | expect(result).toEqual(10000); 26 | }); 27 | 28 | test('default decoders', async () => { 29 | const listResult = await shell.sendAndReceive('EVAL', "[1, 2, '3']"); 30 | expect(listResult).toEqual([1, 2, '3']); 31 | const tupleResult = await 
shell.sendAndReceive('EVAL', "(1, 2, '3')"); 32 | expect(tupleResult).toEqual([1, 2, '3']); 33 | const dictResult = await shell.sendAndReceive('EVAL', '{1: 2}'); 34 | expect(dictResult).toEqual({ 1: 2 }); 35 | 36 | await expect(shell.sendAndReceive('EVAL', 'set()')).rejects.toThrow( 37 | "TypeError('Object of type set is not JSON serializable')", 38 | ); 39 | 40 | const int = await shell.sendAndReceive('EVAL', '1'); 41 | const float = await shell.sendAndReceive('EVAL', '1.1'); 42 | const str = await shell.sendAndReceive('EVAL', '"foo"'); 43 | const none = await shell.sendAndReceive('EVAL', 'None'); 44 | const boolTrue = await shell.sendAndReceive('EVAL', 'True'); 45 | const boolFalse = await shell.sendAndReceive('EVAL', 'False'); 46 | 47 | expect(int).toEqual(1); 48 | expect(float).toEqual(1.1); 49 | expect(str).toEqual('foo'); 50 | expect(none).toEqual(null); 51 | expect(boolTrue).toEqual(true); 52 | expect(boolFalse).toEqual(false); 53 | }); 54 | 55 | test('custom decoders', async () => { 56 | await shell.addDecoder({ 57 | typeName: 'pandas.core.series.Series', 58 | encode: 'lambda v: v.values', 59 | decode: (v, decode) => decode(v), 60 | }); 61 | await shell.addDecoder({ 62 | typeName: 'numpy.ndarray', 63 | encode: 'list', 64 | decode: (v, decode) => decode(v), 65 | }); 66 | await shell.addDecoder({ 67 | typeName: 'pandas._libs.tslibs.timestamps.Timestamp', 68 | encode: 'lambda v: v.isoformat()', 69 | decode: (s: string) => new Date(s).toDateString(), 70 | }); 71 | 72 | await shell.sendAndReceive('EXEC', 'import pandas as pd'); 73 | const ret = await shell.sendAndReceive( 74 | 'EVAL', 75 | 'pd.Series(["a", 1, pd.Timestamp("2020-01-01")])', 76 | ); 77 | expect(ret).toMatchInlineSnapshot(` 78 | Array [ 79 | "a", 80 | 1, 81 | "Wed Jan 01 2020", 82 | ] 83 | `); 84 | }); 85 | -------------------------------------------------------------------------------- /tests/pandas.test.ts: -------------------------------------------------------------------------------- 1 | 
import { Settings } from 'luxon'; 2 | 3 | import { createPythonEnv } from '../src'; 4 | import { addPandasDecoders } from '../src/pandas'; 5 | import { PythonEnv, kwargs } from '../src/py'; 6 | import { PYTHON_PATH } from './config'; 7 | 8 | let py: PythonEnv; 9 | 10 | // to make the snapshots deterministic in any timezone 11 | Settings.defaultZoneName = 'UTC'; 12 | 13 | beforeEach(async () => { 14 | py = createPythonEnv({ pythonPath: PYTHON_PATH }); 15 | await addPandasDecoders(py.shell); 16 | }); 17 | 18 | afterEach(() => { 19 | return py.shell.kill(); 20 | }); 21 | 22 | test('series', async () => { 23 | const pd = await py.import('pandas'); 24 | const intSeries = pd.Series([1, 2]); 25 | expect(await intSeries._).toEqual([1, 2]); 26 | 27 | const floatSeries = pd.Series([1.1, 2.1]); 28 | expect(await floatSeries._).toEqual([1.1, 2.1]); 29 | 30 | const stringSeries = pd.Series(['foo', 'bar']); 31 | expect(await stringSeries._).toEqual(['foo', 'bar']); 32 | 33 | const dateSeries = pd.Series([ 34 | pd.Timestamp('2020-01-01'), 35 | pd.Timestamp('2020-01-02'), 36 | ]); 37 | expect(await dateSeries._).toMatchInlineSnapshot(` 38 | Array [ 39 | "2020-01-01T00:00:00.000Z", 40 | "2020-01-02T00:00:00.000Z", 41 | ] 42 | `); 43 | }); 44 | 45 | test('dataframe', async () => { 46 | const pd = await py.import('pandas'); 47 | const range = py`range`; 48 | const dateList = pd.date_range('2019-01-01', '2019-01-06'); 49 | const df = pd.DataFrame( 50 | { 51 | a: range(0, 6), 52 | b: ['a', 'b', 'c', 'd', 'e', 'f'], 53 | c: dateList, 54 | d: dateList` - ${dateList}[::-1]`, 55 | e: [{}, [], py`[]`, 4, '-', [pd.Timestamp('2020')]], 56 | }, 57 | kwargs({ index: dateList }), 58 | ); 59 | 60 | expect(await df._).toMatchInlineSnapshot(` 61 | Array [ 62 | Object { 63 | "a": 0, 64 | "b": "a", 65 | "c": "2019-01-01T00:00:00.000Z", 66 | "d": "PT-432000S", 67 | "e": Object {}, 68 | }, 69 | Object { 70 | "a": 1, 71 | "b": "b", 72 | "c": "2019-01-02T00:00:00.000Z", 73 | "d": "PT-259200S", 74 | "e": 
Array [], 75 | }, 76 | Object { 77 | "a": 2, 78 | "b": "c", 79 | "c": "2019-01-03T00:00:00.000Z", 80 | "d": "PT-86400S", 81 | "e": Array [], 82 | }, 83 | Object { 84 | "a": 3, 85 | "b": "d", 86 | "c": "2019-01-04T00:00:00.000Z", 87 | "d": "PT86400S", 88 | "e": 4, 89 | }, 90 | Object { 91 | "a": 4, 92 | "b": "e", 93 | "c": "2019-01-05T00:00:00.000Z", 94 | "d": "PT259200S", 95 | "e": "-", 96 | }, 97 | Object { 98 | "a": 5, 99 | "b": "f", 100 | "c": "2019-01-06T00:00:00.000Z", 101 | "d": "PT432000S", 102 | "e": Array [ 103 | "2020-01-01T00:00:00.000Z", 104 | ], 105 | }, 106 | ] 107 | `); 108 | }); 109 | -------------------------------------------------------------------------------- /src/PythonShell.ts: -------------------------------------------------------------------------------- 1 | /* eslint-disable no-console */ 2 | import { spawn } from 'child_process'; 3 | 4 | import { Decoder, TypeDecoder } from './Decoder'; 5 | 6 | const sleep = (ms: number) => new Promise((r) => setTimeout(r, ms)); 7 | 8 | function getPythonScript(debug: boolean) { 9 | return ` 10 | import sys 11 | import json 12 | 13 | encoders = {} 14 | 15 | class Encoder(json.JSONEncoder): 16 | def default(self, o): 17 | o_type = type(o) 18 | typename = f"{o_type.__module__}.{o_type.__name__}" 19 | if typename in encoders: 20 | return { 21 | "%%hopi_v%%": encoders[typename](o), 22 | "%%hopi_t%%": typename, 23 | } 24 | 25 | return super().default(o) 26 | 27 | 28 | 29 | for line in sys.stdin: 30 | l = line[:-1] 31 | ${debug ? 
'sys.stderr.write(l)' : ''} 32 | i1 = line.index("=") 33 | i2 = line.index("=", i1 + 1) 34 | 35 | msg_id = line[:i1] 36 | cmd = line[i1 + 1 : i2] 37 | data = line[i2 + 1 :] 38 | 39 | status = "PASS" 40 | try: 41 | if cmd == "EVAL": 42 | result = eval(data) 43 | result = json.dumps(result, cls=Encoder) 44 | 45 | elif cmd == "EXEC": 46 | exec(data) 47 | result = 0 48 | except BaseException as e: 49 | result = e.__repr__() 50 | status = "FAIL" 51 | 52 | print(f"{msg_id}={status}={result}") 53 | `; 54 | } 55 | 56 | interface Message { 57 | status: 'PASS' | 'FAIL'; 58 | data: string; 59 | } 60 | 61 | export interface PythonShellConfig { 62 | pythonPath: string; 63 | debug?: boolean; 64 | } 65 | 66 | export default class PythonShell { 67 | private messages = new Map(); 68 | 69 | private msgCounter = 0; 70 | 71 | proc: ReturnType; 72 | 73 | decoder = new Decoder(); 74 | 75 | constructor({ pythonPath, debug = false }: PythonShellConfig) { 76 | const pythonScript = getPythonScript(debug); 77 | this.proc = spawn(pythonPath, ['-u', '-c', pythonScript]); 78 | 79 | this.proc.stderr!.on('data', (d) => { 80 | console.warn(`STDERR: ${d}`); 81 | }); 82 | 83 | this.proc.stdout!.on('data', (c: string) => { 84 | c.toString().split('\n').filter(Boolean).forEach(this.onResponse); 85 | }); 86 | } 87 | 88 | onResponse = (msg: string) => { 89 | const i1 = msg.indexOf('='); 90 | const i2 = msg.indexOf('=', i1 + 1); 91 | 92 | const msgId = msg.substring(0, i1); 93 | const status = msg.substring(i1 + 1, i2) as Message['status']; 94 | const data = msg.substring(i2 + 1); 95 | 96 | this.messages.set(msgId, { status, data }); 97 | }; 98 | 99 | async receive(msgId: string) { 100 | while (!this.messages.has(msgId)) { 101 | // eslint-disable-next-line no-await-in-loop 102 | await sleep(1); 103 | } 104 | 105 | const msg = this.messages.get(msgId)!; 106 | this.messages.delete(msgId); 107 | return msg; 108 | } 109 | 110 | send(cmd: 'EVAL' | 'EXEC', msg: string) { 111 | const msgId = this.msgCounter++; 
112 | this.proc.stdin!.write(`${msgId}=${cmd}=${msg}\n`); 113 | return msgId.toString(); 114 | } 115 | 116 | async sendAndReceive(cmd: 'EVAL' | 'EXEC', msg: string) { 117 | const msgId = this.send(cmd, msg); 118 | const { data, status } = await this.receive(msgId); 119 | if (status === 'FAIL') { 120 | throw new Error(data); 121 | } 122 | 123 | return this.decoder.parseJson(data); 124 | } 125 | 126 | addDecoder = async (typeDecoder: TypeDecoder) => { 127 | this.decoder.add(typeDecoder); 128 | await this.sendAndReceive( 129 | 'EXEC', 130 | `encoders["${typeDecoder.typeName}"] = ${typeDecoder.encode}`, 131 | ); 132 | }; 133 | 134 | kill() { 135 | this.proc.kill(); 136 | } 137 | } 138 | -------------------------------------------------------------------------------- /tests/py.test.ts: -------------------------------------------------------------------------------- 1 | import { createPythonEnv } from '../src'; 2 | import { PyVar, PythonEnv, kwargs } from '../src/py'; 3 | import { PYTHON_PATH } from './config'; 4 | 5 | let py: PythonEnv; 6 | 7 | beforeEach(() => { 8 | py = createPythonEnv(PYTHON_PATH); 9 | }); 10 | 11 | afterEach(() => { 12 | return py.shell.kill(); 13 | }); 14 | 15 | test('simple test', async () => { 16 | const list = py`list`; 17 | const max = py`max`; 18 | const l = list([1, 2, 3, 4, 5]); 19 | 20 | const maxValue = await max(l)._; 21 | const minValue = await py`min`(l)._; 22 | 23 | expect(maxValue).toEqual(5); 24 | expect(minValue).toEqual(1); 25 | }); 26 | 27 | test('import', async () => { 28 | const json = await py.import('json'); 29 | expect(await json.dumps({ 1: 2 })._).toEqual('{"1": 2}'); 30 | }); 31 | 32 | test('getters', async () => { 33 | await py.import('json'); 34 | const list = py`list`; 35 | const dict = py`dict`; 36 | 37 | const myList = list([1, 2, 3, 4, 5, 6]); 38 | const myDict = dict({ '10': 42 }); 39 | 40 | expect(await myList[0]._).toEqual(1); 41 | expect(await myList[-2]._).toEqual(5); 42 | 43 | // these are slightly exotic 
behaviors, we might revisit at a later time 44 | 45 | await expect(myList['0:3']._).rejects.toThrow( 46 | "TypeError('list indices must be integers or slices, not str')", 47 | ); 48 | expect(await myList`[0:3]`._).toEqual([1, 2, 3]); 49 | 50 | // unfortunately Proxy getters sees all keys as strings 51 | await expect(myDict[10]._).rejects.toThrow('KeyError(10)'); 52 | await expect(myDict['10']._).rejects.toThrow('KeyError(10)'); 53 | expect(await myDict`['10']`._).toEqual(42); 54 | // eslint-disable-next-line no-underscore-dangle 55 | expect(await myDict.get.__name__[1]._).toEqual('e'); 56 | }); 57 | 58 | test('templated variables', async () => { 59 | const one = py`1`; 60 | const two = py`2`; 61 | const myList = py`[${one}, ${two}]`; 62 | expect(await py`${one} + ${two}`._).toEqual(3); 63 | expect(await py`"{}-{}".format(*${myList})`._).toEqual('1-2'); 64 | await py.import('json'); 65 | 66 | expect( 67 | await py`json.dumps([${[1]}, ${{}}, ${true}, ${1.1}, ${null}, ${'abc'}])` 68 | ._, 69 | ).toMatchInlineSnapshot(`"[[1], {}, true, 1.1, null, \\"abc\\"]"`); 70 | }); 71 | 72 | test('functions', async () => { 73 | await py.x`def foo(a, b, c): return a + b + c`; 74 | const foo = py`foo`; 75 | 76 | expect(await foo(1, 2, 3)._).toEqual(6); 77 | expect(await foo(1, kwargs({ c: 2, b: 3 }))._).toEqual(6); 78 | expect(() => foo(kwargs({ a: 1 }), kwargs({ c: 2, b: 3 }))).toThrow( 79 | 'kwargs need to be last', 80 | ); 81 | }); 82 | 83 | test('nested arguments', async () => { 84 | const json = await py.import('json'); 85 | 86 | const raw = json.dumps([1, 2, { foo: py`[${{}}]` }, [3, py`4`]]); 87 | expect(await raw._).toMatchInlineSnapshot( 88 | `"[1, 2, {\\"foo\\": [{}]}, [3, 4]]"`, 89 | ); 90 | }); 91 | 92 | test('full test', async () => { 93 | const pd = await py.import('pandas'); 94 | const np = await py.import('numpy'); 95 | await py.import('json'); 96 | await py.shell.addDecoder({ 97 | typeName: 'pandas.core.series.Series', 98 | encode: 'lambda v: v.values', 99 | decode: 
(v, decode) => decode(v), 100 | }); 101 | await py.shell.addDecoder({ 102 | typeName: 'numpy.ndarray', 103 | encode: 'list', 104 | decode: (v) => v, 105 | }); 106 | 107 | const df = pd.DataFrame({ 108 | a: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 109 | b: [1, 3, 5, 7, 9, 10, 8, 6, 4, 2], 110 | }); 111 | await py.x`${df}['c'] = (${df}.a + ${df}.b).astype(float)`; 112 | await py.x`${df}['c'] = ${df}.c`; 113 | 114 | const ema = df.c.ewm(kwargs({ span: 3, adjust: false })).mean(); 115 | const rolling = ema.rolling(3, 0); 116 | const emaMin = rolling.min(); 117 | 118 | const signal = py`${ema} * 0`; 119 | const d = (a: PyVar, b: PyVar) => np.absolute(py`(${a} - ${b}) / ${a}`); 120 | 121 | const ones = py`${d(ema, emaMin)} > .1`; 122 | await ones.to_json()._; 123 | 124 | await py.x`${signal}.loc[${ones}] = 1`; 125 | 126 | expect(await signal._).toMatchInlineSnapshot(` 127 | Array [ 128 | 0, 129 | 1, 130 | 1, 131 | 1, 132 | 1, 133 | 1, 134 | 1, 135 | 0, 136 | 0, 137 | 0, 138 | ] 139 | `); 140 | }); 141 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # hopi [![npm version](https://badge.fury.io/js/hopi.svg)](https://badge.fury.io/js/hopi) [![hopi](https://circleci.com/gh/itajaja/hopi.svg?style=svg)](https://circleci.com/gh/itajaja/hopi) 2 | 3 | _If it looks like python, swims like python, and quacks like python, then it probably is python._ 4 | 5 | Hopi is a Python-in-node interoperability library focused on seamlessness and developer experience. 6 | 7 | The goal of the project is to be able to make use of python libraries and features as if they were written in JavaScript. 8 | The result is code where it's really hard to tell which parts are executed in python and which in node. Whether this is actually a good thing is up for debate, but it's certainly fun ⭐️. 9 | 10 | Use at your own risk! Hopi is currently not production ready. 
The APIs might change and there might be significant performance issues. Moreover, the current iteration does not offer any GC capabilities and there is significant risk of using too much memory if the python envs are long-lived. 11 | 12 | ## Getting started 13 | 14 | ```sh 15 | # yarn 16 | yarn add hopi 17 | # npm 18 | npm install hopi 19 | ``` 20 | 21 | ## Example 22 | 23 | Worth a thousand words: 24 | 25 | ```ts 26 | import { createPythonEnv, kwargs } from 'hopi'; 27 | 28 | const py = createPythonEnv('python'); 29 | 30 | async function run() { 31 | try { 32 | await py.shell.addDecoder({ 33 | typeName: 'pandas._libs.tslibs.timestamps.Timestamp', 34 | encode: 'lambda v: v.isoformat()', 35 | decode: (s: string) => new Date(s).toDateString(), 36 | }); 37 | 38 | const pd = await py.import('pandas'); 39 | let df = pd.read_csv( 40 | 'https://covid.ourworldindata.org/data/owid-covid-data.csv', 41 | ); 42 | df = df.assign(kwargs({ date: pd.to_datetime(df.date) })); 43 | // remove total world count 44 | df = df`[${df}.iso_code != 'OWID_WRL']`; 45 | const newCases = df.groupby('date').new_cases; 46 | const diffCases = newCases 47 | .sum() 48 | .diff() 49 | .sort_values(kwargs({ ascending: false })); 50 | 51 | const biggestIncrease = diffCases.iloc[0]; 52 | const biggestIncreaseDay = diffCases.index[0]; 53 | console.log( 54 | `the biggest increase in daily new cases was ${await biggestIncrease._} and it happened on ${( 55 | await biggestIncreaseDay._ 56 | ).toDateString()}`, 57 | ); 58 | 59 | const juneData = df`[${df}.date.between('2020-06-01', '2020-07-01')]`; 60 | const usJuneDeaths = juneData`[${juneData}.iso_code == 'USA']`.new_deaths.describe(); 61 | const median = await usJuneDeaths['50%']._; 62 | console.log( 63 | `in june, the median of daily new deaths in the United States was ${median}`, 64 | ); 65 | 66 | const requests = await py.import('requests'); 67 | 68 | const resp = requests.get('https://example.com/'); 69 | await resp.raise_for_status()._; 70 | 
console.log(await resp.text._); 71 | } catch (e) { 72 | console.log('received an error:', e); 73 | } finally { 74 | py.shell.kill(); 75 | } 76 | } 77 | 78 | run(); 79 | ``` 80 | 81 | ## Documentation 82 | 83 | First, to create a new environment: 84 | 85 | ```ts 86 | const py = createPythonEnv('path_to_python_binary'); 87 | ``` 88 | 89 | You can use `py` to run python code directly from JavaScript. 90 | 91 | > 💡In order to properly dispose of the environment, make sure you call `py.shell.kill()` at the end of your program. 92 | 93 | ### Execute code 94 | 95 | To execute any code: 96 | 97 | ```ts 98 | await py.x`import pandas`; 99 | await py.x`x = 'abc'`; 100 | await py.x`def add(x, y): 101 | return x + y`; 102 | ``` 103 | 104 | ### Evaluate code 105 | 106 | ```ts 107 | const myVal = await py.e`[1, 2, 3][-1]`; // 3 108 | console.log(myVal); // 3 109 | const myVal2 = await py.e`len({1, 2, ()}) == 3`; 110 | console.log(myVal2); // true 111 | ``` 112 | 113 | ### using PyVar 114 | 115 | `PyVar`s are powerful objects that let you compose python constructs as JavaScript and extract the results when needed. To create a `PyVar`, call `py` directly with a string template: 116 | 117 | ```ts 118 | const v = py`1 + 2`; 119 | ``` 120 | 121 | In the code above, `v` is not `3`, but rather a reference to a python variable that holds that value. To get the value, use the `_` property: 122 | 123 | ```ts 124 | const result = await v._; 125 | console.log(result); // 3 126 | ``` 127 | 128 | `PyVar`s are composable in many different ways. 
They can be used in `py`'s string template: 129 | 130 | ```ts 131 | const v1 = py`1 + 2`; 132 | const v2 = py`3 + ${v1}`; 133 | console.log(await v2._); // 6 134 | ``` 135 | 136 | They can be called: 137 | 138 | ```ts 139 | import { kwargs } from 'hopi'; 140 | 141 | const foo = py`lambda x: x.lower()`; 142 | console.log(await foo('my JavaScript string')._); 143 | console.log(await foo(py`"a python string!"`)._); 144 | console.log(await foo(kwargs({ x: 'string' }))._); 145 | ``` 146 | 147 | They can be accessed with dot notation or square brackets notation: 148 | 149 | ```ts 150 | const myString = py`" abc "`; 151 | const upperString = myString.upper().strip()[2]; 152 | ``` 153 | 154 | > 💡Unfortunately there is a mismatch between python and JavaScript: in JavaScript dot notation and square bracket notation are interchangeable, while in python they mean very different things. 155 | > 156 | > Therefore, there are a couple of rules that apply to dot or square brackets notations: 157 | > 158 | > - number properties are passed as integers in square brackets: e.g. `[1]`, `[1.1]` 159 | > - strings that are not valid property names in python are stringified and passed in square brackets: e.g. `['0a']`, `['.']`, `['?']` 160 | > - everything else is passed with dot notation, e.g. `.foo` 161 | 162 | Lastly, `PyVar`s also accept interpolated strings to be chained: 163 | 164 | ```ts 165 | const myList = py`[1, 2, 3, 4, 5]`; 166 | const val = myList`[2:4].index(3)`; 167 | // which is equivalent to 168 | const val = myList`[2:4]`.index(3); 169 | // which is equivalent to 170 | const val = myList`[2:4].index(3)`; 171 | // which is equivalent to 172 | const val = myList`[2:4]`.index`(3)`; 173 | // which is equivalent to 174 | const val = myList`[2:4]`.index`(${py`3`})`; 175 | ``` 176 | 177 | ### Decoders 178 | 179 | In order to read values from python in JavaScript, they need to be properly encoded in strings and then decoded in JavaScript. 
/*
 * README.md (continued). The source lines replaced by this block also held
 * the end of the README and the dump's file separator; both are preserved
 * here, followed by the contents of /src/py.ts.
 *
 * Custom decoders can be defined as such:
 *
 * ```ts
 * await shell.addDecoder({
 *   // the fully qualified type name
 *   typeName: 'pandas._libs.tslibs.timestamps.Timestamp',
 *   // stringified lambda function to encode the python value into a string
 *   encode: 'lambda v: v.isoformat()',
 *   // function to transform the value into the desired JavaScript value.
 *   // The `decode` argument can be used to recursively call the full decoder
 *   decode: (v, decode) => new Date(v).toDateString(),
 * });
 * ```
 *
 * ## TODO
 *
 * - [ ] gc
 *
 * --------------------------------------------------------------------------------
 * /src/py.ts:
 * --------------------------------------------------------------------------------
 */

/* eslint-disable max-classes-per-file */
/* eslint-disable @typescript-eslint/no-use-before-define */
import PythonShell, { PythonShellConfig } from './PythonShell';

/** Operations exposed by a Python environment in addition to creating `PyVar`s. */
export interface PyBase {
  /** Sends the interpolated template to the shell as an `EXEC` command. */
  x: (strings: readonly string[], ...vars: TemplateValue[]) => Promise<void>;
  /**
   * Sends the interpolated template to the shell as an `EVAL` command and
   * resolves with the shell's answer, whose shape is only known at runtime.
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  e: (strings: readonly string[], ...vars: TemplateValue[]) => Promise<any>;
  /** Executes `import <name>` and resolves with a `PyVar` bound to `<name>`. */
  import: (name: string) => Promise<PyVar>;
  shell: PythonShell;
}

/** A `PyBase` that can also be used as a template tag to create a `PyVar`. */
export interface PythonEnv extends PyBase {
  (strings: readonly string[], ...vars: TemplateValue[]): PyVar;
}

/** Every JavaScript value that can be interpolated into a Python command. */
type TemplateValue =
  | PyVariable
  | string
  | number
  | boolean
  | null
  | TemplateValue[]
  | { [idx: string]: TemplateValue };

/** Names that can be emitted after a `.` in Python source. */
const PY_IDENTIFIER = /^[a-zA-Z_]\w*$/;

/** Duck-typed check: anything truthy that carries a string `varId`. */
function isPyVariable(v: TemplateValue): v is PyVariable {
  return !!v && typeof (v as { varId?: unknown }).varId === 'string';
}

/** True for the first argument of a tagged-template call (an array with `raw`). */
function isTemplateStrings(v: unknown): v is TemplateStringsArray {
  return Array.isArray(v) && Boolean((v as { raw?: unknown }).raw);
}

/**
 * Renders a JavaScript number as a Python expression. `String(NaN)` and
 * `String(Infinity)` are not Python literals, so they go through `float(...)`.
 */
function resolveNumber(n: number): string {
  if (Number.isNaN(n)) return "float('nan')";
  if (n === Infinity) return "float('inf')";
  if (n === -Infinity) return "float('-inf')";
  return String(n);
}

/**
 * Converts a JavaScript value into the Python source text that represents it.
 *
 * - `null` becomes `None`, booleans become `True` / `False`;
 * - strings become double-quoted literals. `JSON.stringify` is used because
 *   its escapes (`\"`, `\\`, `\n`, `\uXXXX`, ...) are also valid Python string
 *   escapes, so quotes, backslashes and newlines survive the round trip;
 * - arrays become list displays and plain objects dict displays (recursively);
 * - Python variables are referenced by their `varId`.
 */
function resolveTemplateValue(v: TemplateValue): string {
  if (v === null) return 'None';
  if (typeof v === 'string') return JSON.stringify(v);
  if (typeof v === 'number') return resolveNumber(v);
  if (typeof v === 'boolean') return v ? 'True' : 'False';
  if (Array.isArray(v)) {
    return `[${v.map((item) => resolveTemplateValue(item)).join(',')}]`;
  }
  if (isPyVariable(v)) return v.varId;

  // then it's a dict
  const params = Object.entries(v).map(
    ([k, vv]) => `${resolveTemplateValue(k)}: ${resolveTemplateValue(vv)}`,
  );
  return `{${params.join(',')}}`;
}

/**
 * Interleaves the literal chunks of a template with the Python rendering of
 * the interpolated values: `strings[0] vars[0] strings[1] vars[1] ...`.
 */
function buildCommand(
  strings: readonly string[],
  vars: TemplateValue[],
): string {
  return strings
    .map((s, i) => (i < vars.length ? s + resolveTemplateValue(vars[i]) : s))
    .join('');
}

/** Owns a `PythonShell` and turns template calls into commands sent to it. */
export class Py implements PyBase {
  shell: PythonShell;

  /** Source of unique Python variable names (`v0`, `v1`, ...). */
  varCounter = 0;

  /**
   * @param init either the path of the Python binary or a full shell config
   */
  constructor(init: string | PythonShellConfig) {
    const config = typeof init === 'string' ? { pythonPath: init } : init;
    this.shell = new PythonShell(config);
  }

  /** Yields every Python variable found in `vars`, descending into arrays and dicts. */
  *getAllVars(vars: TemplateValue[]): Generator<PyVariable> {
    for (const v of vars) {
      if (isPyVariable(v)) yield v;
      else if (Array.isArray(v)) yield* this.getAllVars(v);
      else if (typeof v === 'object' && v !== null) {
        yield* this.getAllVars(Object.values(v));
      }
    }
  }

  /**
   * Builds the command text for a template together with the Python variables
   * it references; `resolve()` settles once all of their resolvers have.
   */
  getCommandObject(strings: readonly string[], vars: TemplateValue[]) {
    const pyVars = Array.from(this.getAllVars(vars));

    return {
      cmd: buildCommand(strings, vars),
      pyVars,
      resolve: () => Promise.all(pyVars.map((v) => v.resolver)),
    };
  }

  /** Waits for the referenced variables, then sends the command as `EXEC`. */
  x = async (strings: readonly string[], ...vars: TemplateValue[]) => {
    const { cmd, resolve } = this.getCommandObject(strings, vars);
    await resolve();
    await this.shell.sendAndReceive('EXEC', cmd);
  };

  /** Waits for the referenced variables, then sends the command as `EVAL`. */
  e = async (strings: readonly string[], ...vars: TemplateValue[]) => {
    const { cmd, resolve } = this.getCommandObject(strings, vars);
    await resolve();
    return this.shell.sendAndReceive('EVAL', cmd);
  };

  /**
   * Executes `import <name>` and returns a `PyVar` bound to `<name>`.
   * Returning the proxy from an async function is safe because its `then`
   * property reads as `undefined`, so it is not mistaken for a thenable.
   */
  import = async (name: string) => {
    await this.x([`import ${name}`]);
    return this.expr([name]);
  };

  /**
   * Creates a `PyVar` for an expression. A fresh variable id is reserved
   * synchronously; the `EXEC` assignment `<id>=<expression>` is sent only
   * after every interpolated variable has resolved.
   */
  expr = (strings: readonly string[], ...vars: TemplateValue[]): PyVar => {
    const { cmd, resolve } = this.getCommandObject(strings, vars);
    const varId = `v${this.varCounter++}`;

    const resolver = resolve().then(async () => {
      await this.shell.sendAndReceive('EXEC', `${varId}=${cmd}`);
    });
    return getPyVar(this, varId, resolver);
  };
}

/** Handle on a Python-side variable named `varId`. */
export class PyVariable {
  /**
   * @param py environment used to evaluate the variable
   * @param varId name of the variable on the Python side
   * @param resolver settles once the assignment creating the variable has run
   */
  constructor(
    private py: Py,
    public varId: string,
    public resolver: Promise<unknown>,
  ) {}

  /** Promise of the variable's value: waits for `resolver`, then `EVAL`s it. */
  get _() {
    return this.resolver.then(() => this.py.e`${this}`);
  }
}

/** Marker wrapper for keyword arguments; must be the last argument of a call. */
export class Kwargs {
  // eslint-disable-next-line no-shadow
  constructor(public kwargs: Record<string, TemplateValue>) {}
}

/** Wraps `k` so a `PyVar` call passes its entries as `name=value` arguments. */
export function kwargs(k: Record<string, TemplateValue>) {
  return new Kwargs(k);
}

export type PyArgs = TemplateValue | Kwargs;

export interface PyVarDict {
  [idx: string]: PyVar;
}
export type PyVar = PyVarDict &
  PyVariable &
  ((...a: PyArgs[]) => PyVar) &
  ((strings: readonly string[], ...vars: PyVar[]) => PyVar);

/**
 * Builds the template pieces for `callee(arg, ..., name=value, ...)`.
 *
 * @throws Error when a `Kwargs` appears anywhere but in last position
 */
function buildCall(
  callee: PyVariable,
  args: readonly PyArgs[],
): { strings: string[]; vars: TemplateValue[] } {
  const last = args[args.length - 1];
  const named: Record<string, TemplateValue> =
    last instanceof Kwargs ? last.kwargs : {};
  const positional = last instanceof Kwargs ? args.slice(0, -1) : args;

  const strings = [''];
  const vars: TemplateValue[] = [callee];
  // The first argument opens the parenthesis, later ones are comma-separated.
  const addArg = (label: string, value: TemplateValue) => {
    strings.push(`${vars.length === 1 ? '(' : ','}${label}`);
    vars.push(value);
  };

  for (const arg of positional) {
    if (arg instanceof Kwargs) throw new Error('kwargs need to be last');
    addArg('', arg);
  }
  for (const [name, value] of Object.entries(named)) {
    addArg(`${name}=`, value);
  }

  // With no argument at all (including an empty kwargs) emit a bare `()`.
  strings.push(vars.length === 1 ? '()' : ')');
  return { strings, vars };
}

/**
 * Wraps a `PyVariable` in a callable proxy so that property access, calls and
 * template tags each produce a new `PyVar` derived from this one.
 */
function getPyVar(py: Py, varId: string, resolver: Promise<unknown>): PyVar {
  const pyVar = new PyVariable(py, varId, resolver);

  const handler: ProxyHandler<() => null> = {
    get: (_target, key: string | symbol) => {
      // Symbols (Symbol.iterator, Symbol.toPrimitive, ...) cannot be turned
      // into Python source; read them off the plain variable instead.
      if (typeof key === 'symbol') return Reflect.get(pyVar, key);
      // Must not look like a thenable, or `await`/async returns would unwrap it.
      if (key === 'then') return undefined;
      if (key in pyVar) return Reflect.get(pyVar, key);

      // Number('') and Number(' ') are 0, so blank keys are excluded here and
      // fall through to the quoted-subscript branch below.
      const numericKey = Number(key);
      if (key.trim() !== '' && Number.isFinite(numericKey)) {
        return py.expr(['', `[${numericKey}]`], pyVar);
      }
      if (!PY_IDENTIFIER.test(key)) {
        return py.expr(['', `[${resolveTemplateValue(key)}]`], pyVar);
      }
      return py.expr(['', `.${key}`], pyVar);
    },
    apply: (_target, _thisArg, allArgs: unknown[]): PyVar => {
      const [first, ...rest] = allArgs;
      if (isTemplateStrings(first)) {
        // Used as a template tag: prepend this variable to the template.
        return py.expr(['', ...first], pyVar, ...(rest as TemplateValue[]));
      }

      const { strings, vars } = buildCall(pyVar, allArgs as PyArgs[]);
      return py.expr(strings, ...vars);
    },
  };

  // The proxy target is a dummy function so that the `apply` trap is available.
  return new Proxy(() => null, handler) as unknown as PyVar;
}

/**
 * Creates a Python environment: the returned function is the `expr` template
 * tag of a new `Py`, with that instance's own fields (`shell`, `x`, `e`,
 * `import`, ...) copied onto it. `expr` keeps updating `varCounter` on the
 * `Py` instance, so the copied counter is only a snapshot.
 *
 * @param pythonPath path of the Python binary, or a full shell config
 */
export function createPythonEnv(
  pythonPath: string | PythonShellConfig,
): PythonEnv {
  const pyObj = new Py(pythonPath);
  return Object.assign(pyObj.expr, pyObj);
}