├── .editorconfig
├── .eslintrc.cjs
├── .gitignore
├── .prettierignore
├── .prettierrc.json
├── .vscode
│   ├── extensions.json
│   ├── settings.json
│   └── tasks.json
├── LICENSE
├── README.md
├── lerna.json
├── package.json
├── packages
│   ├── core-protocol
│   │   ├── .npmignore
│   │   ├── README.md
│   │   ├── package.json
│   │   ├── src
│   │   │   ├── global.ts
│   │   │   ├── index.ts
│   │   │   ├── naming
│   │   │   │   ├── generated-type-names.ts
│   │   │   │   ├── name-pattern.ts
│   │   │   │   ├── pipe-names.ts
│   │   │   │   ├── project-names.ts
│   │   │   │   └── resource-names.ts
│   │   │   ├── pipe
│   │   │   │   ├── bundle-schema.ts
│   │   │   │   ├── execution-result.ts
│   │   │   │   ├── pipe.ts
│   │   │   │   └── publish-args.ts
│   │   │   ├── resources
│   │   │   │   ├── base-resource.ts
│   │   │   │   ├── buffer-document.ts
│   │   │   │   ├── cloud-storage-resource.ts
│   │   │   │   ├── file-resource.ts
│   │   │   │   ├── index.ts
│   │   │   │   ├── resource-map.ts
│   │   │   │   └── s3-resource.ts
│   │   │   └── utils.ts
│   │   └── tsconfig.json
│   ├── core
│   │   ├── .npmignore
│   │   ├── README.md
│   │   ├── package.json
│   │   ├── src
│   │   │   ├── define-pipe.ts
│   │   │   ├── dump.ts
│   │   │   ├── index.ts
│   │   │   ├── libraries
│   │   │   │   ├── date-fns.ts
│   │   │   │   ├── html-to-text.ts
│   │   │   │   └── node-html-parser.ts
│   │   │   ├── typed.ts
│   │   │   └── utils
│   │   │       ├── array-utils.ts
│   │   │       ├── bigquery
│   │   │       │   ├── bigquery-table.ts
│   │   │       │   ├── jsonschema-bigquery.d.ts
│   │   │       │   └── schema.ts
│   │   │       ├── extract-json-assignments.ts
│   │   │       ├── pick.ts
│   │   │       └── round.ts
│   │   └── tsconfig.json
│   ├── create-typestream
│   │   ├── .npmignore
│   │   ├── bin
│   │   │   └── run.js
│   │   ├── package.json
│   │   ├── samples
│   │   │   └── get-started.ts
│   │   ├── src
│   │   │   ├── async-pipe-out.ts
│   │   │   ├── create-env.ts
│   │   │   ├── create-project-files.ts
│   │   │   ├── create-tutorial-pipe.ts
│   │   │   ├── get-package.ts
│   │   │   ├── get-project-name.ts
│   │   │   ├── index.ts
│   │   │   ├── initialize-git.ts
│   │   │   ├── install-dependencies.ts
│   │   │   ├── install-package.ts
│   │   │   ├── log-stage.ts
│   │   │   └── print-getting-started.ts
│   │   └── tsconfig.json
│   ├── sdk
│   │   ├── .npmignore
│   │   ├── README.md
│   │   ├── bin
│   │   │   └── run.js
│   │   ├── package.json
│   │   ├── src
│   │   │   ├── commands
│   │   │   │   ├── cloud.ts
│   │   │   │   ├── create-pipe.ts
│   │   │   │   ├── process.ts
│   │   │   │   └── watch.ts
│   │   │   ├── hooks
│   │   │   │   └── init
│   │   │   │       └── assert-node-version.ts
│   │   │   ├── index.ts
│   │   │   ├── paths
│   │   │   │   ├── load-all-paths.ts
│   │   │   │   ├── pipe-paths.ts
│   │   │   │   └── project-paths.ts
│   │   │   ├── pipe
│   │   │   │   ├── bundling
│   │   │   │   │   ├── build.ts
│   │   │   │   │   ├── commonjs-polyfills.ts
│   │   │   │   │   └── load-pipe-bundle.ts
│   │   │   │   └── creation
│   │   │   │       ├── check-pipe-exists.ts
│   │   │   │       ├── create-pipe.ts
│   │   │   │       └── get-pipe-code.ts
│   │   │   ├── process
│   │   │   │   ├── publish-documents.ts
│   │   │   │   └── run-pipe-process.ts
│   │   │   ├── project
│   │   │   │   └── get-project-name.ts
│   │   │   ├── resources
│   │   │   │   ├── providers
│   │   │   │   │   ├── cloud-storage-provider.ts
│   │   │   │   │   ├── file-resource-provider.ts
│   │   │   │   │   ├── index.ts
│   │   │   │   │   ├── resource-provider.ts
│   │   │   │   │   └── s3-provider.ts
│   │   │   │   ├── resource-paths.ts
│   │   │   │   └── samples
│   │   │   │       └── sample-provider.ts
│   │   │   ├── runner
│   │   │   │   ├── pipe-controller.ts
│   │   │   │   ├── pipe-executer.ts
│   │   │   │   ├── rpc.ts
│   │   │   │   └── runner-functions.ts
│   │   │   ├── typing
│   │   │   │   ├── generate-type-map.ts
│   │   │   │   ├── process-schemas.ts
│   │   │   │   ├── schema-sample-capturer.ts
│   │   │   │   └── types-from-json-schema.ts
│   │   │   ├── utils
│   │   │   │   ├── array-utils.ts
│   │   │   │   ├── ask.ts
│   │   │   │   ├── async-gen-to-array.ts
│   │   │   │   ├── catch-but-not-really.ts
│   │   │   │   ├── chalk-extensions.ts
│   │   │   │   ├── data-dumper.ts
│   │   │   │   ├── error-logger.ts
│   │   │   │   ├── load-project-env.ts
│   │   │   │   ├── log-replace.ts
│   │   │   │   ├── observe-async.ts
│   │   │   │   ├── observe-directory.ts
│   │   │   │   ├── promise-pool.ts
│   │   │   │   ├── read-dir.ts
│   │   │   │   └── read-full-stream.ts
│   │   │   └── watch
│   │   │       ├── error-summary.ts
│   │   │       ├── render-watch-progress.ts
│   │   │       ├── run-pipe.ts
│   │   │       └── watch-progress.ts
│   │   └── tsconfig.json
│   └── tyst
│       ├── .npmignore
│       ├── index.js
│       └── package.json
├── tsconfig.eslint.json
└── yarn.lock

--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
1 | root = true
2 | 
3 | [*]
4 | charset = utf-8
5 | indent_style = space
6 | indent_size = 2
7 | end_of_line = lf
8 | trim_trailing_whitespace = true
9 | insert_final_newline = true
10 | 
--------------------------------------------------------------------------------
/.eslintrc.cjs:
--------------------------------------------------------------------------------
1 | module.exports = {
2 |   root: true,
3 |   parser: '@typescript-eslint/parser',
4 |   parserOptions: {
5 |     tsconfigRootDir: __dirname,
6 |     project: [
7 |       './tsconfig.eslint.json',
8 |       './workspace/*/tsconfig.json',
9 |       './packages/*/tsconfig.json',
10 |     ],
11 |     extraFileExtensions: ['.cjs'],
12 |   },
13 |   plugins: ['@typescript-eslint'],
14 |   extends: [
15 |     'eslint:recommended',
16 |     'plugin:@typescript-eslint/recommended',
17 |     'plugin:unicorn/recommended',
18 |     'plugin:import/recommended',
19 |     'plugin:import/typescript',
20 |     'prettier',
21 |   ],
22 |   rules: {
23 |     // Already checked by TypeScript
24 |     'no-undef': 'off',
25 |     'import/no-unresolved': 'off',
26 | 
27 |     // We know what we're doing™
28 |     '@typescript-eslint/no-namespace': 'off',
29 |     '@typescript-eslint/no-explicit-any': 'off',
30 |     '@typescript-eslint/no-non-null-assertion': 'off',
31 |     '@typescript-eslint/ban-types': 'off',
32 |     'unicorn/prevent-abbreviations': 'off',
33 |     'unicorn/no-process-exit': 'off',
34 |     'unicorn/prefer-export-from': 'off',
35 |     'unicorn/no-null': 'off',
36 |     'unicorn/numeric-separators-style': 'off',
37 |     'unicorn/prefer-json-parse-buffer': 'off',
38 |     'unicorn/text-encoding-identifier-case': 'off',
39 |     'unicorn/no-array-for-each': 'off',
40 |     'unicorn/filename-case': [
41 |       'error',
42 |       { case: 'kebabCase', ignore: [/\/generated-types\//] },
43 |     ],
44 | 
45 |     // Additional code quality rules
46 |     '@typescript-eslint/await-thenable': 'error',
47 |     '@typescript-eslint/no-misused-promises': [
48 |       'error',
49 |       { checksVoidReturn: false },
50 |     ],
51 |     '@typescript-eslint/no-floating-promises': 'error',
52 |     '@typescript-eslint/no-unnecessary-type-assertion': 'error',
53 |     '@typescript-eslint/restrict-template-expressions': [
54 |       'error',
55 |       { allowAny: true, allowUnknown: true },
56 |     ],
57 |     '@typescript-eslint/restrict-plus-operands': 'error',
58 | 
59 |     // Additional code style rules
60 |     'object-shorthand': 'error',
61 |     'import/order': [
62 |       'error',
63 |       {
64 |         groups: ['builtin', 'external', 'internal'],
65 |         pathGroupsExcludedImportTypes: [],
66 |         alphabetize: { order: 'asc' },
67 |         'newlines-between': 'always',
68 |       },
69 |     ],
70 |     'import/newline-after-import': ['error', { count: 1 }],
71 |   },
72 | }
73 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | **/node_modules
2 | .env
3 | **/dist
4 | **/.DS_Store
5 | /workspace
6 | /dev-project
7 | 
--------------------------------------------------------------------------------
/.prettierignore:
--------------------------------------------------------------------------------
1 | **/dist
2 | **/builds
3 | **/sample-data
4 | **/generated-types
5 | 
--------------------------------------------------------------------------------
/.prettierrc.json:
--------------------------------------------------------------------------------
1 | {
2 |   "semi": false,
3 |   "trailingComma": "all",
4 |   "singleQuote": true,
5 |   "arrowParens": "avoid"
6 | }
7 | 
--------------------------------------------------------------------------------
/.vscode/extensions.json:
--------------------------------------------------------------------------------
1 | {
2 |   "recommendations": [
3 |     "EditorConfig.EditorConfig",
4 |     "esbenp.prettier-vscode",
5 |     "dbaeumer.vscode-eslint"
6 |   ]
7 | }
8 | 
--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 |   "editor.rulers": [80, 120],
3 | 
4 |   // Prettier for automatic code formatting
5 |   "[typescript][javascript][json][jsonc][yaml]": {
6 |     "editor.formatOnSave": true,
7 |     "editor.defaultFormatter": "esbenp.prettier-vscode"
8 |   },
9 | 
10 |   // ESLint for code quality rules
11 |   "eslint.validate": ["javascript", "typescript"],
12 |   "eslint.runtime": "node",
13 |   "editor.codeActionsOnSave": { "source.fixAll": true },
14 | 
15 |   "typescript.preferences.importModuleSpecifierEnding": "js",
16 |   "typescript.tsdk": "node_modules/typescript/lib",
17 |   "typescript.enablePromptUseWorkspaceTsdk": true,
18 | 
19 |   "debug.javascript.autoAttachFilter": "onlyWithFlag",
20 |   "debug.javascript.terminalOptions": {
21 |     "skipFiles": ["<node_internals>/**", "**/node_modules/**"]
22 |   }
23 | }
24 | 
--------------------------------------------------------------------------------
/.vscode/tasks.json:
--------------------------------------------------------------------------------
1 | {
2 |   "version": "2.0.0",
3 |   "tasks": [
4 |     {
5 |       "type": "npm",
6 |       "script": "lint",
7 |       "problemMatcher": ["$eslint-stylish"],
8 |       "label": "npm: lint",
9 |       "detail": "eslint --ext .js,.ts --ignore-path .gitignore packages/*/src"
10 |     },
11 |     {
12 |       "type": "npm",
13 |       "script": "watch",
14 |       "group": "build",
15 |       "problemMatcher": [],
16 |       "label": "npm: watch",
17 |       "detail": "yarn build && lerna run watch --parallel",
18 |       "runOptions": { "runOn": "folderOpen" }
19 |     }
20 |   ]
21 | }
22 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) Scopas Technologies GmbH
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # TypeStream
2 | 
3 | > Next-generation data transformation framework for TypeScript that puts developer experience first
4 | 
5 | [![](https://img.shields.io/npm/v/@typestream/sdk?style=flat-square)](https://www.npmjs.com/package/@typestream/sdk)
6 | 
7 | Nowadays, almost every developer is working with increasingly complex and varying types of data.
8 | While tooling for this problem already exists, current solutions are heavy to use, targeted towards big enterprises,
9 | and put little to no emphasis on developer experience.
10 | 
11 | TypeStream allows you to get started within seconds, iterate blazingly fast over type-safe transformation code, and work with common data storage services either locally or in the cloud.
12 | 
13 | Here's how it could be integrated into your workflow:
14 | 
15 | ![](https://storage.googleapis.com/typestream-demo-content/flow-illustration.png)
16 | 
17 | ## Getting started
18 | 
19 | Make sure you have [Node.js](https://nodejs.dev/) (at least 16.0.0) installed and scaffold a new project using:
20 | 
21 | ```bash
22 | $ npm init typestream -- --get-started
23 | ```
24 | 
25 | ### Opening the project
26 | 
27 | > **Note:** Right now, we only officially support [Visual Studio Code](https://code.visualstudio.com/) as some important TypeStream features like zero-setup debugging require editor-specific configuration.
28 | 
29 | To get started developing your project, open the created folder in VS Code. At this point, you will probably be asked whether you want to use the workspace TypeScript version: press "Allow" to continue. If you don't see the prompt, you can also [configure this manually](https://code.visualstudio.com/docs/typescript/typescript-compiling#_using-the-workspace-version-of-typescript).
30 | 
31 | ### Working on a pipe
32 | 
33 | Pipes are at the core of what TypeStream does, as they contain the data transformation code of your project. Since you've specified the `--get-started` flag while creating the project, you should already see a pipe under `src/pipes/transform-product.ts`. Feel free to read through it to get a general idea of what it contains.
34 | 
35 | To try out the pipe and experiment with changes, you can start TypeStream in watch mode. To do that, open up an [integrated terminal](https://code.visualstudio.com/docs/editor/integrated-terminal) (this is necessary for debugging support) and run the following command:
36 | 
37 | ```
38 | $ npx tyst watch <pipe name>
39 | ```
40 | 
41 | Make sure to replace `<pipe name>` with the name of the pipe you want to work on. If you're following the getting started guide, that's going to be `transform-product`.
42 | 
43 | If everything's working correctly, TypeStream should now download a number of sample files and then attempt to process them using the pipe. Since you're in watch mode, TypeStream will start over whenever you save the file, allowing you to quickly experiment with changes to your transformation.
44 | 
45 | At this point, feel free to play around with the code and give all of TypeStream's different features a try, some of which are documented in the example file, others right here in the README.
46 | 
47 | > If you get stuck with anything, want to suggest a new feature, or share general feedback, please don't hesitate to reach out to us by [creating an issue](https://github.com/scopashq/typestream/issues) — we'd love to hear from you! ❤️
48 | 
49 | ## Features
50 | 
51 | ### Iterate blazingly fast over your transformation code
52 | 
53 | When writing software, being able to directly see how the changes you've made affect the output is a key feature for efficient and fun development. Thus we have designed TypeStream in a way that lets you see your transformed data anywhere in your pipeline and update it every time you save your code.
54 | If there are errors in your transformation, you will get an aggregated overview of the complete sample of data points you're testing on.
55 | 
56 | ![](https://storage.googleapis.com/typestream-demo-content/dump.gif)
57 | 
58 | ### Step into edge cases, right when they are happening
59 | 
60 | When working with a lot of data, it's impossible to know every edge case upfront. That's why you'll hit a breakpoint right when an edge case breaks your transformation code to see what the outlier data looks like.
61 | You can also set your own breakpoints anywhere in your transformation code and step through one data sample at a time.
62 | 
63 | ![](https://storage.googleapis.com/typestream-demo-content/error.gif)
64 | 
65 | ### Automatic type inference
66 | 
67 | Everyone who has used a strictly typed language before will love features like advanced IntelliSense, catching bugs at compile time, and the like. Using `typed` you can infer the type of any variable in your pipe based on a statistically relevant sample.
68 | 
69 | ![](https://storage.googleapis.com/typestream-demo-content/typed.gif)
70 | 
71 | ### Data source agnostic
72 | 
73 | Want to read and write data from your local file system, Google Cloud Storage, S3, BigQuery or Redshift? All at once?
74 | No problem! TypeStream’s modular resource system allows you to read from and write to most common storage systems.
75 | 
76 | ### Multi-step pipelines
77 | 
78 | To keep things more maintainable, or to aggregate multiple streams of data into one, you can push into a resource in one pipe and consume it in the next.
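For example, a first pipe can publish into an intermediate resource that a second pipe then consumes. A minimal sketch of the first half (the `rawProducts` resource and all names here are purely illustrative; the APIs used are introduced in the Concepts section below):

```typescript
// An intermediate resource that connects the two pipes.
const normalizedProducts = new FileResource('normalized-product', {
  basePath: 'normalized',
  recursive: false,
})

// Pipe 1: normalize raw documents and push them into the intermediate resource.
export default definePipe(rawProducts, async ctx => {
  const raw = await ctx.doc.asJson()
  ctx.publish({
    resource: normalizedProducts,
    data: Buffer.from(JSON.stringify(raw)),
    metadata: { name: ctx.doc.id },
  })
})
```

A second pipe can then use `normalizedProducts` as its source resource and aggregate or transform the data further.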
79 | 
80 | ## Concepts
81 | 
82 | The three core concepts to understand when working with TypeStream are **resources**, **documents** and **pipes**.
83 | To make each of them more tangible, we will work with an example use case. If you want to get a more hands-on feeling
84 | for them, you can also use the getting started guide.
85 | An example use case could be that you have raw product data from two different eCommerce platforms - let's say Amazon and eBay. Your goal is to take the raw data from each provider, transform it into a common format, and put it into a common storage so you can work with it.
86 | 
87 | ### Resources
88 | 
89 | One resource holds many documents that are all described by the same concept and have a similar structure. Each resource will also have metadata
90 | that describes where its data can be retrieved from. Thus, for all of your raw Amazon and eBay products, you could define your resources as follows:
91 | 
92 | ```typescript
93 | const amazonProduct = new S3Resource('raw-amazon-product', {
94 |   region: 'eu-central-1',
95 |   bucket: 'business-data',
96 |   pathPrefix: 'amazon-products/2022/',
97 | })
98 | 
99 | const ebayProduct = new CloudStorageResource('raw-ebay-product', {
100 |   cloudStorageProject: 'typestream',
101 |   bucket: 'business-data',
102 |   pathPrefix: 'ebay-products/2022/',
103 | })
104 | 
105 | // Used to write the transformed data into
106 | const allProducts = new FileResource('transformed-product', {
107 |   basePath: '/Users/typestream/data',
108 |   recursive: true,
109 | })
110 | ```
111 | 
112 | Note that each type of storage has its own resource class with its own
113 | set of required parameters. As of now, TypeStream supports the following resources:
114 | 
115 | - Google Cloud Storage
116 | - AWS S3
117 | - BigQuery
118 | - AWS Redshift (coming soon...)
119 | - Local file system
120 | 
121 | The standard authentication method for both GCP and AWS is authentication via default credentials. You can find
122 | the documentation on how to set these up for each platform here:
123 | 
124 | - [Setting up Google Cloud Platform default credentials](https://cloud.google.com/sdk/gcloud/reference/auth/application-default)
125 | - [Setting up AWS default credentials](https://docs.aws.amazon.com/accounts/latest/reference/root-user-access-key.html)
126 | 
127 | Alternatively, you can also provide explicit authentication for a project. If these environment variables are set, default credentials will be ignored entirely. You can set the environment variables by putting their values in the generated `.env` file of your project:
128 | 
129 | - `GOOGLE_APPLICATION_CREDENTIALS`, which has to be a path to a service-account key. [Use the docs for reference](https://cloud.google.com/docs/authentication/getting-started)
130 | - `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`. [Use the docs for reference](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html)
131 | 
132 | ### Documents
133 | 
134 | Documents are the containers of the data you’re working with. While you will never have to create a document yourself because TypeStream takes care of this under the hood, it makes sense to understand their properties.
135 | 
136 | Each document has data, which will usually be in the form of a Buffer. You can call the `read()` method
137 | of the document to retrieve the data in raw form, or helpers like `asJson()`, `asHtml()` or `asText()` to automatically parse the data into the respective format. If the document doesn't contain valid JSON, for example, an error will be thrown.
138 | 
139 | ```typescript
140 | const buffer = await doc.read() // Buffer
141 | const json = await doc.asJson() // any
142 | const html = await doc.asHtml() // HTMLElement (node-html-parser)
143 | const text = await doc.asText() // string
144 | ```
145 | 
146 | You can also work with the document’s metadata without ever calling `read()` on it. What this looks like is
147 | dependent on what kind of resource the document belongs to. Metadata could for example hold information
148 | about the MIME type of a Google Cloud Storage object or the path of a file in the local file system.
149 | 
150 | ```typescript
151 | if (doc.metadata.contentType === 'application/json') console.log('Found JSON!')
152 | ```
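Inside a pipe, this lets you cheaply skip documents based on their metadata alone. A minimal sketch, assuming the `ebayProduct` resource defined above (its documents are Cloud Storage objects, so they carry a `contentType` field):

```typescript
export default definePipe(ebayProduct, async ctx => {
  // Metadata is available without downloading the document's contents.
  if (ctx.doc.metadata.contentType !== 'application/json') return

  const json = await ctx.doc.asJson() // the data is only read here
  // ...
})
```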
153 | 
154 | ### Pipes
155 | 
156 | Pipes are the essential building blocks when working with TypeStream. You can think of them as connectors
157 | between resources.
158 | 
159 | Each pipe has an origin resource from which it will consume data. When defining the pipe, you can transform
160 | the data of a document and then publish it to one or more target resources.
161 | 
162 | Working with the example from above, you could write a pipe that reads the documents from `amazonProduct`,
163 | transforms them in any desired way, and publishes them to the `allProducts` resource.
164 | 
165 | ```typescript
166 | export default definePipe(amazonProduct, async ctx => {
167 |   const rawProduct = typed('RawProduct', await ctx.doc.asJson())
168 | 
169 |   // Your transformation code goes here...
170 |   const transformedData = rawProduct
171 | 
172 |   ctx.publish({
173 |     resource: allProducts,
174 |     data: Buffer.from(JSON.stringify(transformedData)),
175 |     metadata: { name: transformedData.name },
176 |   })
177 | })
178 | ```
179 | 
180 | You can now write a second pipe for your `ebayProduct` resource and also publish its documents into `allProducts`.
181 | When hosted via TypeStream Cloud, these pipes will listen for new objects being added to your resources
182 | and process them automatically.
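Such a second pipe could look like the following sketch (the field mapping is illustrative and assumes a generated `RawEbayProduct` type with a `name` field; your real mapping depends on the raw eBay data):

```typescript
export default definePipe(ebayProduct, async ctx => {
  const rawProduct = typed('RawEbayProduct', await ctx.doc.asJson())

  // Map the eBay-specific fields onto your common product format.
  const transformedData = { name: rawProduct.name /* ... */ }

  ctx.publish({
    resource: allProducts,
    data: Buffer.from(JSON.stringify(transformedData)),
    metadata: { name: transformedData.name },
  })
})
```

Because both pipes publish into the same `allProducts` resource, the two streams of data end up in one place, in one common format.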
183 | 
184 | # Transformation utilities
185 | 
186 | When transforming a lot of data, you easily find yourself repeating the same processes time and again. To mitigate this problem, TypeStream comes with a few simple utilities. Each of these utilities is further documented in the TypeStream library.
187 | 
188 | ### `dump()`
189 | 
190 | While using `tyst watch` on a pipe, `dump()` can be used to store all intermediate results in a single file. This can be used to quickly understand how changes in the transformation code affect the output. Every time you save your pipe, `dump()` will overwrite the file with the new intermediate results.
191 | 
192 | ```typescript
193 | const intermediateResult = {
194 |   /* ...your data here */
195 | }
196 | dump(intermediateResult)
197 | ```
198 | 
199 | ### `pick()`
200 | 
201 | `pick()` can be used to comfortably select a few keys from a messy object. If the object is typed, there will also be autocomplete/type errors on the keys you choose.
202 | 
203 | ```typescript
204 | const messyObject = { key1: 1, key2: 2, key3: 3, key4: 4, key5: 5 }
205 | const prunedObject = pick(messyObject, ['key1', 'key3'])
206 | ```
207 | 
208 | ### Hydration utilities
209 | 
210 | When extracting data from server-side-rendered applications, automatically extracting the hydration data from an HTML response can save a lot of time and nerves.
211 | 
212 | ```typescript
213 | const assignments = extractJsonAssignments(htmlString)
214 | const assignmentsFromDocument = extractJsonAssignmentsFromDocument(htmlElement)
215 | const jsonScripts = extractJsonScriptsFromDocument(htmlElement)
216 | ```
217 | 
218 | ### Array utilities
219 | 
220 | Utilities to write more readable code when dealing with arrays:
221 | 
222 | ```typescript
223 | products.sort(basedOn(_ => _.price, 'desc'))
224 | products.sort(basedOnKey('price', 'desc'))
225 | products.sort(
226 |   basedOnMultiple([
227 |     ['price', 'desc'],
228 |     ['discount', 'asc'],
229 |   ]),
230 | )
231 | 
232 | sumOf(products.map(product => product.price))
233 | ```
234 | 
--------------------------------------------------------------------------------
/lerna.json:
--------------------------------------------------------------------------------
1 | {
2 |   "npmClient": "yarn",
3 |   "useWorkspaces": true,
4 |   "version": "independent"
5 | }
6 | 
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 |   "private": true,
3 |   "type": "module",
4 |   "workspaces": {
5 |     "packages": [
6 |       "packages/*"
7 |     ]
8 |   },
9 |   "scripts": {
10 |     "build": "yarn && lerna run build",
11 |     "check-deps": "lerna run check-deps",
12 |     "clean": "lerna run clean --parallel && rimraf node_modules/",
13 |     "fix": "yarn lint --fix ; yarn format --list-different",
14 |     "format": "prettier --write .",
15 |     "lint": "eslint --ext .js,.ts --ignore-path .gitignore packages/*/src",
16 |     "release:publish": "yarn build && lerna publish from-git",
17 |     "release:version": "lerna version --no-private --message 'Publish packages'",
18 |     "watch": "yarn build && lerna run watch --parallel"
19 |   },
20 |   "devDependencies": {
21 |     "@typescript-eslint/eslint-plugin": "^5.15.0",
22 |     "@typescript-eslint/parser": "^5.15.0",
23 |     "depcheck": "^1.4.3",
24 |     "eslint": "^8.11.0",
25 |     "eslint-config-prettier": "^8.5.0",
26 |     "eslint-plugin-import": "^2.25.4",
27 |     "eslint-plugin-unicorn": "^41.0.0",
28 |     "lerna": "^4.0.0",
29 |     "prettier": "^2.5.1",
30 |     "prettier-plugin-packagejson": "^2.2.15",
31 |     "rimraf": "^3.0.2",
32 |     "typescript": "^4.7.0-dev.20220216"
33 |   }
34 | }
35 | 
--------------------------------------------------------------------------------
/packages/core-protocol/.npmignore:
--------------------------------------------------------------------------------
1 | # Necessary because npm otherwise completely ignores gitignored files even if
2 | # they are explicitly listed under `files` in the `package.json` file
3 | # See: https://npm.github.io/publishing-pkgs-docs/publishing/the-npmignore-file.html
4 | 
--------------------------------------------------------------------------------
/packages/core-protocol/README.md:
--------------------------------------------------------------------------------
1 | # [TypeStream](https://typestream.dev) core protocol
2 | 
3 | > This package is part of the [**TypeStream** data transformation framework](https://typestream.dev).
4 | 
--------------------------------------------------------------------------------
/packages/core-protocol/package.json:
--------------------------------------------------------------------------------
1 | {
2 |   "name": "@typestream/core-protocol",
3 |   "version": "0.0.5",
4 |   "description": "Internal package used by TypeStream to communicate between the SDK and project code",
5 |   "repository": {
6 |     "type": "git",
7 |     "url": "https://github.com/scopashq/typestream",
8 |     "path": "packages/core-protocol"
9 |   },
10 |   "license": "MIT",
11 |   "author": "Scopas Technologies GmbH",
12 |   "type": "module",
13 |   "exports": {
14 |     ".": "./dist/index.js",
15 |     "./utils": "./dist/utils.js",
16 |     "./resources": "./dist/resources/index.js"
17 |   },
18 |   "files": [
19 |     "dist"
20 |   ],
21 |   "scripts": {
22 |     "build": "rimraf dist/ && tsc",
23 |     "check-deps": "depcheck",
24 |     "clean": "rimraf dist/ node_modules/",
25 |     "watch": "tsc --watch --preserveWatchOutput"
26 |   },
27 |   "dependencies": {
28 |     "@aws-sdk/client-s3": "^3.55.0",
29 |     "node-html-parser": "^5.3.3",
30 |     "zod": "^3.11.6"
31 |   },
32 |   "devDependencies": {
33 |     "@types/node": "16",
34 |     "typescript": "^4.7.0-dev.20220216"
35 |   },
36 |   "engines": {
37 |     "node": ">=16.0.0"
38 |   }
39 | }
40 | 
--------------------------------------------------------------------------------
/packages/core-protocol/src/global.ts:
--------------------------------------------------------------------------------
1 | // Only way to define globals
2 | /* eslint-disable no-var */
3 | 
4 | type GlobalDumpFunction = (options: {
5 |   data: any
6 |   name: string
7 |   skipDuplicates: boolean
8 | }) => void
9 | declare global {
10 |   /**
11 |    * This function is used to capture type samples in watch mode.
12 |    * It is set by the pipe-executor.
13 |    */
14 |   var typestreamCaptureTypeSample:
15 |     | ((name: string, data: any) => void)
16 |     | undefined
17 | 
18 |   /**
19 |    * This function can be globally registered to allow dumping data in any pipe
20 |    * at any time. It is set by the pipe-executor.
21 |    */
22 |   var typestreamWriteDump: GlobalDumpFunction | undefined
23 | 
24 |   /**
25 |    * This variable is used for toggling writes from pipes. E.g. when we use BigQuery,
26 |    * we can disable writes during watch mode.
27 |    */
28 |   var typestreamWritingActive: boolean
29 | }
30 | 
31 | globalThis.typestreamWritingActive = false
32 | 
33 | export {}
34 | 
--------------------------------------------------------------------------------
/packages/core-protocol/src/index.ts:
--------------------------------------------------------------------------------
1 | export { BundleSchema } from './pipe/bundle-schema.js'
2 | export { Pipe } from './pipe/pipe.js'
3 | export type { PublishArgs } from './pipe/publish-args.js'
4 | export { NAME_PATTERN, FILE_NAME_PATTERN } from './naming/name-pattern.js'
5 | 
6 | import './global.js'
7 | 
--------------------------------------------------------------------------------
/packages/core-protocol/src/naming/generated-type-names.ts:
--------------------------------------------------------------------------------
1 | export const GENERATED_TYPE_NAME_PATTERN = /^[A-Z][\dA-Za-z]*$/
2 | 
3 | export function assertGeneratedTypeName(name: string) {
4 |   if (!GENERATED_TYPE_NAME_PATTERN.test(name))
5 |     throw new Error(
6 |       `"${name}" is not a valid type name!
Type names must start with a capital letter and can only include letters and numbers.`,
7 |     )
8 | }
9 | 
--------------------------------------------------------------------------------
/packages/core-protocol/src/naming/name-pattern.ts:
--------------------------------------------------------------------------------
1 | /**
2 |  * All names of resources, pipes, and projects must follow this pattern.
3 |  * Making it more restrictive will invalidate all live pipes.
4 |  */
5 | export const NAME_PATTERN = /^[\da-z]+([_-][\da-z]+)*$/
6 | export const NAME_PATTERN_EXPLANATION =
7 |   /* 'names must' + */ 'only consist of lowercase letters and numbers, separated by single dashes or underscores'
8 | 
9 | /**
10 |  * Regex for a valid unix path according to:
11 |  * https://stackoverflow.com/a/537876
12 |  */
13 | export const FILE_NAME_PATTERN = /[^\0]+/
14 | 
--------------------------------------------------------------------------------
/packages/core-protocol/src/naming/pipe-names.ts:
--------------------------------------------------------------------------------
1 | import { NAME_PATTERN, NAME_PATTERN_EXPLANATION } from './name-pattern.js'
2 | 
3 | export const PIPE_NAME_PATTERN = NAME_PATTERN
4 | 
5 | export function assertPipeName(name: string) {
6 |   if (!NAME_PATTERN.test(name))
7 |     throw new Error(
8 |       `"${name}" is not a valid pipe name.` +
9 |         ` Pipe names must ${NAME_PATTERN_EXPLANATION}.`,
10 |     )
11 | }
12 | 
--------------------------------------------------------------------------------
/packages/core-protocol/src/naming/project-names.ts:
--------------------------------------------------------------------------------
1 | import { NAME_PATTERN, NAME_PATTERN_EXPLANATION } from './name-pattern.js'
2 | 
3 | export const PROJECT_NAME_PATTERN = NAME_PATTERN
4 | 
5 | export function assertProjectName(name: string) {
6 |   if (!NAME_PATTERN.test(name))
7 |     throw new Error(
8 |       `"${name}" is not a valid project name.` +
9 |         ` Project names must ${NAME_PATTERN_EXPLANATION}.`,
10 |     )
11 | }
12 | 
--------------------------------------------------------------------------------
/packages/core-protocol/src/naming/resource-names.ts:
--------------------------------------------------------------------------------
1 | import { NAME_PATTERN, NAME_PATTERN_EXPLANATION } from './name-pattern.js'
2 | 
3 | export const RESOURCE_NAME_PATTERN = NAME_PATTERN
4 | 
5 | export function assertResourceName(name: string) {
6 |   if (!NAME_PATTERN.test(name))
7 |     throw new Error(
8 |       `"${name}" is not a valid resource name!` +
9 |         ` Resource names must ${NAME_PATTERN_EXPLANATION}.`,
10 |     )
11 | }
12 | 
--------------------------------------------------------------------------------
/packages/core-protocol/src/pipe/bundle-schema.ts:
--------------------------------------------------------------------------------
1 | import { z, ZodType } from 'zod'
2 | 
3 | import { AnyDocument, AnyResource } from '../resources/base-resource.js'
4 | import { executionResultSchema as _executionResultSchema } from './execution-result.js'
5 | import { PublishArgs } from './publish-args.js'
6 | 
7 | /**
8 |  * Definition of the bundle schema. This serves as the API between the core
9 |  * package and the rest: this way, the core knows what should be exported and
10 |  * TypeStream knows what can be expected from the import.
11 |  *
12 |  * *Do not apply breaking changes here because it will invalidate old pipes!*
13 |  */
14 | export namespace BundleSchema {
15 |   export const executionResultSchema = _executionResultSchema
16 | 
17 |   /** The type of `Bundle.call`.
Takes a `Document` as input and returns an `ExecutionResult`. */
18 |   export type BundleFunction = (
19 |     doc: AnyDocument,
20 |     callbacks: { publish: (args: PublishArgs<AnyResource>) => void },
21 |   ) => Promise<ExecutionResult>
22 | 
23 |   /** Just `z.function()` so that zod doesn't wrap the actual function and destroy the default debugging experience */
24 |   export const bundleFunctionSchema = z.function() as ZodType<BundleFunction>
25 |   const publishCallbackSchema = z.function(
26 |     z.tuple([z.any() as ZodType<PublishArgs<AnyResource>, any, any>]),
27 |     z.void(),
28 |   )
29 | 
30 |   export const processorArgsSchema = z.tuple([
31 |     z.any() as ZodType<AnyDocument>,
32 |     publishCallbackSchema,
33 |   ])
34 | 
35 |   /** The schema of the pipe-bundle */
36 |   export const bundleSchema = z.object({
37 |     resource: z.any() as ZodType<AnyResource>,
38 |     protocolVersion: z.literal(1),
39 |     call: bundleFunctionSchema,
40 |   })
41 | 
42 |   /** Type of a pipe bundle. `definePipe()` returns this and every pipe should export this as default. */
43 |   export type Bundle = z.infer<typeof bundleSchema>
44 | 
45 |   /** A Promise of this will be returned from the BundleFunction. It contains all the data that was created by the pipe. */
46 |   export type ExecutionResult = z.infer<typeof executionResultSchema>
47 | }
48 | 
--------------------------------------------------------------------------------
/packages/core-protocol/src/pipe/execution-result.ts:
--------------------------------------------------------------------------------
1 | import { z } from 'zod'
2 | 
3 | export const executionResultSchema = z.object({})
4 | 
--------------------------------------------------------------------------------
/packages/core-protocol/src/pipe/pipe.ts:
--------------------------------------------------------------------------------
1 | import { z } from 'zod'
2 | 
3 | import { PIPE_NAME_PATTERN } from '../naming/pipe-names.js'
4 | import { BundleSchema } from './bundle-schema.js'
5 | 
6 | export namespace Pipe {
7 |   export const pipeRefSchema = z.object({
8 |     name: z.string().regex(PIPE_NAME_PATTERN),
9 |     projectName: z.string(),
10 |   })
11 | 
12 |   export type PipeRef = z.infer<typeof pipeRefSchema>
13 | 
14 |   export const fullPipeSchema = z.object({
15 |     id: z.string(),
16 |     ref: pipeRefSchema,
17 |     bundle: BundleSchema.bundleSchema,
18 |   })
19 |   export type FullPipe = z.infer<typeof fullPipeSchema>
20 | 
21 |   export function pipeIdFromRef(ref: PipeRef) {
22 |     pipeRefSchema.parse(ref)
23 |     return `${ref.projectName}/${ref.name}`
24 |   }
25 | 
26 |   export function pipeRefFromId(pipeId: string) {
27 |     const match = pipeId.match(/^(.+)\/(.+)$/) ?? []
28 | 
29 |     const projectName = match[1]
30 |     const pipeName = match[2]
31 | 
32 |     const pipeRef = pipeRefSchema.parse({ projectName, name: pipeName })
33 | 
34 |     return pipeRef
35 |   }
36 | }
37 | 
--------------------------------------------------------------------------------
/packages/core-protocol/src/pipe/publish-args.ts:
--------------------------------------------------------------------------------
1 | import {
2 |   AnyResource,
3 |   DataOfDocument,
4 |   DocumentOfResource,
5 |   PublishMetadataOfResource,
6 | } from '../resources/base-resource.js'
7 | 
8 | export type PublishArgs<TargetRes extends AnyResource> = {
9 |   resource: TargetRes
10 |   data: DataOfDocument<DocumentOfResource<TargetRes>>
11 |   metadata: PublishMetadataOfResource<TargetRes>
12 | }
13 | 
--------------------------------------------------------------------------------
/packages/core-protocol/src/resources/base-resource.ts:
--------------------------------------------------------------------------------
1 | import { isDeepStrictEqual } from 'node:util'
2 | 
3 | import { assertResourceName } from '../naming/resource-names.js'
4 | import { ResourceType } from './resource-map.js'
5 | 
6 | export const kDocClass = Symbol('kDocClass')
7 | 
8 | export interface ResourceRef {
9 |   type: ResourceType
10 |   name: string
11 |   options: Record<string, any>
12 | }
13 | export interface DocumentRef {
14 |   resourceRef: ResourceRef
15 |   metadata: Record<string, any>
16 |   id: string
17 | }
18 | 
19 | const kPublishMetadataType = Symbol('kPublishMetadataType')
20 | 
21 | type ClassOf<T> = new (...args: any[]) => T
22 | export abstract class Resource<
23 |   Options extends Record<string, any>,
24 |   Doc extends AnyDocument,
25 |   PublishMetadata extends Record<string, any>,
26 | > {
27 |   public abstract readonly [kDocClass]: ClassOf<Doc>
28 |   abstract readonly type: ResourceType
29 | 
30 |   constructor(
31 |     /** The name of the resource (has to be unique within a project). */
32 |     public name: string,
33 | 
34 |     /** The options that are passed to the resource's provider. */
35 |     public options: Options,
36 |   ) {
37 |     assertResourceName(this.name)
38 |     this.validateOptions(this.options)
39 |   }
40 | 
41 |   abstract validateOptions(options: Options): void
42 | 
43 |   public toResourceRef(): ResourceRef {
44 |     return {
45 |       name: this.name,
46 |       options: this.options,
47 |       type: this.type,
48 |     }
49 |   }
50 | 
51 |   public buildDocument(
52 |     documentRef: DocumentRef,
53 |     read: () => Promise<any>,
54 |   ): Doc {
55 |     this.assertRefEqualsThis(documentRef.resourceRef)
56 | 
57 |     const DocumentClass = this[kDocClass]
58 |     const doc = new DocumentClass(this, documentRef.metadata, read)
59 | 
60 |     return doc
61 |   }
62 | 
63 |   protected assertRefEqualsThis(resourceRef: ResourceRef) {
64 |     if (
65 |       resourceRef.name === this.name &&
66 |       resourceRef.type === this.type &&
67 |       isDeepStrictEqual(resourceRef.options, this.options)
68 |     )
69 |       return
70 | 
71 |     throw new Error('This Resource does not equal the passed ResourceRef!')
72 |   }
73 | 
74 |   // Necessary because TypeScript otherwise "forgets" about the `Doc` type
75 |   // parameter which makes it impossible to extract it using `DocumentOfResource`
76 |   public readonly [kPublishMetadataType]?: PublishMetadata
77 | }
78 | 
79 | export abstract class Document<
80 |   Data,
81 |   Metadata extends Record<string, any>,
82 |   Res extends AnyResource,
83 | > {
84 |   constructor(
85 |     /** The resource this document is from. */
86 |     public readonly resource: Res,
87 | 
88 |     /** Metadata about the document made accessible through getters. */
89 |     public readonly metadata: Metadata,
90 | 
91 |     /** Read the document's contents.
*/
92 |     public readonly read: () => Promise<Data>,
93 |   ) {}
94 | 
95 |   public toDocumentRef(): DocumentRef {
96 |     return {
97 |       resourceRef: this.resource.toResourceRef(),
98 |       id: this.id,
99 |       metadata: this.metadata,
100 |     }
101 |   }
102 | 
103 |   public abstract get id(): string
104 | }
105 | 
106 | export type AnyResource = Resource<any, any, any>
107 | 
108 | export type AnyDocument = Document<any, any, any>
109 | 
110 | export type DocumentOfResource<Res extends AnyResource> = Res extends Resource<
111 |   any,
112 |   infer Doc,
113 |   any
114 | >
115 |   ? Doc
116 |   : never
117 | 
118 | export type ResourceOfDocument<Doc extends AnyDocument> = Doc extends Document<
119 |   any,
120 |   any,
121 |   infer Res
122 | >
123 |   ? Res
124 |   : never
125 | 
126 | export type PublishMetadataOfResource<Res extends AnyResource> =
127 |   Res extends Resource<any, any, infer PublishMetadata>
128 |     ? PublishMetadata
129 |     : never
130 | 
131 | export type DataOfDocument<Doc extends AnyDocument> = Doc extends Document<
132 |   infer Data,
133 |   any,
134 |   any
135 | >
136 |   ? Data
137 |   : never
138 | 
--------------------------------------------------------------------------------
/packages/core-protocol/src/resources/buffer-document.ts:
--------------------------------------------------------------------------------
1 | import { parse, HTMLElement } from 'node-html-parser'
2 | 
3 | import { AnyResource, Document } from './base-resource.js'
4 | 
5 | export abstract class BufferDocuments<
6 |   ResourceMetadata extends Record<string, any>,
7 |   Resource extends AnyResource,
8 | > extends Document<Buffer, ResourceMetadata, Resource> {
9 |   /**
10 |    * Reads the document's content and returns
11 |    * it as a JSON object.
12 |    * @param encoding (default `utf-8`)
13 |    * @returns JSON object
14 |    */
15 |   public async asJson(encoding?: BufferEncoding): Promise<any> {
16 |     const document = await this.read()
17 |     const jsonObject = JSON.parse(document.toString(encoding))
18 |     return jsonObject
19 |   }
20 | 
21 |   /**
22 |    * Reads the document's content and returns
23 |    * it as text.
24 |    * @param encoding (default `utf-8`)
25 |    * @returns text
26 |    */
27 |   public async asText(encoding?: BufferEncoding): Promise<string> {
28 |     const document = await this.read()
29 |     const documentString = document.toString(encoding)
30 |     return documentString
31 |   }
32 | 
33 |   /**
34 |    * Reads the document's content and returns it
35 |    * as an HTMLElement.
36 |    * @param encoding (default `utf-8`)
37 |    * @returns HTMLElement
38 |    */
39 |   public async asHtml(encoding?: BufferEncoding): Promise<HTMLElement> {
40 |     const document = await this.read()
41 |     const documentString = document.toString(encoding)
42 |     const htmlObject = parse(documentString)
43 |     return htmlObject
44 |   }
45 | }
46 | 
--------------------------------------------------------------------------------
/packages/core-protocol/src/resources/cloud-storage-resource.ts:
--------------------------------------------------------------------------------
1 | import { z } from 'zod'
2 | 
3 | import { kDocClass, Resource } from './base-resource.js'
4 | import { BufferDocuments } from './buffer-document.js'
5 | import { ResourceType } from './resource-map.js'
6 | 
7 | export type CloudStorageResourceMetadata = {
8 |   kind: string
9 |   id: string
10 |   selfLink: string
11 |   mediaLink: string
12 |   name: string
13 |   bucket: string
14 |   generation: string
15 |   metageneration: string
16 |   contentType: string
17 |   storageClass: string
18 |   size: string
19 |   md5Hash: string
20 |   crc32c: string
21 |   etag: string
22 |   timeCreated: string
23 |   updated: string
24 |   timeStorageClassUpdated: string
25 |   metadata: Record<string, string>
26 | 
27 |   /** The full path without the prefix.
Added by us while downloading from cloud storage */
28 |   slicedPath: string
29 | }
30 | 
31 | export type CloudStoragePublishMetadata = {
32 |   name: string
33 |   metadata?: Record<string, string>
34 | }
35 | 
36 | const cloudStorageResourceOptionsSchema = z.object({
37 |   bucket: z.string(),
38 |   cloudStorageProject: z.string(),
39 |   pathPrefix: z.string(),
40 | })
41 | 
42 | type CloudStorageResourceOptions = z.infer<
43 |   typeof cloudStorageResourceOptionsSchema
44 | >
45 | 
46 | export class CloudStorageDocument extends BufferDocuments<
47 |   CloudStorageResourceMetadata,
48 |   CloudStorageResource
49 | > {
50 |   public get id(): string {
51 |     if (!this.metadata.slicedPath)
52 |       throw new Error(
53 |         'Id property not available on the metadata of this cloud storage document. Make sure `metadata.slicedPath` exists!',
54 |       )
55 |     return this.metadata.slicedPath
56 |   }
57 | }
58 | 
59 | /**
60 |  * Resources are the core concept when thinking about where your data is stored.
61 |  * With `CloudStorageResource` you can define a Cloud Storage data source to
62 |  * which you write your transformed data or from which you read the data.
63 |  * @param {string} name The name of the resource (has to be unique within a project).
64 |  * @param {object} options The options needed for the Cloud Storage reference
65 |  * @param {string} options.cloudStorageProject The name of the Cloud Storage project
66 |  * @param {string} options.bucket The reference name of a Cloud Storage bucket
67 |  * @param {string} options.pathPrefix Can be thought of as a directory name in a local storage system.
68 |  * When reading your bucket, only objects under that path will be considered.
69 |  *
70 |  * @example
71 |  * new CloudStorageResource('getting-started-dataset', {
72 |  *   cloudStorageProject: 'scopas',
73 |  *   bucket: 'typestream-datasets',
74 |  *   pathPrefix: 'bestsellers-ecommerce',
75 |  * })
76 |  */
77 | export class CloudStorageResource extends Resource<
78 |   CloudStorageResourceOptions,
79 |   CloudStorageDocument,
80 |   CloudStoragePublishMetadata
81 | > {
82 |   readonly type: ResourceType = 'gcs'
83 | 
84 |   validateOptions(options: {
85 |     bucket: string
86 |     cloudStorageProject: string
87 |     pathPrefix: string
88 |   }): void {
89 |     cloudStorageResourceOptionsSchema.parse(options)
90 |   }
91 | 
92 |   [kDocClass] = CloudStorageDocument
93 | }
94 | 
--------------------------------------------------------------------------------
/packages/core-protocol/src/resources/file-resource.ts:
--------------------------------------------------------------------------------
1 | import { z } from 'zod'
2 | 
3 | import { kDocClass, Resource } from './base-resource.js'
4 | import { BufferDocuments } from './buffer-document.js'
5 | import { ResourceType } from './resource-map.js'
6 | 
7 | type FileResourceMetadata = {
8 |   path: string
9 | }
10 | 
11 | const fileResourceOptionsSchema = z.object({
12 |   basePath: z.string(),
13 |   /**
14 |    * If recursive is set to `true` the whole subtree below `basePath` will be
15 |    * considered instead of only the files directly under `basePath`
16 |    */
17 |   recursive: z.boolean(),
18 | })
19 | 
20 | export type FilePublishMetadata = {
21 |   name: string
22 | }
23 | 
24 | type FileResourceOptions = z.infer<typeof fileResourceOptionsSchema>
25 | 
26 | /**
27 |  * Resources are the core concept when thinking about where your data is stored.
28 |  * With `FileResource` you can define a local directory to which
29 |  * you write your transformed data or from which you read the data.
30 |  *
31 |  * @param {string} name The name of the resource (has to be unique within a project).
32 |  * @param {object} options The options describe the document location for the resource
33 |  * @param {string} options.basePath The base path, starting from your project folder
34 |  * @param {boolean} options.recursive If `recursive` is set to `true`, the whole subtree
35 |  * below `basePath` will be considered instead of only the files directly under `basePath`
36 |  *
37 |  * @example
38 |  * new FileResource('transformed-products', {
39 |  *   basePath: 'output',
40 |  *   recursive: false,
41 |  * })
42 |  */
43 | export class FileResource extends Resource<
44 |   FileResourceOptions,
45 |   FileDocument,
46 |   FilePublishMetadata
47 | > {
48 |   validateOptions(options: { basePath: string; recursive: boolean }): void {
49 |     fileResourceOptionsSchema.parse(options)
50 |   }
51 | 
52 |   readonly type: ResourceType = 'file'
53 | 
54 |   public [kDocClass] = FileDocument
55 | }
56 | 
57 | export class FileDocument extends BufferDocuments<
58 |   FileResourceMetadata,
59 |   FileResource
60 | > {
61 |   public get id(): string {
62 |     if (!this.metadata.path)
63 |       throw new Error(
64 |         'Id field on FileDocument not found! On file documents, the `metadata.path` property must be set!',
65 |       )
66 |     return this.metadata.path
67 |   }
68 | }
69 | 
--------------------------------------------------------------------------------
/packages/core-protocol/src/resources/index.ts:
--------------------------------------------------------------------------------
1 | export { resourceFromRef } from './resource-map.js'
2 | 
3 | export { Document, Resource, kDocClass } from './base-resource.js'
4 | export type {
5 |   DocumentOfResource,
6 |   ResourceOfDocument,
7 |   PublishMetadataOfResource,
8 |   DataOfDocument,
9 |   AnyResource,
10 |   AnyDocument,
11 | } from './base-resource.js'
12 | export {
13 |   CloudStorageDocument,
14 |   CloudStorageResource,
15 | } from './cloud-storage-resource.js'
16 | export type { S3ResourceMetadata, S3PublishMetadata } from './s3-resource.js'
17 | export { S3Document, S3Resource } from './s3-resource.js'
18 | export type {
19 |   CloudStorageResourceMetadata,
20 |   CloudStoragePublishMetadata,
21 | } from './cloud-storage-resource.js'
22 | export { FileDocument, FileResource } from './file-resource.js'
23 | export type { FilePublishMetadata } from './file-resource.js'
24 | 
25 | export type { DocumentRef, ResourceRef } from './base-resource.js'
26 | 
--------------------------------------------------------------------------------
/packages/core-protocol/src/resources/resource-map.ts:
--------------------------------------------------------------------------------
1 | import { Resource, ResourceRef } from './base-resource.js'
2 | import { CloudStorageResource } from './cloud-storage-resource.js'
3 | import { FileResource } from './file-resource.js'
4 | import { S3Resource } from './s3-resource.js'
5 | 
6 | export type ResourceType = 'gcs' | 'file' | 's3'
7 | 
8 | type ClassOf<T> = new (...args: any[]) => T
9 | 
10 | const registeredResources: Record<
11 |   ResourceType,
12 |   ClassOf<Resource<any, any, any>>
13 | > = {
14 |   file: FileResource,
15 |   gcs: CloudStorageResource,
16 |   s3: S3Resource,
17 | }
18 | 
19 | export function resourceFromRef(resourceRef: ResourceRef) {
20 |   const resourceClass = registeredResources[resourceRef.type]
21 |   if (!resourceClass)
22 |     throw new Error(`Unknown resource type (${resourceRef.type})!`)
23 | 
24 |   return new resourceClass(resourceRef.name, resourceRef.options)
25 | }
26 | 
--------------------------------------------------------------------------------
/packages/core-protocol/src/resources/s3-resource.ts:
--------------------------------------------------------------------------------
1 | import type { GetObjectCommandOutput } from '@aws-sdk/client-s3'
2 | import { z } from 'zod'
3 | 
4 | import { kDocClass, Resource } from './base-resource.js'
5 | import { BufferDocuments } from './buffer-document.js'
6 | import { ResourceType } from './resource-map.js'
7 | 
8 | export type S3ResourceMetadata = Omit<
9 |   GetObjectCommandOutput,
10 |   '$metadata' | 'Body'
11 | > & {
12 |   /** The full path without the prefix. Added by us while downloading from S3 */
13 |   slicedPath: string
14 | }
15 | 
16 | export type S3PublishMetadata = {
17 |   name: string
18 |   metadata?: Record<string, string>
19 | }
20 | 
21 | const S3ResourceOptionsSchema = z.object({
22 |   region: z.string(),
23 |   bucket: z.string(),
24 |   pathPrefix: z.string(),
25 | })
26 | 
27 | type S3ResourceOptions = z.infer<typeof S3ResourceOptionsSchema>
28 | 
29 | export class S3Document extends BufferDocuments<
30 |   S3ResourceMetadata,
31 |   S3Resource
32 | > {
33 |   public get id(): string {
34 |     if (!this.metadata.slicedPath)
35 |       throw new Error(
36 |         'Id property not available on the metadata of this S3 document. Make sure `metadata.slicedPath` exists!',
37 |       )
38 |     return this.metadata.slicedPath
39 |   }
40 | }
41 | 
42 | /**
43 |  * Resources are the core concept when thinking about where your data is stored.
44 |  * With `S3Resource` you can define an S3 data source to which you write your
45 |  * transformed data or from which you read the data.
46 |  * @param {string} name The name of the resource (has to be unique within a project).
47 |  * @param {object} options The options needed for the S3 reference
48 |  * @param {string} options.region The region of your S3 bucket
49 |  * @param {string} options.bucket The name of your S3 bucket
50 |  * @param {string} options.pathPrefix Can be thought of as a directory name in a local storage system.
51 |  * When reading your bucket, only objects under that path will be considered.
52 |  *
53 |  * @example
54 |  * new S3Resource('getting-started-dataset', {
55 |  *   region: 'eu-central-1',
56 |  *   bucket: 'typestream-datasets',
57 |  *   pathPrefix: 'bestsellers-ecommerce/2022',
58 |  * })
59 |  */
60 | export class S3Resource extends Resource<
61 |   S3ResourceOptions,
62 |   S3Document,
63 |   S3PublishMetadata
64 | > {
65 |   validateOptions(options: {
66 |     region: string
67 |     bucket: string
68 |     pathPrefix: string
69 |   }): void {
70 |     S3ResourceOptionsSchema.parse(options)
71 |   }
72 | 
73 |   readonly type: ResourceType = 's3'
74 | 
75 |   public [kDocClass] = S3Document
76 | }
77 | 
--------------------------------------------------------------------------------
/packages/core-protocol/src/utils.ts:
--------------------------------------------------------------------------------
1 | export {
2 |   assertGeneratedTypeName,
3 |   GENERATED_TYPE_NAME_PATTERN,
4 | } from './naming/generated-type-names.js'
5 | 
6 | export { PIPE_NAME_PATTERN, assertPipeName } from './naming/pipe-names.js'
7 | export {
8 |   PROJECT_NAME_PATTERN,
9 |   assertProjectName,
10 | } from './naming/project-names.js'
11 | export {
12 |   RESOURCE_NAME_PATTERN,
13 |   assertResourceName,
14 | } from './naming/resource-names.js'
15 | 
--------------------------------------------------------------------------------
/packages/core-protocol/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 |   "compilerOptions": {
3 |     "outDir": "dist",
4 |     "target": "ES2021",
5 |     "module": "NodeNext",
6 |     "sourceMap": true,
7 |     "skipLibCheck": true,
8 |     "strict": true,
9 |     "allowJs": true,
10 |     "isolatedModules": true,
11 |     "declaration": true,
12 |     "rootDir": "src"
13 |   },
14 |   "include": ["**/*.ts"]
15 | }
16 | 
--------------------------------------------------------------------------------
/packages/core/.npmignore:
--------------------------------------------------------------------------------
1 | # Necessary because npm otherwise completely ignores gitignored files even if
2 | # they are explicitly listed under `files` in the `package.json` file
3 | # See: https://npm.github.io/publishing-pkgs-docs/publishing/the-npmignore-file.html
4 | 
--------------------------------------------------------------------------------
/packages/core/README.md:
--------------------------------------------------------------------------------
1 | # [TypeStream](https://typestream.dev) core
2 | 
3 | > This package is part of the [**TypeStream** data transformation framework](https://typestream.dev).
4 | 
--------------------------------------------------------------------------------
/packages/core/package.json:
--------------------------------------------------------------------------------
1 | {
2 |   "name": "@typestream/core",
3 |   "version": "0.0.8",
4 |   "description": "Core library to be used in TypeStream projects",
5 |   "repository": {
6 |     "type": "git",
7 |     "url": "https://github.com/scopashq/typestream",
8 |     "path": "packages/core"
9 |   },
10 |   "license": "MIT",
11 |   "author": "Scopas Technologies GmbH",
12 |   "type": "module",
13 |   "exports": {
14 |     ".": "./dist/index.js",
15 |     "./date-fns": "./dist/libraries/date-fns.js",
16 |     "./html-to-text": "./dist/libraries/html-to-text.js",
17 |     "./html-parser": "./dist/libraries/node-html-parser.js"
18 |   },
19 |   "files": [
20 |     "dist"
21 |   ],
22 |   "scripts": {
23 |     "build": "rimraf dist/ && tsc",
24 |     "check-deps": "depcheck",
25 |     "clean": "rimraf dist/ node_modules/",
26 |     "watch": "tsc --watch --preserveWatchOutput"
27 |   },
28 |   "dependencies": {
29 |     "@google-cloud/bigquery": "^5.11.0",
30 |     "@typestream/core-protocol": "^0.0.5",
31 |     "acorn": "^8.7.0",
32 |     "acorn-walk": "^8.2.0",
33 |     "date-fns": "^2.28.0",
34 |     "html-to-text": "^8.1.0",
35 |     "jsonschema-bigquery": "^5.0.0",
36 |     "node-html-parser": "^5.2.0",
37 |     "zod": "^3.11.6",
38 |     "zod-to-json-schema": "^3.11.3"
39 |   },
40 |   "devDependencies": {
41 |     "@types/html-to-text": "^8.0.1",
42 |     "typescript": "^4.7.0-dev.20220216"
43 |   },
44 |   "engines": {
45 |     "node": ">=16.0.0"
46 |   }
47 | }
48 | 
--------------------------------------------------------------------------------
/packages/core/src/define-pipe.ts:
--------------------------------------------------------------------------------
1 | import { BundleSchema, PublishArgs } from '@typestream/core-protocol'
2 | import {
3 |   AnyResource,
4 |   AnyDocument,
5 |   DocumentOfResource,
6 | } from '@typestream/core-protocol/resources'
7 | 
8 | // Important to note:
9 | // publish() can receive any given resource as an argument and is in no
10 | // way related to the resource of the doc
11 | type PipeContext<Doc extends AnyDocument> = {
12 |   doc: Doc
13 |   /**
14 |    * With publish you write your transformed data to your output resource.
15 |    * The target can be either a local directory or a cloud storage.
16 |    * If you are done developing this pipe, run `tyst process` to actually
17 |    * publish all your inputs.
18 |    * @param {AnyResource} resource The resource to which you publish the data
19 |    * @param data The data you publish (in the form of a buffer)
20 |    * @param {object} metadata The metadata you want to add to the data
21 |    * @param {string} name The name for the document - unique for each document
22 |    */
23 |   publish: <TargetRes extends AnyResource>(args: PublishArgs<TargetRes>) => void
24 | }
25 | 
26 | export function definePipe<SourceRes extends AnyResource>(
27 |   resource: SourceRes,
28 |   fun: (ctx: PipeContext<DocumentOfResource<SourceRes>>) => Promise<void>,
29 | ): BundleSchema.Bundle {
30 |   const wrappedFunction = async (
31 |     doc: DocumentOfResource<SourceRes>,
32 |     callbacks: { publish: (args: PublishArgs<AnyResource>) => void },
33 |   ) => {
34 |     // PREPARE EXECUTION
35 |     const ctx: PipeContext<DocumentOfResource<SourceRes>> = {
36 |       doc,
37 |       publish: <TargetRes extends AnyResource>(
38 |         args: PublishArgs<TargetRes>,
39 |       ) => {
40 |         callbacks.publish(args)
41 |       },
42 |     }
43 | 
44 |     // EXECUTION
45 |     await fun(ctx)
46 | 
47 |     // AFTER EXECUTION
48 |     const executionResult: BundleSchema.ExecutionResult = {}
49 |     return executionResult
50 |   }
51 |   return {
52 |     call: wrappedFunction as BundleSchema.BundleFunction,
53 |     protocolVersion: 1,
54 |     resource,
55 |   }
56 | }
57 | 
--------------------------------------------------------------------------------
/packages/core/src/dump.ts:
--------------------------------------------------------------------------------
1 | interface DumpOptions {
2 |   /** Data will only be dumped if the condition is truthy */
3 |   condition?: any
4 | 
5 |   /** Name is optional. This is the file into which data will be dumped (a file in the dump folder).
6 |    * Defaults to `default`, so you will find the data in the `default.json` file.
7 |    */
8 |   name?: string
9 | 
10 |   skipDuplicates?: boolean
11 | }
12 | 
13 | /**
14 |  * `dump()` is a developer tool that allows you to write data to a file while
15 |  * building your pipe. The JSON documents will be appended to a file to allow you
16 |  * to easily see the data you are working with.
-------------------------------------------------------------------------------- /packages/core/src/index.ts: -------------------------------------------------------------------------------- 1 | export { definePipe } from './define-pipe.js' 2 | export { dump } from './dump.js' 3 | export { typed } from './typed.js' 4 | export { 5 | basedOn, 6 | basedOnKey, 7 | basedOnMultiple, 8 | sumOf, 9 | toSum, 10 | } from './utils/array-utils.js' 11 | export { round } from './utils/round.js' 12 | 13 | export { BigQueryTable } from './utils/bigquery/bigquery-table.js' 14 | export { 15 | extractJsonAssignments, 16 | extractJsonAssignmentsFromDocument, 17 | extractJsonScriptsFromDocument, 18 | } from './utils/extract-json-assignments.js' 19 | export { pick } from './utils/pick.js' 20 | export { 21 | FileResource, 22 | CloudStorageResource, 23 | S3Resource, 24 | } from '@typestream/core-protocol/resources' 25 | export { z } from 'zod' 26 | -------------------------------------------------------------------------------- /packages/core/src/libraries/date-fns.ts: -------------------------------------------------------------------------------- 1 | export * from 'date-fns' 2 | -------------------------------------------------------------------------------- /packages/core/src/libraries/html-to-text.ts: -------------------------------------------------------------------------------- 1 | export * from 'html-to-text' 2 | -------------------------------------------------------------------------------- /packages/core/src/libraries/node-html-parser.ts: -------------------------------------------------------------------------------- 1 | export { 2 | parse as parseHtml, 3 | HTMLElement, 4 | CommentNode, 5 | Node, 6 | NodeType, 7 | TextNode, 8 | valid, 9 | } from 'node-html-parser' 10 | 11 | export type { Options } from 'node-html-parser' 12 | -------------------------------------------------------------------------------- /packages/core/src/typed.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * `typed()` is a no-op function which creates types based on the input. 3 | * @param name the name of the generated types 4 | * @param value the input from which the types are inferred 5 | * @returns the input value, typed with the generated types 6 | */ 7 | export function typed<N extends string>( 8 | name: N, 9 | value: any, 10 | ): N extends keyof TypeStream.Types ? TypeStream.Types[N] : unknown { 11 | globalThis.typestreamCaptureTypeSample?.(name, value) 12 | 13 | return value 14 | } 15 | 16 | declare global { 17 | namespace TypeStream { 18 | // Will be extended by the auto-generated type map 19 | // eslint-disable-next-line @typescript-eslint/no-empty-interface 20 | export interface Types {} 21 | } 22 | } 23 |
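A short sketch of the intended `typed()` workflow (the type name is arbitrary; the corresponding `TypeStream.Types` entry only exists once watch mode has generated it):

```ts
import { typed } from '@typestream/core'

declare const raw: unknown // e.g. a freshly parsed JSON document

// While `tyst watch` runs, samples of `raw` are captured and a matching
// entry is generated in the TypeStream.Types interface, so `product`
// becomes fully typed in the editor.
const product = typed('Product', raw)
```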
-------------------------------------------------------------------------------- /packages/core/src/utils/array-utils.ts: -------------------------------------------------------------------------------- 1 | type Direction = 'asc' | 'desc' 2 | 3 | /** Made to be used with `.sort` (e.g. `.sort(basedOnKey('size', 'desc'))`). */ 4 | export function basedOnKey<T extends Record<string, any>>( 5 | key: keyof T, 6 | direction: Direction, 7 | ) { 8 | return basedOn(_ => _[key], direction) 9 | } 10 | 11 | /** Made to be used with `.sort` (e.g. `.sort(basedOn(_ => _.size, 'desc'))`). */ 12 | export function basedOn<T>(argFn: (arg: T) => number, direction: Direction) { 13 | const sortFn = 14 | direction === 'asc' 15 | ? (x: number, y: number) => x - y 16 | : (x: number, y: number) => y - x 17 | 18 | return (a: T, b: T) => sortFn(argFn(a), argFn(b)) 19 | } 20 | 21 | /** 22 | * Like `basedOn`, but with support for multiple sorting criteria: 23 | * 24 | * ``` 25 | * items.sort(basedOnMultiple([ 26 | * [_ => _.size, 'desc'], 27 | * [_ => _.createdAt, 'asc'], 28 | * ])) 29 | * ``` 30 | */ 31 | export function basedOnMultiple<T>( 32 | criteria: [(arg: T) => number, Direction][], 33 | ) { 34 | const sortFns = criteria.map(([argFn, direction]) => 35 | basedOn(argFn, direction), 36 | ) 37 | 38 | return (a: T, b: T) => { 39 | for (const sortFn of sortFns) { 40 | const result = sortFn(a, b) 41 | if (result !== 0) return result 42 | } 43 | 44 | return 0 45 | } 46 | } 47 | 48 | /** Made to be used with `.reduce` (`.reduce(...toSum)`). */ 49 | export const toSum = [(sum: number, value: number) => sum + value, 0] as const 50 | 51 | export function sumOf(numbers: number[]) { 52 | return numbers.reduce(...toSum) 53 | } 54 |
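The sorting and summing helpers in use (the `items` array is invented for illustration):

```ts
import { basedOn, basedOnKey, sumOf } from '@typestream/core'

const items = [
  { name: 'a', size: 7 },
  { name: 'b', size: 3 },
]

items.sort(basedOnKey('size', 'desc')) // largest first
items.sort(basedOn(_ => _.size, 'asc')) // same idea, via an accessor function

const totalSize = sumOf(items.map(_ => _.size)) // 10
```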
-------------------------------------------------------------------------------- /packages/core/src/utils/bigquery/bigquery-table.ts: -------------------------------------------------------------------------------- 1 | import { BigQuery, Table } from '@google-cloud/bigquery' 2 | import { ZodSchema } from 'zod' 3 | 4 | import { zodToBigQuerySchema } from './schema.js' 5 | 6 | type BigQueryTableOptions<T> = { 7 | /** 8 | * Schema of the table as a zod schema. When the table has no schema yet, 9 | * TypeStream will create a new BigQuery schema for it. 10 | */ 11 | schema: ZodSchema<T> 12 | 13 | /** 14 | * Name of the table in BigQuery. 15 | */ 16 | tableName: string 17 | 18 | /** 19 | * The id (name) of the dataset in BigQuery. 20 | */ 21 | datasetId: string 22 | 23 | /** 24 | * The id of the Google Cloud project. Recommended, but optional because 25 | * the project can also be set through the default project. 26 | */ 27 | projectId?: string 28 | 29 | /** 30 | * BigQuery provides an insertId to deduplicate documents. When you define this 31 | * function, you can specify a value that is used as the insertId. 32 | */ 33 | insertIdFn?: (arg: T) => string 34 | } 35 | 36 | //TODO: delete 37 | export class BigQueryTable<T> { 38 | readonly zodSchema: ZodSchema<T> 39 | private table: Table 40 | 41 | private bigquery: BigQuery 42 | private insertIdFn?: (arg: T) => string 43 | private bqSchema: any 44 | private setSchemaPromise?: Promise<void> 45 | 46 | constructor({ 47 | datasetId, 48 | insertIdFn, 49 | schema, 50 | tableName, 51 | projectId, 52 | }: BigQueryTableOptions<T>) { 53 | this.bigquery = new BigQuery({ projectId }) 54 | this.table = this.bigquery.dataset(datasetId).table(tableName) 55 | this.zodSchema = schema 56 | this.insertIdFn = insertIdFn 57 | 58 | this.bqSchema = zodToBigQuerySchema(this.zodSchema) 59 | } 60 | 61 | /** 62 | * This is the BigQuery schema that was generated from the zod schema. 63 | */ 64 | get bigquerySchema() { 65 | return this.bqSchema 66 | } 67 | 68 | /** 69 | * Insert documents into the BigQuery table that match the defined schema. 70 | */ 71 | async insert(rows: T[]) { 72 | const parsedRows = rows.map(x => this.zodSchema.parse(x)) 73 | if (parsedRows.length === 0) 74 | throw new Error( 75 | 'You must provide at least one row to insert into BigQuery!', 76 | ) 77 | 78 | if (!globalThis.typestreamWritingActive) return 79 | await (this.setSchemaPromise ??= this.updateSchema()) 80 | 81 | await (this.insertIdFn 82 | ? insertWithInsertId( 83 | parsedRows, 84 | this.insertIdFn, 85 | this.table, 86 | this.bqSchema, 87 | ) 88 | : this.table.insert(parsedRows, { schema: this.bqSchema })) 89 | } 90 | 91 | /** 92 | * This function is automatically called on the first insert to a table and 93 | * sets the table schema to the current zod schema. 94 | */ 95 | async updateSchema() { 96 | const [tableExists] = await this.table.exists() 97 | if (!tableExists) 98 | throw new Error( 99 | `Table does not exist yet. Go to BigQuery (https://console.cloud.google.com/bigquery) and create the table. TypeStream will create the schema for you.`, 100 | ) 101 | 102 | const [metadata] = await this.table.getMetadata() 103 | 104 | // Update schema 105 | metadata.schema = this.bqSchema 106 | await this.table.setMetadata(metadata) 107 | } 108 | } 109 | 110 | /** 111 | * Provides a way to insert rows into BigQuery with an insertId. This is needed because 112 | * the default BigQuery Node.js API does not provide a way to set the insertId easily. 113 | */ 114 | async function insertWithInsertId<T>( 115 | parsedRows: any[], 116 | insertIdFn: (arg: T) => string, 117 | table: Table, 118 | bqSchema: any, 119 | ) { 120 | const encodedRows = parsedRows.map(row => ({ 121 | json: Table.encodeValue_(row), 122 | insertId: insertIdFn(row), 123 | })) 124 | 125 | await table.insert(encodedRows, { raw: true, schema: bqSchema }) 126 | } 127 |
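A sketch of `BigQueryTable` usage (the dataset, table, and schema names are hypothetical; as `insert()` enforces, the table itself must already exist in BigQuery):

```ts
import { BigQueryTable, z } from '@typestream/core'

const productSchema = z.object({
  product_id: z.string(),
  title: z.string(),
  rating: z.number().optional(),
})

const products = new BigQueryTable({
  schema: productSchema,
  datasetId: 'my_dataset', // hypothetical
  tableName: 'products', // hypothetical
  // Optional: derive BigQuery's insertId from each row for deduplication.
  insertIdFn: row => row.product_id,
})

await products.insert([{ product_id: '1', title: 'Example product' }])
```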
-------------------------------------------------------------------------------- /packages/core/src/utils/bigquery/jsonschema-bigquery.d.ts: -------------------------------------------------------------------------------- 1 | declare module 'jsonschema-bigquery' { 2 | export function run( 3 | input_schema: any, 4 | options?: { 5 | preventAdditionalObjectProperties: boolean 6 | continueOnError: boolean 7 | }, 8 | ): any 9 | } 10 | -------------------------------------------------------------------------------- /packages/core/src/utils/bigquery/schema.ts: -------------------------------------------------------------------------------- 1 | import { run as jsonSchemaToBqSchema } from 'jsonschema-bigquery' 2 | import { ZodSchema } from 'zod' 3 | import { zodToJsonSchema } from 'zod-to-json-schema' 4 | 5 | export function zodToBigQuerySchema(schema: ZodSchema) { 6 | const jsonSchema = zodToJsonSchema(schema) 7 | const bqSchema = jsonSchemaToBqSchema(jsonSchema).schema 8 | return bqSchema 9 | } 10 | -------------------------------------------------------------------------------- /packages/core/src/utils/extract-json-assignments.ts: -------------------------------------------------------------------------------- 1 | import { parse as parseJs } from 'acorn' 2 | import { simple as walkTree } from 'acorn-walk' 3 | import { HTMLElement } from 'node-html-parser' 4 | 5 | /** 6 | * Takes in a string of JavaScript, tries to find all the instances where a 7 | * piece of JSON (for example SSR state) is assigned to a variable or property, 8 | * and returns a map of the parsed results keyed by the name of the variable. 9 | */ 10 | export function extractJsonAssignments(script: string) { 11 | const map: Record<string, any> = {} 12 | const tree = parseJs(script, { ecmaVersion: 'latest' }) 13 | 14 | const processNode = (keyNode: any, valueNode: any) => { 15 | const key = getNodeString(script, keyNode) 16 | 17 | // `JSON.parse` calls 18 | // (e.g. `window.state = JSON.parse('{"items":[1,2,3]}')`) 19 | if ( 20 | valueNode.type === 'CallExpression' && 21 | getNodeString(script, valueNode.callee) === 'JSON.parse' 22 | ) { 23 | const jsonString = valueNode.arguments[0].value 24 | map[key] = JSON.parse(jsonString) 25 | } 26 | 27 | // Directly inserted JSON 28 | // (e.g. `window.state = {"items":[1,2,3]}`) 29 | else if ( 30 | valueNode.type === 'ObjectExpression' || 31 | valueNode.type === 'ArrayExpression' 32 | ) { 33 | const jsonString = getNodeString(script, valueNode) 34 | 35 | try { 36 | const data = JSON.parse(jsonString) 37 | 38 | // Ignore `{}` and `[]` 39 | if (Object.keys(data).length === 0) return 40 | 41 | map[key] = data 42 | } catch { 43 | // Values that can't be parsed are ignored 44 | } 45 | } 46 | } 47 | 48 | walkTree(tree, { 49 | VariableDeclarator: (node: any) => processNode(node.id, node.init), 50 | AssignmentExpression: (node: any) => processNode(node.left, node.right), 51 | }) 52 | 53 | return map 54 | } 55 | 56 | export function extractJsonAssignmentsFromDocument(document: HTMLElement) { 57 | const scripts = document.querySelectorAll( 58 | 'script:not([src]):not([type="application/json"])', 59 | ) 60 | 61 | const map: Record<string, any> = {} 62 | for (const script of scripts) { 63 | try { 64 | const assignments = extractJsonAssignments(script.text) 65 | Object.assign(map, assignments) 66 | } catch { 67 | // Scripts that can't be parsed are skipped 68 | } 69 | } 70 | 71 | return map 72 | } 73 | 74 | export function extractJsonScriptsFromDocument(document: HTMLElement) { 75 | const scripts = document.querySelectorAll( 76 | 'script:not([src])', //[type="application/json"] 77 | ) 78 | const jsonScripts = scripts.filter(x => 79 | x.attrs.type?.toLowerCase().includes('json'), 80 | ) 81 | 82 | const res: { json: any; attributes: Record<string, string> }[] = [] 83 | for (const script of jsonScripts) { 84 | const attributes = script.attrs 85 | try { 86 | const json = JSON.parse(script.text) 87 | res.push({ 88 | attributes, 89 | json, 90 | }) 91 | } catch { 92 | // We just want to skip unparsable entries 93 | } 94 | } 95 | 96 | return res 97 | } 98 | 99 | function getNodeString(script: string, node: acorn.Node) { 100 | return script.slice(node.start, node.end) 101 | } 102 |
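What the extractor returns for a typical inline script (script contents invented for illustration):

```ts
import { extractJsonAssignments } from '@typestream/core'

const script = `
  window.__STATE__ = JSON.parse('{"items":[1,2,3]}')
  var config = { "debug": true }
`

const assignments = extractJsonAssignments(script)
// => { 'window.__STATE__': { items: [1, 2, 3] }, config: { debug: true } }
```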
-------------------------------------------------------------------------------- /packages/core/src/utils/pick.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * With `pick()` you pass an object and an array of keys 3 | * contained in that object. It returns a new object 4 | * which contains only the entries for the selected keys. 5 | * @param {object} obj 6 | * @param {Array} keys 7 | * 8 | * @example 9 | * ``` 10 | * const object = { name: 'randy', age: 23 } 11 | * const newObject = pick(object, ['name']) 12 | * ``` 13 | * 14 | */ 15 | export function pick<T, K extends keyof T>(obj: T, keys: K[]): Pick<T, K> { 16 | const result: any = {} 17 | for (const key of keys) { 18 | result[key] = obj[key] 19 | } 20 | return result 21 | } 22 | -------------------------------------------------------------------------------- /packages/core/src/utils/round.ts: -------------------------------------------------------------------------------- 1 | export function round(number: number, digits: number) { 2 | const factor = 10 ** digits 3 | 4 | const d = Math.round(number * factor) / factor 5 | return d 6 | } 7 | -------------------------------------------------------------------------------- /packages/core/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "outDir": "dist", 4 | "target": "ES2021", 5 | "lib": ["ES2021"], 6 | "module": "NodeNext", 7 | "sourceMap": true, 8 | "skipLibCheck": true, 9 | "strict": true, 10 | "allowJs": true, 11 | "isolatedModules": true, 12 | "declaration": true, 13 | "rootDir": "src" 14 | }, 15 | "include": ["**/*.ts"] 16 | } 17 | -------------------------------------------------------------------------------- /packages/create-typestream/.npmignore: -------------------------------------------------------------------------------- 1 | # Necessary because npm otherwise completely ignores gitignored files even if 2 | # they are explicitly listed under `files` in the `package.json` file 3 | # See: https://npm.github.io/publishing-pkgs-docs/publishing/the-npmignore-file.html 4 | -------------------------------------------------------------------------------- /packages/create-typestream/bin/run.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | import '../dist/index.js' 4 | -------------------------------------------------------------------------------- /packages/create-typestream/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "create-typestream", 3 | "version": "0.0.8", 4 | "description": "Package to create a TypeStream project", 5 | "repository": { 6 | "type": "git", 7 | "url": "https://github.com/scopashq/typestream", 8 | "path": "packages/create-typestream" 9 | }, 10 | "license": "MIT", 11 | "author": "Scopas Technologies GmbH", 12 | "type": "module", 13 | "bin": { 14 | "create-typestream": "./bin/run.js" 15 | }, 16 | "files": [ 17 | "bin", 18 | "dist", 19 | "samples" 20 | ], 21 | "scripts": { 22 | "build": "rimraf dist && tsc", 23 | "check-deps": "depcheck --ignores @types/dotenv", 24 | "clean": "rimraf dist/ node_modules/", 25 | "watch": "tsc --watch --preserveWatchOutput" 26 | }, 27 | "dependencies": { 28 | "chalk": "^5.0.1", 29 | "execa": "^6.1.0", 30 | "prompts": "^2.4.2" 31 | }, 32 | "devDependencies": { 33 | "@types/node": "16", 34 | "@typestream/core": "^0.0.8", 35 | "typescript": "^4.7.0-dev.20220216" 36 | }, 37 | "engines": { 38 | "node": ">=16.0.0" 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /packages/create-typestream/samples/get-started.ts: -------------------------------------------------------------------------------- 1 | import { 2 | definePipe, 3 | typed, 4 | FileResource, 5 | CloudStorageResource, 6 | pick, 7 | dump, 8 | } from '@typestream/core' 9 | 10 |
/** 11 | * The Getting Started Guide will walk you through 12 | * how to use TypeStream to transform your data faster. 13 | * 14 | * The developer experience of TypeStream is all based on the `tyst watch` command that 15 | * executes your code as soon as you make changes and gives you insights into its performance. 16 | * 17 | * Use it with: 18 | * tyst watch transform-product 19 | * 20 | * You will get access to a dataset provided by us (the TypeStream team) 21 | * via Google Cloud Storage. To load the data, we have created a resource 22 | * that points to the corresponding cloud storage bucket. 23 | * Instead, you could also read the data from an S3 bucket or a local folder. 24 | * 25 | * Here, you define the resources that you want the pipe to interact with. 26 | */ 27 | const eCommerceProduct = new CloudStorageResource('e-commerce-product', { 28 | cloudStorageProject: 'scopas', 29 | bucket: 'typestream-datasets', 30 | pathPrefix: 'bestsellers-ecommerce', 31 | }) 32 | 33 | const transformedProduct = new FileResource('transformed-product', { 34 | basePath: './output', 35 | recursive: false, 36 | }) 37 | 38 | /** 39 | * Write your code 40 | * Define your first pipe and process your data. 41 | * The pipe processes documents one at a time. In our example: eCommerceProduct 42 | */ 43 | export default definePipe(eCommerceProduct, async ctx => { 44 | /** 45 | * The context (ctx) allows you to interact with TypeStream 46 | * and gives you access to the read and write actions specific to the processed document. 47 | * 48 | * `asJson()` gives you the current document already parsed as JSON. 49 | */ 50 | 51 | const rawProductData = await ctx.doc.asJson() 52 | 53 | /** 54 | * When you want your inputs to be typed, you can use the `typed()` function. 55 | * In watch mode, TypeStream infers all the types from every object you put through it. 56 | * The return value of `typed()` will be typed. 57 | * 58 | * The first value is the name of the type 59 | * and the second argument is the object you want to create types for. 60 | * 61 | * (If your editor doesn't show the inferred types, try reloading the editor or the TS server.) 62 | */ 63 | const productData = typed('ProductData', rawProductData) 64 | 65 | /** 66 | * In the following, you can write your individual transformation code. 67 | * You can write any code for your transformation 68 | * or use some of the TypeStream utils (like `pick()`) 69 | */ 70 | const prunedProduct = pick(productData.product, [ 71 | 'product_id', 72 | 'title', 73 | 'rating', 74 | 'manufacturer', 75 | 'description', 76 | 'weight', 77 | ]) 78 | 79 | /** 80 | * To get a better feeling of what your transformed data looks like, simply 81 | * use `dump()` to write all the values to a file. 82 | * 83 | * You can dump any value (like strings, numbers, and complex objects) and see the results in the 84 | * "./dump-files" folder. 85 | */ 86 | dump(prunedProduct) 87 | 88 | dump(prunedProduct.manufacturer, { 89 | name: 'manufacturer', 90 | skipDuplicates: true, 91 | }) 92 | 93 | const relatedProducts = 94 | productData.compare_with_similar?.map(product => product.product_id) ?? [] 95 | 96 | const parsedProduct = { 97 | ...prunedProduct, 98 | weight: getProductWeight(prunedProduct.weight), 99 | similarProductIds: relatedProducts, 100 | } 101 | 102 | /** 103 | * With publish you write your transformed data to your output resource. 104 | * The target can be any TypeStream resource that you define.
105 | * 106 | * If you are done developing this pipe, type `tyst process` to actually 107 | * process and publish all your inputs. You can then see the results in the folder 108 | * defined in the resource "transformedProduct". 109 | */ 110 | ctx.publish({ 111 | resource: transformedProduct, 112 | data: Buffer.from(JSON.stringify(parsedProduct)), 113 | metadata: { name: `${parsedProduct.product_id}.json` }, 114 | }) 115 | }) 116 | 117 | function getProductWeight(weight: string) { 118 | const WEIGHT_PATTERN = /\d+\.\d+/ 119 | const weightString = weight.match(WEIGHT_PATTERN)?.[0] ?? '0' 120 | return Number.parseFloat(weightString) 121 | } 122 | -------------------------------------------------------------------------------- /packages/create-typestream/src/async-pipe-out.ts: -------------------------------------------------------------------------------- 1 | import { ExecaChildProcess } from 'execa' 2 | 3 | export async function asyncPipeOut(childProcess: ExecaChildProcess) { 4 | childProcess.stderr?.pipe(process.stderr) 5 | childProcess.stdout?.pipe(process.stdout) 6 | await childProcess 7 | } 8 | -------------------------------------------------------------------------------- /packages/create-typestream/src/create-env.ts: -------------------------------------------------------------------------------- 1 | import { writeFile } from 'node:fs/promises' 2 | import { join } from 'node:path' 3 | 4 | export async function createEnv(path: string) { 5 | const envText = ` 6 | # Environment variables that will be used to authenticate different services. 7 | 8 | # Google Cloud Platform 9 | # If you want to store them inside of your project you can create a 10 | # \`credentials\` folder which is already included in the .gitignore 11 | # GOOGLE_APPLICATION_CREDENTIALS=/path/to/your/service-account-key.json 12 | 13 | # AWS 14 | # AWS_ACCESS_KEY_ID= 15 | # AWS_SECRET_ACCESS_KEY= 16 | ` 17 | 18 | const envPath = join(path, '.env') 19 | 20 | await writeFile(envPath, envText) 21 | } 22 | -------------------------------------------------------------------------------- /packages/create-typestream/src/create-project-files.ts: -------------------------------------------------------------------------------- 1 | import { mkdir, writeFile } from 'node:fs/promises' 2 | import { join } from 'node:path' 3 | 4 | import { createEnv } from './create-env.js' 5 | import { getPackageJson } from './get-package.js' 6 | 7 | const settingsJson = { 8 | 'typescript.tsdk': 'node_modules/typescript/lib', 9 | 'typescript.enablePromptUseWorkspaceTsdk': true, 10 | 'debug.javascript.autoAttachFilter': 'onlyWithFlag', 11 | 'debug.javascript.terminalOptions': { 12 | skipFiles: ['<node_internals>/**', '**/node_modules/**'], 13 | }, 14 | } 15 | 16 | const tsConfig = { 17 | compilerOptions: { 18 | rootDir: 'src', 19 | outDir: 'dist', 20 | strict: true, 21 | declaration: true, 22 | target: 'ES2021', 23 | lib: ['ES2021'], 24 | module: 'NodeNext', 25 | types: ['./src/pipes/generated-types/type-map.js'], 26 | }, 27 | include: ['src/**/*'], 28 | } 29 | 30 | const gitignoreFile = `/node_modules 31 | /builds 32 | /sample-data 33 | /dump-files 34 | /credentials 35 | 36 | .DS_Store 37 | ` 38 | 39 | export async function createProjectFiles({ 40 | projectName, 41 | projectPath, 42 | }: { 43 | projectPath: string 44 | projectName: string 45 | }) { 46 | const vscodePath = join(projectPath, '.vscode') 47 | const vscodeSettings = join(vscodePath, 'settings.json') 48 | await mkdir(vscodePath, { recursive: true }) 49 | 50 | const pipesPath = join(projectPath, 'src',
'pipes') 51 | await mkdir(pipesPath, { recursive: true }) 52 | 53 | await writeFile(vscodeSettings, JSON.stringify(settingsJson, undefined, ' ')) 54 | 55 | const packageJsonPath = join(projectPath, 'package.json') 56 | await writeFile( 57 | packageJsonPath, 58 | JSON.stringify(getPackageJson(projectName), undefined, ' '), 59 | ) 60 | 61 | const tsConfigPath = join(projectPath, 'tsconfig.json') 62 | await writeFile(tsConfigPath, JSON.stringify(tsConfig, undefined, ' ')) 63 | 64 | const gitignorePath = join(projectPath, '.gitignore') 65 | await writeFile(gitignorePath, gitignoreFile) 66 | 67 | await createEnv(projectPath) 68 | } 69 | -------------------------------------------------------------------------------- /packages/create-typestream/src/create-tutorial-pipe.ts: -------------------------------------------------------------------------------- 1 | import { copyFile } from 'node:fs/promises' 2 | import { join } from 'node:path' 3 | import { fileURLToPath } from 'node:url' 4 | 5 | import chalk from 'chalk' 6 | 7 | import { typescriptHint } from './print-getting-started.js' 8 | 9 | export async function createTutorialPipe(projectPath: string) { 10 | const getStartedPipeFile = join( 11 | fileURLToPath(import.meta.url), 12 | '../../samples/get-started.ts', 13 | ) 14 | 15 | const destinationPath = join( 16 | projectPath, 17 | 'src', 18 | 'pipes', 19 | 'transform-product.ts', 20 | ) 21 | await copyFile(getStartedPipeFile, destinationPath) 22 | 23 | return destinationPath 24 | } 25 | 26 | export function printTutorial(projectName: string, destinationPath: string) { 27 | console.log(` 28 | ${boldGreenBox('4. Your getting started guide is ready to use 🎉 Have fun!')} 29 | 30 | 31 | ${boldGreenBox('Next Steps:')} 32 | 33 | 1. Go into your project directory: cd ${projectName} 34 | 2. Open the tutorial pipe: ${chalk.underline(destinationPath)} 35 | 3. Follow the instructions. 36 | 37 | 38 | ${typescriptHint}`) 39 | } 40 | 41 | function boldGreenBox(text: string) { 42 | return chalk.bold.green.inverse(` ${text} `) 43 | } 44 | -------------------------------------------------------------------------------- /packages/create-typestream/src/get-package.ts: -------------------------------------------------------------------------------- 1 | export function getPackageJson(projectName: string) { 2 | return { 3 | name: projectName, 4 | dependencies: {}, 5 | devDependencies: { 6 | '@types/node': '16', 7 | typescript: '^4.7.0-dev.20220320', 8 | }, 9 | type: 'module', 10 | engines: { 11 | node: '>=16.0.0', 12 | }, 13 | typestreamProject: true, 14 | private: true, 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /packages/create-typestream/src/get-project-name.ts: -------------------------------------------------------------------------------- 1 | import { join } from 'node:path' 2 | import { exit } from 'node:process' 3 | 4 | import chalk from 'chalk' 5 | import prompt from 'prompts' 6 | 7 | export async function getProjectName() { 8 | const { projectName } = await prompt( 9 | { 10 | type: 'text', 11 | name: 'projectName', 12 | message: 'What do you want to name your TypeStream project?', 13 | initial: 'typestream-project', 14 | }, 15 | { onCancel: () => exit(1) }, 16 | ) 17 | 18 | const projectPath = join(process.cwd(), projectName) 19 | 20 | if (!/^[\da-z]+(-[\da-z]+)*$/.test(projectName)) { 21 | console.log( 22 | chalk.bold.red( 23 | '\nInvalid project name! 
TypeStream project names can only consist of lowercase letters, numbers, and dashes.\n', 24 | ), 25 | ) 26 | exit(1) 27 | } 28 | 29 | return { projectName, projectPath } 30 | } 31 | -------------------------------------------------------------------------------- /packages/create-typestream/src/index.ts: -------------------------------------------------------------------------------- 1 | import { access } from 'node:fs/promises' 2 | import { exit } from 'node:process' 3 | 4 | import chalk from 'chalk' 5 | 6 | import { createProjectFiles } from './create-project-files.js' 7 | import { createTutorialPipe, printTutorial } from './create-tutorial-pipe.js' 8 | import { getProjectName } from './get-project-name.js' 9 | import { initGit } from './initialize-git.js' 10 | import { installDependencies } from './install-dependencies.js' 11 | import { logStage } from './log-stage.js' 12 | import { printGettingStarted } from './print-getting-started.js' 13 | 14 | async function main() { 15 | const getStartedGuide = process.argv.includes('--get-started') 16 | 17 | const { projectName, projectPath } = await getProjectName() 18 | 19 | await assertDirEmpty(projectPath) 20 | 21 | logStage(`1. Creating project ${projectName}`) 22 | console.log(`at: ${projectPath}...`) 23 | 24 | await createProjectFiles({ projectName, projectPath }) 25 | 26 | let tutorialPipePath: string 27 | if (getStartedGuide) tutorialPipePath = await createTutorialPipe(projectPath) 28 | 29 | logStage('2. Installing dependencies...') 30 | await installDependencies(projectPath) 31 | 32 | logStage('3. Initializing git...') 33 | await initGit(projectPath) 34 | 35 | if (getStartedGuide) { 36 | printTutorial(projectName, tutorialPipePath!) 37 | } else { 38 | printGettingStarted(projectName) 39 | } 40 | } 41 | 42 | async function assertDirEmpty(path: string) { 43 | try { 44 | await access(path) 45 | console.log( 46 | chalk.bold.red( 47 | `\nThe directory (${path}) already exists. Use another name for your project!\n`, 48 | ), 49 | ) 50 | exit(1) 51 | } catch { 52 | return true 53 | } 54 | } 55 | 56 | void main() 57 | -------------------------------------------------------------------------------- /packages/create-typestream/src/initialize-git.ts: -------------------------------------------------------------------------------- 1 | import { execa } from 'execa' 2 | 3 | import { asyncPipeOut } from './async-pipe-out.js' 4 | 5 | export async function initGit(path: string) { 6 | try { 7 | await asyncPipeOut(execa('git', ['init'], { cwd: path })) 8 | 9 | await asyncPipeOut(execa('git', ['add', '.'], { cwd: path })) 10 | await asyncPipeOut( 11 | execa('git', ['commit', '-m', 'Initial commit'], { 12 | cwd: path, 13 | }), 14 | ) 15 | } catch { 16 | console.log('Setting up git repository failed. Skipping this step.') 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /packages/create-typestream/src/install-dependencies.ts: -------------------------------------------------------------------------------- 1 | import { execa } from 'execa' 2 | 3 | import { asyncPipeOut } from './async-pipe-out.js' 4 | import { installPackage } from './install-package.js' 5 | 6 | export async function installDependencies(path: string) { 7 | const useYarn = await hasYarn() 8 | 9 | await (useYarn 10 | ? 
asyncPipeOut(execa('yarn', [], { cwd: path })) 11 | : asyncPipeOut(execa('npm', ['install'], { cwd: path }))) 12 | 13 | await installPackage({ 14 | devDependency: true, 15 | packageName: '@typestream/sdk', 16 | path, 17 | useYarn, 18 | }) 19 | 20 | await installPackage({ 21 | devDependency: false, 22 | packageName: '@typestream/core', 23 | path, 24 | useYarn, 25 | }) 26 | } 27 | 28 | async function hasYarn(cwd?: string) { 29 | try { 30 | await execa('yarn', ['--version'], { cwd: cwd ?? process.cwd() }) 31 | return true 32 | } catch { 33 | return false 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /packages/create-typestream/src/install-package.ts: -------------------------------------------------------------------------------- 1 | import { execa } from 'execa' 2 | 3 | import { asyncPipeOut } from './async-pipe-out.js' 4 | 5 | export async function installPackage({ 6 | devDependency, 7 | packageName, 8 | path, 9 | useYarn, 10 | }: { 11 | path: string 12 | packageName: string 13 | useYarn: boolean 14 | devDependency: boolean 15 | }) { 16 | await (useYarn 17 | ? asyncPipeOut( 18 | execa('yarn', ['add', ...(devDependency ? ['-D'] : []), packageName], { 19 | cwd: path, 20 | }), 21 | ) 22 | : asyncPipeOut( 23 | execa( 24 | 'npm', 25 | ['install', ...(devDependency ? ['--save-dev'] : []), packageName], 26 | { cwd: path }, 27 | ), 28 | )) 29 | } 30 | -------------------------------------------------------------------------------- /packages/create-typestream/src/log-stage.ts: -------------------------------------------------------------------------------- 1 | import chalk from 'chalk' 2 | 3 | export function logStage(text: string) { 4 | console.log('\n' + chalk.green.inverse.bold(` ${text} `)) 5 | } 6 | -------------------------------------------------------------------------------- /packages/create-typestream/src/print-getting-started.ts: -------------------------------------------------------------------------------- 1 | import chalk from 'chalk' 2 | 3 | import { logStage } from './log-stage.js' 4 | 5 | const hintBlock = chalk.bold.inverse(' Hint: ') 6 | 7 | const link = chalk.underline( 8 | 'https://code.visualstudio.com/docs/typescript/typescript-compiling#_using-the-workspace-version-of-typescript', 9 | ) 10 | 11 | export const typescriptHint = 12 | chalk.yellow(`${hintBlock} Make sure that you are using the workspace TypeScript version. 13 | How to do it in VS Code: ${link}`) 14 | 15 | export function printGettingStarted(projectName: string) { 16 | logStage('4. Your project is all set up! 🎉') 17 | console.log(` 18 | 19 | 20 | ${boldBox('Get started with your project:')} 21 | 22 | 1. Go to your project folder: ${chalk.bold.italic(`cd ${projectName}`)} 23 | 2. 
Create a new pipe: ${chalk.bold.italic( 24 | `npx tyst create-pipe <pipe-name>`, 25 | )} 26 | 27 | 28 | ${typescriptHint} 29 | `) 30 | } 31 | 32 | function boldBox(text: string) { 33 | return chalk.bold.green.inverse(` ${text} `) 34 | } 35 | -------------------------------------------------------------------------------- /packages/create-typestream/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "outDir": "dist", 4 | "rootDir": "src", 5 | "strict": true, 6 | "target": "ES2021", 7 | "module": "NodeNext", 8 | "skipLibCheck": true 9 | }, 10 | "include": ["src/**/*"] 11 | } 12 | -------------------------------------------------------------------------------- /packages/sdk/.npmignore: -------------------------------------------------------------------------------- 1 | # Necessary because npm otherwise completely ignores gitignored files even if 2 | # they are explicitly listed under `files` in the `package.json` file 3 | # See: https://npm.github.io/publishing-pkgs-docs/publishing/the-npmignore-file.html 4 | -------------------------------------------------------------------------------- /packages/sdk/README.md: -------------------------------------------------------------------------------- 1 | # [TypeStream](https://typestream.dev) SDK 2 | 3 | > This package is part of the [**TypeStream** data transformation framework](https://typestream.dev). 4 | -------------------------------------------------------------------------------- /packages/sdk/bin/run.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | import { run, flush, Errors } from '@oclif/core' 4 | 5 | import { loadProjectEnv } from '../dist/utils/load-project-env.js' 6 | 7 | loadProjectEnv() 8 | 9 | run(undefined, import.meta.url) 10 | .then(flush) 11 | .catch(Errors.handle) 12 | -------------------------------------------------------------------------------- /packages/sdk/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@typestream/sdk", 3 | "version": "0.0.11", 4 | "description": "SDK for the rapid development of data transformation projects", 5 | "repository": { 6 | "type": "git", 7 | "url": "https://github.com/scopashq/typestream", 8 | "path": "packages/sdk" 9 | }, 10 | "license": "MIT", 11 | "author": "Scopas Technologies GmbH", 12 | "type": "module", 13 | "bin": { 14 | "tyst": "./bin/run.js" 15 | }, 16 | "files": [ 17 | "bin", 18 | "dist" 19 | ], 20 | "scripts": { 21 | "build": "rimraf dist && tsc", 22 | "check-deps": "depcheck --ignores oclif,@oclif/*,@types/dotenv", 23 | "clean": "rimraf dist/ node_modules/", 24 | "watch": "tsc --watch --preserveWatchOutput" 25 | }, 26 | "dependencies": { 27 | "@aws-sdk/client-s3": "^3.54.1", 28 | "@google-cloud/storage": "^5.18.2", 29 | "@oclif/core": "^1", 30 | "@oclif/plugin-help": "^5", 31 | "@supercharge/promise-pool": "^2.1.0", 32 | "@types/dotenv-safe": "^8.1.2", 33 | "@typestream/core-protocol": "^0.0.5", 34 | "ansi-escapes": "^5.0.0", 35 | "chalk": "^5.0.0", 36 | "chalk-template": "^0.4.0", 37 | "chokidar": "^3.5.3", 38 | "cli-cursor": "^4.0.0", 39 | "compare-versions": "^4.1.3", 40 | "date-fns": "^2.28.0", 41 | "dotenv-safe": "^8.2.0", 42 | "esbuild": "^0.14.18", 43 | "genson-js": "^0.0.8", 44 | "json-schema-to-typescript": "^10.1.5", 45 | "ora": "^6.1.0", 46 | "p-defer": "^4.0.0", 47 | "prompts": "^2.4.2", 48 | "rxjs": "^7.5.5" 49 | }, 50 | "devDependencies": { 51 | "@types/dotenv": 
"^8.2.0", 52 | "@types/node": "16", 53 | "@types/prompts": "^2.0.14", 54 | "oclif": "^2", 55 | "typescript": "^4.7.0-dev.20220216" 56 | }, 57 | "engines": { 58 | "node": ">=16.0.0" 59 | }, 60 | "oclif": { 61 | "bin": "tyst", 62 | "dirname": "typestream-sdk", 63 | "commands": "./dist/commands", 64 | "plugins": [ 65 | "@oclif/plugin-help" 66 | ], 67 | "hooks": { 68 | "init": "./dist/hooks/init/assert-node-version" 69 | } 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /packages/sdk/src/commands/cloud.ts: -------------------------------------------------------------------------------- 1 | import { Command } from '@oclif/core' 2 | import chalk from 'chalk' 3 | import ct from 'chalk-template' 4 | 5 | const logo = String.raw` 6 | _____ ___ _ ___ _ _ 7 | |_ _| _ _ __ ___/ __| |_ _ _ ___ __ _ _ __ / __| |___ _ _ __| | 8 | | || || | '_ \/ -_)__ \ _| '_/ -_) _' | ' \ | (__| / _ \ || / _' | 9 | |_| \_, | .__/\___|___/\__|_| \___\__,_|_|_|_| \___|_\___/\_,_\__,_| 10 | |__/|_| ` 11 | 12 | export default class Login extends Command { 13 | static description = chalk.blue('Get started with TypeStream Cloud.') 14 | 15 | static strict = true 16 | async run(): Promise { 17 | console.log( 18 | ct`{bold.blue ${logo}} 19 | 20 | 21 | {bold TypeStream Cloud is currently in closed beta. 22 | It allows you to deploy and execute your pipes in the cloud}: 23 | 24 | • Parallelize computation and process terabytes in minutes 25 | • Stream your data: subscribe a pipe to a pub/sub topic 26 | • Push data from your application directly to TypeStream Cloud 27 | • Manage all your pipes and see their errors in our dashboard 28 | 29 | {bold ▶ Learn more and get early access: 30 | {underline.blue https://typestream.cloud}} 31 | `, 32 | ) 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /packages/sdk/src/commands/create-pipe.ts: -------------------------------------------------------------------------------- 1 | import { Command } from '@oclif/core' 2 | import ct from 'chalk-template' 3 | 4 | import { loadAllPaths } from '../paths/load-all-paths.js' 5 | import { createPipe } from '../pipe/creation/create-pipe.js' 6 | 7 | //TODO: check documentation for every cli command 8 | export default class CreateProject extends Command { 9 | static description = 'Create the necessary files for a new pipe.' 10 | 11 | static args = [ 12 | { 13 | name: 'pipeName', 14 | required: true, 15 | description: 'Pipe name that will be created!', 16 | }, 17 | ] 18 | 19 | static strict = true 20 | async run(): Promise { 21 | const { args } = await this.parse(CreateProject) 22 | 23 | const paths = await loadAllPaths(args.pipeName) 24 | 25 | console.log(`Creating pipe ${args.pipeName}...`) 26 | 27 | await createPipe(paths) 28 | 29 | console.log( 30 | ct`\nPipe creation successful. 31 | 32 | {inverse.green.bold Getting started with development: } 33 | 34 | 1. Define a resource and put it into your pipe. 35 | 2. Run: {italic.bold tyst watch ${args.pipeName}} to see how your code is working. 
36 | 37 | 38 | Your pipe file: {underline ${paths.pipe.sourceFileName}} 39 | `, 40 | ) 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /packages/sdk/src/commands/process.ts: -------------------------------------------------------------------------------- 1 | import { Command, Flags } from '@oclif/core' 2 | 3 | import { loadAllPaths } from '../paths/load-all-paths.js' 4 | import { PipePath } from '../paths/pipe-paths.js' 5 | import { ProjectPaths } from '../paths/project-paths.js' 6 | import { runPipeProgress } from '../process/run-pipe-process.js' 7 | import { logReplace } from '../utils/log-replace.js' 8 | import { renderWatchProgress } from '../watch/render-watch-progress.js' 9 | import { WatchProgress } from '../watch/watch-progress.js' 10 | 11 | export default class Process extends Command { 12 | static description = 13 | "Process all documents of a pipe's resource and publish the outputs." 14 | 15 | static args = [ 16 | { 17 | name: 'pipeName', 18 | required: true, 19 | description: 'Pipe name that will run and process documents.', 20 | }, 21 | ] 22 | 23 | static flags = { 24 | debug: Flags.boolean({ 25 | char: 'd', 26 | description: 'Enable debugging for the pipe.', 27 | default: false, 28 | }), 29 | } 30 | 31 | static strict = true 32 | async run(): Promise<void> { 33 | const { args, flags } = await this.parse(Process) 34 | const pipeName = args.pipeName as string 35 | const paths = await loadAllPaths(pipeName) 36 | 37 | await this.process({ enableDebugging: flags.debug, paths }) 38 | } 39 | 40 | private async process({ 41 | enableDebugging, 42 | paths, 43 | }: { 44 | enableDebugging: boolean 45 | paths: { 46 | project: ProjectPaths 47 | pipe: PipePath 48 | } 49 | }) { 50 | let lastProgress: WatchProgress 51 | runPipeProgress({ 52 | debuggingEnabled: enableDebugging, 53 | paths, 54 | concurrency: 20, 55 | }) 56 | .forEach(progress => { 57 | lastProgress = progress 58 | logReplace.write(renderWatchProgress(progress)) 59 | }) 60 | .catch((error: Error) => { 61 | logReplace.write(renderWatchProgress(lastProgress, { error })) 62 | }) 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /packages/sdk/src/commands/watch.ts: -------------------------------------------------------------------------------- 1 | import { Command, Flags } from '@oclif/core' 2 | import { throttleTime } from 'rxjs' 3 | 4 | import { loadAllPaths } from '../paths/load-all-paths.js' 5 | import { logReplace } from '../utils/log-replace.js' 6 | import { observeDirectory } from '../utils/observe-directory.js' 7 | import { renderWatchProgress } from '../watch/render-watch-progress.js' 8 | import { runPipe } from '../watch/run-pipe.js' 9 | import { WatchProgress } from '../watch/watch-progress.js' 10 | 11 | export default class Watch extends Command { 12 | static description = 13 | 'Watch a pipe and continuously get feedback without publishing.'
14 | 15 | static args = [ 16 | { 17 | name: 'pipeName', 18 | required: true, 19 | description: 20 | 'Pipe that will be watched and executed on every file change.', 21 | }, 22 | ] 23 | 24 | static flags = { 25 | count: Flags.integer({ 26 | char: 'c', 27 | description: 'Sample count', 28 | default: 100, 29 | }), 30 | 'no-debug': Flags.boolean({ 31 | char: 'D', 32 | description: 'Disable debugging', 33 | default: false, 34 | }), 35 | 'no-typing': Flags.boolean({ 36 | char: 'T', 37 | description: 'Disable automatic type inference.', 38 | default: false, 39 | }), 40 | } 41 | 42 | static strict = true 43 | 44 | async run() { 45 | const { args, flags } = await this.parse(Watch) 46 | const pipeName = args.pipeName as string 47 | const paths = await loadAllPaths(pipeName) 48 | 49 | let lastRun = Promise.resolve() 50 | let abortController = new AbortController() 51 | let alreadyWaiting = false 52 | let isFirstRun = true 53 | 54 | const build = async () => { 55 | if (alreadyWaiting) return 56 | 57 | alreadyWaiting = true 58 | 59 | abortController.abort() 60 | await lastRun 61 | 62 | abortController = new AbortController() 63 | let lastProgress: WatchProgress 64 | lastRun = runPipe({ 65 | pipeName, 66 | debuggingEnabled: !flags['no-debug'], 67 | paths, 68 | abortSignal: abortController.signal, 69 | sampleCount: flags.count, 70 | checkSampleCounts: isFirstRun, 71 | captureSchemas: !flags['no-typing'], 72 | }) 73 | .forEach(progress => { 74 | lastProgress = progress 75 | logReplace.write(renderWatchProgress(progress)) 76 | }) 77 | .catch((error: Error) => { 78 | logReplace.write(renderWatchProgress(lastProgress, { error })) 79 | }) 80 | 81 | isFirstRun = false 82 | alreadyWaiting = false 83 | } 84 | 85 | observeDirectory(paths.project.sourcePath) 86 | // Throttle events to avoid jank and race conditions 87 | .pipe(throttleTime(500)) 88 | .subscribe(() => void build()) 89 | 90 | // Trigger initial build 91 | void build() 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /packages/sdk/src/hooks/init/assert-node-version.ts: -------------------------------------------------------------------------------- 1 | import { Hook } from '@oclif/core' 2 | import compareVersions from 'compare-versions' 3 | 4 | const MINIMUM_SUPPORTED_VERSION = '16.0.0' 5 | 6 | const hook: Hook<'init'> = async function () { 7 | // compareVersions will return the following possible values 8 | // if v1 is greater than v2 -> 1 9 | // if v1 is equal to v2 -> 0 10 | // if v1 is less than v2 -> -1 11 | // Thus, if we want process.version to be equal to or higher than the minimum 12 | // supported one, we'll assert that the comparison is 1 or 0 13 | 14 | const nodeVersion = process.version 15 | 16 | const comparison = compareVersions(nodeVersion, MINIMUM_SUPPORTED_VERSION) 17 | const isSupported = comparison === 0 || comparison === 1 18 | 19 | if (!isSupported) { 20 | // Because of the way oclif works, we can't simply throw an error but have 21 | // to exit the process manually.
22 | process.stdout.write( 23 | `OUTDATED NODE VERSION DETECTED:\nnode version ${nodeVersion} is older than minimum supported version ${MINIMUM_SUPPORTED_VERSION}.\nPlease upgrade node before continuing!\n`, 24 | ) 25 | process.exit() 26 | } 27 | } 28 | 29 | export default hook 30 | -------------------------------------------------------------------------------- /packages/sdk/src/index.ts: -------------------------------------------------------------------------------- 1 | export { run } from '@oclif/core' 2 | -------------------------------------------------------------------------------- /packages/sdk/src/paths/load-all-paths.ts: -------------------------------------------------------------------------------- 1 | import { assertPipeName } from '@typestream/core-protocol/utils' 2 | 3 | import { getPipePaths } from './pipe-paths.js' 4 | import { getProjectPaths } from './project-paths.js' 5 | 6 | export async function loadAllPaths(pipeName: string) { 7 | if (/\/|\./.test(pipeName)) 8 | throw new Error( 9 | 'You must only specify the name of the pipe, without a path or file extension. The pipe must be in "src/pipes/<pipe-name>.ts"!', 10 | ) 11 | if (!pipeName) throw new Error('Could not parse pipe name!') 12 | assertPipeName(pipeName) 13 | 14 | const project = await getProjectPaths() 15 | const pipe = getPipePaths(project, pipeName) 16 | 17 | return { 18 | project, 19 | pipe, 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /packages/sdk/src/paths/pipe-paths.ts: -------------------------------------------------------------------------------- 1 | import { join } from 'node:path' 2 | 3 | import { ProjectPaths } from './project-paths.js' 4 | 5 | export type PipePath = { 6 | name: string 7 | sourceFileName: string 8 | bundleDirName: string 9 | bundleFileName: string 10 | } 11 | 12 | export function getPipePaths( 13 | projectPaths: ProjectPaths, 14 | pipeName: string, 15 | ): PipePath { 16 | const bundleDirName = join(projectPaths.path, 'builds', pipeName) 17 | 18 | return { 19 | name: pipeName, 20 | sourceFileName: join(projectPaths.pipesPath, `${pipeName}.ts`), 21 | bundleDirName, 22 | bundleFileName: join(bundleDirName, 'bundle.js'), 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /packages/sdk/src/paths/project-paths.ts: -------------------------------------------------------------------------------- 1 | import { join } from 'node:path' 2 | 3 | import { assertProjectName } from '@typestream/core-protocol/utils' 4 | 5 | import { getProjectName } from '../project/get-project-name.js' 6 | 7 | export type ProjectPaths = { 8 | name: string 9 | path: string 10 | sourcePath: string 11 | pipesPath: string 12 | resourcesDir: string 13 | typesPath: string 14 | typeMapPath: string 15 | inferredTypesDir: string 16 | dumpDir: string 17 | } 18 | 19 | /** 20 | * Gets all relevant folders and files of a project and asserts their names and that the cwd is actually a project. 21 | */ 22 | export async function getProjectPaths(options?: { 23 | basePath?: string 24 | projectName?: string 25 | }): Promise<ProjectPaths> { 26 | const projectRoot = options?.basePath ?? process.cwd() 27 | const typesFolder = join(projectRoot, 'src/pipes/generated-types') 28 | 29 | const projectName = 30 | options?.projectName ?? 
(await getProjectName(projectRoot)) 31 | 32 | assertProjectName(projectName) 33 | 34 | return { 35 | name: projectName, 36 | path: projectRoot, 37 | sourcePath: join(projectRoot, 'src'), 38 | pipesPath: join(projectRoot, 'src/pipes'), 39 | typesPath: typesFolder, 40 | resourcesDir: join(projectRoot, 'sample-data'), 41 | typeMapPath: join(typesFolder, 'type-map.d.ts'), 42 | inferredTypesDir: join(typesFolder, 'keys'), 43 | dumpDir: join(projectRoot, 'dump-files'), 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /packages/sdk/src/pipe/bundling/build.ts: -------------------------------------------------------------------------------- 1 | import esbuild from 'esbuild' 2 | 3 | import { PipePath } from '../../paths/pipe-paths.js' 4 | import { COMMONJS_POLYFILLS_BANNER } from './commonjs-polyfills.js' 5 | 6 | export async function buildPipe(pipePath: PipePath) { 7 | await esbuild.build({ 8 | entryPoints: [pipePath.sourceFileName], 9 | bundle: true, 10 | outdir: pipePath.bundleDirName, 11 | entryNames: 'bundle', 12 | platform: 'node', 13 | format: 'esm', 14 | banner: { 15 | js: COMMONJS_POLYFILLS_BANNER, 16 | }, 17 | minify: false, 18 | sourcemap: true, 19 | treeShaking: true, 20 | 21 | // Without this ESBuild would print build errors directly to the terminal 22 | logLevel: 'silent', 23 | }) 24 | } 25 | -------------------------------------------------------------------------------- /packages/sdk/src/pipe/bundling/commonjs-polyfills.ts: -------------------------------------------------------------------------------- 1 | export const COMMONJS_POLYFILLS_BANNER = ` 2 | // CommonJS polyfills generated by TypeStream for compatibility with CommonJS 3 | // modules which depend on CommonJS-specific Node.js globals to work 4 | import { createRequire as __cjspCreateRequire } from 'node:module'; 5 | const require = __cjspCreateRequire(import.meta.url); 6 | 7 | import { dirname as __cjspDirname } from 'node:path'; 8 | import { fileURLToPath as __cjspFileURLToPath } from 'node:url'; 9 | const __dirname = __cjspDirname(__cjspFileURLToPath(import.meta.url)); 10 | 11 | // Start of the regular bundle 12 | `.trim() 13 | -------------------------------------------------------------------------------- /packages/sdk/src/pipe/bundling/load-pipe-bundle.ts: -------------------------------------------------------------------------------- 1 | import { parse } from 'node:path' 2 | import { pathToFileURL } from 'node:url' 3 | 4 | import { BundleSchema } from '@typestream/core-protocol' 5 | 6 | const loadedBundlePaths = new Set<string>() 7 | 8 | export async function loadPipeBundle(path: string) { 9 | // We do this check to ensure that a bundle isn't accidentally loaded twice 10 | // into the same Node.js thread or process as that might cause outdated code 11 | // to be loaded or a memory leak to occur (both due to module caching). 12 | if (loadedBundlePaths.has(path)) 13 | throw new Error( 14 | `The bundle "${path}" has already been loaded!` + 15 | ` Loading a bundle twice in the same thread or process is not allowed.`, 16 | ) 17 | loadedBundlePaths.add(path) 18 | 19 | const parsedPath = parse(path) 20 | if (parsedPath.ext !== '.js') 21 | throw new Error( 22 | `Invalid bundle extension "${parsedPath.ext}"! 
Must be ".js".`, 23 | ) 24 | 25 | const fileUrl = pathToFileURL(path) 26 | 27 | const module = await import(fileUrl.toString()) 28 | const bundle = module.default 29 | 30 | const parsedBundle = await parsePipeBundle(bundle) 31 | return parsedBundle 32 | } 33 | 34 | async function parsePipeBundle(bundle: any) { 35 | try { 36 | const validatedBundle = BundleSchema.bundleSchema.parse(bundle) 37 | return validatedBundle 38 | } catch (error: any) { 39 | throw new Error(`Failed to parse bundle with error: ${error}`) 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /packages/sdk/src/pipe/creation/check-pipe-exists.ts: -------------------------------------------------------------------------------- 1 | import { stat } from 'node:fs/promises' 2 | 3 | export async function checkPipeExists(path: string) { 4 | try { 5 | const s = await stat(path) 6 | return s.isFile() 7 | } catch { 8 | return false 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /packages/sdk/src/pipe/creation/create-pipe.ts: -------------------------------------------------------------------------------- 1 | import { mkdir, stat, writeFile } from 'node:fs/promises' 2 | 3 | import { PipePath } from '../../paths/pipe-paths.js' 4 | import { ProjectPaths } from '../../paths/project-paths.js' 5 | import { createTypeMap } from '../../typing/generate-type-map.js' 6 | import { checkPipeExists } from './check-pipe-exists.js' 7 | import { getPipeCode } from './get-pipe-code.js' 8 | 9 | export async function createPipe(paths: { 10 | project: ProjectPaths 11 | pipe: PipePath 12 | }) { 13 | if (await checkPipeExists(paths.pipe.sourceFileName)) 14 | throw new Error(`Pipe with name "${paths.pipe.name}" already exists!`) 15 | 16 | await mkdir(paths.project.sourcePath, { recursive: true }) 17 | await writeFile(paths.pipe.sourceFileName, getPipeCode(), 'utf-8') 18 | console.log(`Created pipe source file at: ${paths.pipe.sourceFileName}`) 19 | 20 | await mkdir(paths.project.typesPath, { recursive: true }) 21 | 22 | // An empty type map is only created if one does not exist yet.
23 | try { 24 | await stat(paths.project.typeMapPath) 25 | } catch { 26 | await createTypeMap(paths.project) 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /packages/sdk/src/pipe/creation/get-pipe-code.ts: -------------------------------------------------------------------------------- 1 | export function getPipeCode() { 2 | return ` 3 | import { definePipe, typed, dump } from '@typestream/core' 4 | 5 | /** 6 | * Get automatically typed data: 7 | * "const typedData = typed('TypeName', unknownObject)" 8 | * 9 | * See values during development: 10 | * "dump(typedData.fieldImInterestedIn)" 11 | */ 12 | export default definePipe(undefined, async ctx => { 13 | // Write your code here 14 | }) 15 | `.trim() 16 | } 17 | -------------------------------------------------------------------------------- /packages/sdk/src/process/publish-documents.ts: -------------------------------------------------------------------------------- 1 | import { PublishArgs } from '@typestream/core-protocol' 2 | import { AnyResource } from '@typestream/core-protocol/resources' 3 | 4 | import { ProjectPaths } from '../paths/project-paths.js' 5 | import { getResourceProvider } from '../resources/providers/index.js' 6 | import { ResourceProvider } from '../resources/providers/resource-provider.js' 7 | 8 | export async function publishDocuments( 9 | publishResults: PublishArgs<AnyResource>[], 10 | projectPath: ProjectPaths, 11 | ) { 12 | const providerMap: Record<string, ResourceProvider<AnyResource>> = {} 13 | 14 | await Promise.all( 15 | publishResults.map(publishResult => { 16 | const provider = (providerMap[publishResult.resource.name] ??= 17 | getResourceProvider(publishResult.resource, projectPath)) 18 | 19 | return provider.publishDocument( 20 | publishResult.data, 21 | publishResult.metadata, 22 | ) 23 | }), 24 | ) 25 | } 26 | -------------------------------------------------------------------------------- /packages/sdk/src/process/run-pipe-process.ts: -------------------------------------------------------------------------------- 1 | import { 2 | AnyDocument, 3 | AnyResource, 4 | resourceFromRef, 5 | } from '@typestream/core-protocol/resources' 6 | import chalk from 'chalk' 7 | 8 | import { PipePath } from '../paths/pipe-paths.js' 9 | import { ProjectPaths } from '../paths/project-paths.js' 10 | import { buildPipe } from '../pipe/bundling/build.js' 11 | import { getResourceProvider } from '../resources/providers/index.js' 12 | import { ResourceProvider } from '../resources/providers/resource-provider.js' 13 | import { PipeController } from '../runner/pipe-controller.js' 14 | import { DataDumper } from '../utils/data-dumper.js' 15 | import { ErrorLogger } from '../utils/error-logger.js' 16 | import observeAsync from '../utils/observe-async.js' 17 | import { promisePool } from '../utils/promise-pool.js' 18 | import { ErrorSummary } from '../watch/error-summary.js' 19 | import { WatchProgress } from '../watch/watch-progress.js' 20 | import { publishDocuments } from './publish-documents.js' 21 | 22 | interface PipeRunOptions { 23 | debuggingEnabled: boolean 24 | concurrency: number 25 | paths: { 26 | project: ProjectPaths 27 | pipe: PipePath 28 | } 29 | } 30 | 31 | export function runPipeProgress({ 32 | debuggingEnabled, 33 | paths, 34 | concurrency, 35 | }: PipeRunOptions) { 36 | if (debuggingEnabled && concurrency !== 1) { 37 | console.log( 38 | chalk.yellow( 39 | `For debugging, the processing concurrency can't be ${concurrency} and will be set to 1.`, 40 | ), 41 | ) 42 | concurrency = 1 43 | } 44 | 45 | return 
observeAsync(async next => { 46 | const progress: WatchProgress = { 47 | stage: 'BUILD', 48 | documentNumbers: { 49 | failed: 0, 50 | succeeded: 0, 51 | published: 0, 52 | currentDocumentNumber: 0, 53 | }, 54 | errorSummary: new ErrorSummary(), 55 | } 56 | next(progress) 57 | 58 | const pipeController = new PipeController(debuggingEnabled) 59 | const dataDumper = new DataDumper(paths.project) 60 | 61 | await buildPipe(paths.pipe) 62 | 63 | const resourceRef = await pipeController.loadPipe(paths.pipe.name, { 64 | enableSchemaCapturing: false, 65 | dumpFunction: debuggingEnabled 66 | ? options => dataDumper.dump(options) 67 | : undefined, 68 | enableWriting: true, 69 | }) 70 | const resource = resourceFromRef(resourceRef) 71 | 72 | const provider = getResourceProvider(resource, paths.project) 73 | 74 | const errorLogger = new ErrorLogger(paths.project, paths.pipe.name) 75 | 76 | progress.stage = 'PROCESS' 77 | 78 | await promisePool({ 79 | concurrency, 80 | generator: (provider as ResourceProvider<AnyResource>).getDocuments(), 81 | fn: async (doc: AnyDocument) => { 82 | next(progress) 83 | 84 | try { 85 | progress.documentNumbers.currentDocumentNumber++ 86 | next(progress) 87 | 88 | const res = await pipeController.processDocument(doc) 89 | 90 | progress.documentNumbers.succeeded++ 91 | 92 | await publishDocuments(res.documentsToPublish, paths.project) 93 | 94 | progress.documentNumbers.published += res.documentsToPublish.length 95 | } catch (error: any) { 96 | progress.documentNumbers.failed++ 97 | progress.errorSummary.captureError(error) 98 | 99 | errorLogger.write({ 100 | error, 101 | inputDocumentRef: doc.toDocumentRef(), 102 | }) 103 | } 104 | 105 | next(progress) 106 | }, 107 | }) 108 | 109 | progress.stage = 'DONE' 110 | next(progress) 111 | 112 | errorLogger.close() 113 | 114 | await pipeController.stop() 115 | }) 116 | } 117 | -------------------------------------------------------------------------------- /packages/sdk/src/project/get-project-name.ts: -------------------------------------------------------------------------------- 1 | import { readFile } from 'node:fs/promises' 2 | import { join } from 'node:path' 3 | 4 | import { assertProjectName } from '@typestream/core-protocol/utils' 5 | 6 | export async function getProjectName(path: string) { 7 | const packageJsonPath = join(path, 'package.json') 8 | const packageJson = await loadPackageJson(packageJsonPath) 9 | 10 | if (packageJson.name && packageJson.typestreamProject) { 11 | assertProjectName(packageJson.name) 12 | return packageJson.name 13 | } 14 | 15 | throw new Error(`${path} is not a valid TypeStream project!`) 16 | } 17 | 18 | async function loadPackageJson(path: string) { 19 | try { 20 | const packageJson = JSON.parse(await readFile(path, { encoding: 'utf-8' })) 21 | return packageJson as { 22 | name?: string 23 | typestreamProject?: boolean 24 | } 25 | } catch { 26 | throw new Error(`Could not load package.json at ${path}!`) 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /packages/sdk/src/resources/providers/cloud-storage-provider.ts: -------------------------------------------------------------------------------- 1 | import { join } from 'node:path' 2 | 3 | import { Bucket, File, GetFilesOptions, Storage } from '@google-cloud/storage' 4 | import { 5 | CloudStorageDocument, 6 | CloudStorageResource, 7 | CloudStoragePublishMetadata, 8 | } from '@typestream/core-protocol/resources' 9 | 10 | import { SampleCachingProvider } from '../samples/sample-provider.js' 11 | import { 
ResourceProvider } from './resource-provider.js' 12 | 13 | export class CloudStorageResourceProvider extends ResourceProvider<CloudStorageResource> { 14 | async *getDocuments() { 15 | const { 16 | bucket: bucketName, 17 | cloudStorageProject, 18 | pathPrefix, 19 | } = this.resource.options 20 | const storage = new Storage({ projectId: cloudStorageProject }) 21 | const bucket = new Bucket(storage, bucketName) 22 | 23 | let nextQuery: GetFilesOptions = { 24 | prefix: pathPrefix, 25 | autoPaginate: false, 26 | } 27 | 28 | let files: File[] 29 | 30 | while (nextQuery) { 31 | ;[files, nextQuery] = await bucket.getFiles(nextQuery) 32 | for (const file of files) { 33 | const download = async () => { 34 | const [data] = await file.download() 35 | return data 36 | } 37 | 38 | const path = file.name.slice(pathPrefix?.length ?? 0) 39 | yield new CloudStorageDocument( 40 | this.resource, 41 | { ...file.metadata, slicedPath: path }, 42 | download, 43 | ) 44 | } 45 | } 46 | } 47 | 48 | async *getSamples() { 49 | const sampleProvider = new SampleCachingProvider({ 50 | resource: this.resource, 51 | projectPaths: this.projectPaths, 52 | documentStream: this.getDocuments(), 53 | }) 54 | 55 | for await (const sample of sampleProvider.load()) { 56 | yield new CloudStorageDocument(this.resource, sample.meta, sample.read) 57 | } 58 | } 59 | 60 | async cacheSamples({ count = 200, validateCounts = false }) { 61 | const sampleProvider = new SampleCachingProvider({ 62 | resource: this.resource, 63 | projectPaths: this.projectPaths, 64 | documentStream: this.getDocuments(), 65 | }) 66 | await sampleProvider.initSample({ sampleSize: count, validateCounts }) 67 | } 68 | 69 | publishDocument(data: Buffer, metadata: CloudStoragePublishMetadata) { 70 | const storage = new Storage() 71 | const bucket = storage.bucket(this.resource.options.bucket) 72 | 73 | const pathPrefix = this.resource.options.pathPrefix 74 | const path = join(pathPrefix, metadata.name) 75 | 76 | return bucket 77 | .file(path) 78 | .save(data, { metadata: metadata.metadata, resumable: false }) 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /packages/sdk/src/resources/providers/file-resource-provider.ts: -------------------------------------------------------------------------------- 1 | import { mkdir, readFile, writeFile } from 'node:fs/promises' 2 | import { join, join as joinPath, parse } from 'node:path' 3 | 4 | import { FILE_NAME_PATTERN } from '@typestream/core-protocol' 5 | import { 6 | FileDocument, 7 | FileResource, 8 | FilePublishMetadata, 9 | } from '@typestream/core-protocol/resources' 10 | 11 | import { getFilesIn } from '../../utils/read-dir.js' 12 | import { ResourceProvider } from './resource-provider.js' 13 | 14 | export class FileResourceProvider extends ResourceProvider<FileResource> { 15 | private sampleCount: number | undefined = undefined 16 | 17 | async *getDocuments() { 18 | const options = this.resource.options 19 | 20 | for await (const path of getFilesIn(options.basePath, { 21 | recursive: options.recursive, 22 | })) { 23 | const read = async () => { 24 | return readFile(joinPath(options.basePath, path)) 25 | } 26 | yield new FileDocument(this.resource, { path }, read) 27 | } 28 | } 29 | 30 | async *getSamples(): AsyncGenerator<FileDocument> { 31 | if (!this.sampleCount) 32 | throw new Error( 33 | 'Cache samples not initialized. 
Call "provider.cacheSamples({count:...})" before "getSamples()".', 34 | ) 35 | let fetched = 0 36 | for await (const doc of this.getDocuments()) { 37 | yield doc 38 | fetched++ 39 | if (fetched === this.sampleCount) return 40 | } 41 | } 42 | 43 | async cacheSamples({ count = 100 }): Promise<void> { 44 | this.sampleCount = count 45 | } 46 | 47 | async publishDocument( 48 | data: Buffer, 49 | metadata: FilePublishMetadata, 50 | ): Promise<void> { 51 | const name = metadata.name 52 | if (!FILE_NAME_PATTERN.test(name)) 53 | throw new Error(`File name does not match: ${FILE_NAME_PATTERN.source}`) 54 | 55 | const { dir, base } = parse(name) 56 | const basePath = join(this.resource.options.basePath, dir) 57 | 58 | // Create the target directory if it doesn't exist yet 59 | await mkdir(basePath, { recursive: true }) 60 | return writeFile(join(basePath, base), data) 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /packages/sdk/src/resources/providers/index.ts: -------------------------------------------------------------------------------- 1 | import { AnyResource } from '@typestream/core-protocol/resources' 2 | 3 | import { ProjectPaths } from '../../paths/project-paths.js' 4 | import { CloudStorageResourceProvider } from './cloud-storage-provider.js' 5 | import { FileResourceProvider } from './file-resource-provider.js' 6 | import { S3ResourceProvider } from './s3-provider.js' 7 | 8 | const RESOURCE_PROVIDERS = { 9 | file: FileResourceProvider, 10 | gcs: CloudStorageResourceProvider, 11 | s3: S3ResourceProvider, 12 | } 13 | 14 | function getResourceProviderClass(res: AnyResource) { 15 | const Provider = 16 | RESOURCE_PROVIDERS[res.type as keyof typeof RESOURCE_PROVIDERS] 17 | if (Provider == null) 18 | throw new Error(`Could not find provider for resource type ${res.type}`) 19 | 20 | return Provider 21 | } 22 | 23 | export function getResourceProvider( 24 | res: AnyResource, 25 | projectPaths: ProjectPaths, 26 | ) { 27 | const Provider = getResourceProviderClass(res) 28 | 29 | const provider = new Provider(res, projectPaths) 30 | 31 | return provider 32 | } 33 | -------------------------------------------------------------------------------- /packages/sdk/src/resources/providers/resource-provider.ts: -------------------------------------------------------------------------------- 1 | import { 2 | DocumentOfResource, 3 | AnyResource, 4 | PublishMetadataOfResource, 5 | DataOfDocument, 6 | } from '@typestream/core-protocol/resources' 7 | 8 | import { ProjectPaths } from '../../paths/project-paths.js' 9 | 10 | export abstract class ResourceProvider<Res extends AnyResource> { 11 | constructor(public resource: Res, protected projectPaths: ProjectPaths) {} 12 | 13 | abstract getDocuments(): AsyncGenerator<DocumentOfResource<Res>> 14 | 15 | abstract getSamples(): AsyncGenerator<DocumentOfResource<Res>> 16 | 17 | abstract cacheSamples(options: { 18 | count: number 19 | validateCounts: boolean 20 | }): Promise<void> 21 | 22 | abstract publishDocument( 23 | data: DataOfDocument<DocumentOfResource<Res>>, 24 | metadata: PublishMetadataOfResource<Res>, 25 | ): Promise<void> 26 | } 27 | -------------------------------------------------------------------------------- /packages/sdk/src/resources/providers/s3-provider.ts: -------------------------------------------------------------------------------- 1 | import { join } from 'node:path' 2 | 3 | import { S3 } from '@aws-sdk/client-s3' 4 | import { 5 | S3Document, 6 | S3PublishMetadata, 7 | S3Resource, 8 | } from '@typestream/core-protocol/resources' 9 | 10 | import { readFullStream } from '../../utils/read-full-stream.js' 11 | 
import { SampleCachingProvider } from '../samples/sample-provider.js' 12 | import { ResourceProvider } from './resource-provider.js' 13 | 14 | export class S3ResourceProvider extends ResourceProvider<S3Resource> { 15 | async *getDocuments() { 16 | const { region, bucket, pathPrefix } = this.resource.options 17 | const prefix = join(this.resource.name, pathPrefix) 18 | 19 | const s3 = new S3({ region }) 20 | 21 | const options = { Bucket: bucket, Prefix: prefix } 22 | let { Contents, NextContinuationToken } = await s3.listObjectsV2(options) 23 | 24 | do { 25 | if (Contents == null) break 26 | 27 | for (const object of Contents) { 28 | const { Key } = object 29 | if (!Key) throw new Error(`Received s3 object without a key`) 30 | 31 | // eslint-disable-next-line unicorn/consistent-function-scoping 32 | const download = async () => { 33 | const res = await s3.getObject({ Bucket: bucket, Key }) 34 | const data = await readFullStream(res.Body as any) 35 | return data 36 | } 37 | 38 | const slicedPath = Key.slice(pathPrefix?.length ?? 0) 39 | yield new S3Document(this.resource, { slicedPath }, download) 40 | } 41 | 42 | const res = await s3.listObjectsV2({ 43 | ...options, 44 | ContinuationToken: NextContinuationToken, 45 | }) 46 | ;({ Contents, NextContinuationToken } = res) 47 | } while (NextContinuationToken) 48 | } 49 | 50 | async *getSamples() { 51 | const sampleProvider = new SampleCachingProvider({ 52 | resource: this.resource, 53 | projectPaths: this.projectPaths, 54 | documentStream: this.getDocuments(), 55 | }) 56 | 57 | for await (const sample of sampleProvider.load()) { 58 | yield new S3Document(this.resource, sample.meta, sample.read) 59 | } 60 | } 61 | 62 | async cacheSamples({ count = 200, validateCounts = false }) { 63 | const sampleProvider = new SampleCachingProvider({ 64 | resource: this.resource, 65 | projectPaths: this.projectPaths, 66 | documentStream: this.getDocuments(), 67 | }) 68 | await sampleProvider.initSample({ sampleSize: count, validateCounts }) 69 | } 70 | 71 | async publishDocument(data: Buffer, metadata: S3PublishMetadata) { 72 | const s3 = new S3({ region: this.resource.options.region }) 73 | 74 | const { pathPrefix, bucket } = this.resource.options 75 | const keyPath = join(pathPrefix, metadata.name) 76 | 77 | await s3.putObject({ 78 | Bucket: bucket, 79 | Key: keyPath, 80 | Body: data, 81 | }) 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /packages/sdk/src/resources/resource-paths.ts: -------------------------------------------------------------------------------- 1 | import { join } from 'node:path' 2 | 3 | import { ProjectPaths } from '../paths/project-paths.js' 4 | 5 | export type ResourceSamplePath = { 6 | /** Path to the resource folder itself */ 7 | resourcePath: string 8 | 9 | /** Path to the resource config.json */ 10 | configPath: string 11 | 12 | /** Path to the document meta files */ 13 | metaPath: string 14 | 15 | /** Path to the actual document data files */ 16 | dataPath: string 17 | } 18 | 19 | export function getResourceSamplePaths( 20 | resourceName: string, 21 | projectPaths: ProjectPaths, 22 | ) { 23 | const resourcePath = join(projectPaths.resourcesDir, resourceName) 24 | 25 | const res: ResourceSamplePath = { 26 | resourcePath, 27 | configPath: join(resourcePath, 'config.json'), 28 | metaPath: join(resourcePath, 'meta'), 29 | dataPath: join(resourcePath, 'data'), 30 | } 31 | 32 | return res 33 | } 34 | 35 | export type ResourceSampleDocumentPaths = { 36 | metaFile: string 37 | dataFile: string 38 | 
id: string 39 | } 40 | 41 | export function getResourceSampleDocumentPaths( 42 | id: string, 43 | resourcePaths: ResourceSamplePath, 44 | ): ResourceSampleDocumentPaths { 45 | const sanitizedId = Buffer.from(id).toString('base64') 46 | return { 47 | id, 48 | dataFile: join(resourcePaths.dataPath, sanitizedId), 49 | metaFile: join(resourcePaths.metaPath, `${sanitizedId}.json`), 50 | } 51 | } 52 | 53 | export function decodeDocumentSampleId(encodedId: string) { 54 | return Buffer.from(encodedId, 'base64').toString('utf-8') 55 | } 56 | -------------------------------------------------------------------------------- /packages/sdk/src/resources/samples/sample-provider.ts: -------------------------------------------------------------------------------- 1 | import { mkdir, readdir, readFile, rm, writeFile } from 'node:fs/promises' 2 | import { join } from 'node:path' 3 | 4 | import { PromisePool } from '@supercharge/promise-pool' 5 | import { 6 | DocumentOfResource, 7 | AnyResource, 8 | } from '@typestream/core-protocol/resources' 9 | import ct from 'chalk-template' 10 | import ora from 'ora' 11 | 12 | import { ProjectPaths } from '../../paths/project-paths.js' 13 | import { askYesNo } from '../../utils/ask.js' 14 | import { asyncGeneratorToArray } from '../../utils/async-gen-to-array.js' 15 | import { 16 | decodeDocumentSampleId, 17 | getResourceSampleDocumentPaths, 18 | getResourceSamplePaths, 19 | ResourceSampleDocumentPaths, 20 | ResourceSamplePath, 21 | } from '../resource-paths.js' 22 | 23 | type SampleProviderOptions<Res extends AnyResource> = { 24 | resource: Res 25 | projectPaths: ProjectPaths 26 | documentStream: AsyncGenerator<DocumentOfResource<Res>> 27 | } 28 | 29 | export class SampleCachingProvider<Res extends AnyResource> { 30 | private readonly path: ResourceSamplePath 31 | public readonly resource: Res 32 | private readonly documentStream: AsyncGenerator<DocumentOfResource<Res>> 33 | 34 | constructor({ 35 | documentStream, 36 | projectPaths, 37 | resource, 38 | }: SampleProviderOptions<Res>) { 39 | this.resource = resource 40 | this.documentStream = documentStream 41 | this.path = getResourceSamplePaths(resource.name, projectPaths) 42 | } 43 | 44 | private async save(doc: DocumentOfResource<Res>) { 45 | const paths = getResourceSampleDocumentPaths(doc.id, this.path) 46 | 47 | await mkdir(join(paths.dataFile, '..'), { recursive: true }) 48 | await mkdir(join(paths.metaFile, '..'), { recursive: true }) 49 | 50 | await writeFile(paths.dataFile, await doc.read()) 51 | await writeFile(paths.metaFile, JSON.stringify(doc.metadata)) 52 | 53 | return { 54 | id: doc.id, 55 | meta: doc.metadata, 56 | read: () => readFile(paths.dataFile), 57 | } 58 | } 59 | 60 | async *load(): AsyncGenerator<{ 61 | id: string 62 | meta: any 63 | read: () => Promise<Buffer> 64 | }> { 65 | const allCachedSamples = await this.getAllCachedSamples() 66 | 67 | for (const sample of allCachedSamples) { 68 | yield await this.loadSample(sample) 69 | } 70 | 71 | if (allCachedSamples.length === 0) { 72 | console.log( 73 | `No samples are cached for resource "${this.resource.name}" [${this.resource.type}].`, 74 | ) 75 | } 76 | } 77 | 78 | private async loadSample(path: ResourceSampleDocumentPaths) { 79 | const read = async () => await readFile(path.dataFile) 80 | const meta = JSON.parse( 81 | await readFile(path.metaFile, { encoding: 'utf-8' }), 82 | ) 83 | 84 | return { id: path.id, meta, read } 85 | } 86 | 87 | protected async getAllCachedSamples() { 88 | try { 89 | const files = await readdir(this.path.dataPath) 90 | const ids = files.filter(x => x !== '.DS_Store') 91 | 92 | const res: ResourceSampleDocumentPaths[] = ids.map(x => 
93 | getResourceSampleDocumentPaths(decodeDocumentSampleId(x), this.path), 94 | ) 95 | 96 | return res 97 | } catch { 98 | return [] 99 | } 100 | } 101 | 102 | public async initSample({ 103 | sampleSize = 100, 104 | validateCounts = false, 105 | }): Promise<void> { 106 | const newConfigStr = JSON.stringify(this.resource.options, undefined, ' ') 107 | 108 | // Try to load the old config. If none exists, the read fails and a fresh cache is initialized below. 109 | 110 | const oldConfigStr = await tryReadConfig(this.path.configPath) 111 | if (oldConfigStr && oldConfigStr === newConfigStr) { 112 | // If validateCounts is true (on the first run of a pipe in watch) and the config stayed the same, top up or trim the cached samples 113 | if (validateCounts) await this.cacheSamples({ sampleSize }) 114 | return 115 | } 116 | 117 | if (oldConfigStr) { 118 | const yes = await askYesNo( 119 | ct`The config of resource "${this.resource.name}" [${this.resource.type}] changed. Do you want to clear the cache?`, 120 | ) 121 | if (!yes) { 122 | console.log( 123 | 'Skipping deletion of old cached files. This could lead to inconsistencies.', 124 | ) 125 | return 126 | } 127 | 128 | await setupCleanCache(this.path, newConfigStr) 129 | } 130 | 131 | await this.cacheSamples({ sampleSize }) 132 | } 133 | 134 | protected async cacheSamples({ sampleSize }: { sampleSize: number }) { 135 | const allCachedSamples = await this.getAllCachedSamples() 136 | // If we already have the desired size, there is nothing to do 137 | if (allCachedSamples.length === sampleSize) return 138 | 139 | const spinner = ora( 140 | `Preparing samples of resource "${this.resource.name}"...`, 141 | ).start() 142 | if (allCachedSamples.length < sampleSize) { 143 | // If the number of samples is lower than the desired size, new samples are loaded 144 | const knownIds = new Set(allCachedSamples.map(x => x.id)) 145 | const samples = await asyncGeneratorToArray(this.documentStream, { 146 | maxCount: sampleSize, 147 | }) 148 | await PromisePool.for(samples) 149 | .withConcurrency(10) 150 | .process(async (doc: DocumentOfResource<Res>) => { 151 | if (!knownIds.has(doc.id)) { 152 | const percentage = ((knownIds.size / sampleSize) * 100).toFixed(2) 153 | knownIds.add(doc.id) 154 | spinner.text = `Loading samples [${percentage}%] "${doc.id}" of resource "${this.resource.name}"` 155 | await this.save(doc) 156 | } 157 | }) 158 | } else { 159 | // Otherwise, we have too many samples and need to delete some.
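// (Array.prototype.slice(sampleSize) returns every entry from index sampleSize onwards, i.e. exactly the surplus beyond the desired sample size.)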
160 | const excessSamples = allCachedSamples.slice(sampleSize) 161 | for (const samplePath of excessSamples) { 162 | spinner.text = `Deleting sample "${samplePath.id}" of resource "${this.resource.name}"` 163 | await rm(samplePath.dataFile) 164 | await rm(samplePath.metaFile) 165 | } 166 | } 167 | 168 | spinner.succeed('Finished preparing samples.') 169 | } 170 | } 171 | 172 | async function setupCleanCache( 173 | paths: ResourceSamplePath, 174 | newConfigStr: string, 175 | ) { 176 | try { 177 | await rm(paths.resourcePath, { force: true, recursive: true }) 178 | } finally { 179 | await mkdir(paths.resourcePath, { recursive: true }) 180 | await writeFile(paths.configPath, newConfigStr) 181 | } 182 | } 183 | 184 | async function tryReadConfig(path: string) { 185 | try { 186 | const config = await readFile(path, { encoding: 'utf8' }) 187 | return config 188 | } catch { 189 | return 190 | } 191 | } 192 | -------------------------------------------------------------------------------- /packages/sdk/src/runner/pipe-controller.ts: -------------------------------------------------------------------------------- 1 | import { ChildProcess, fork } from 'node:child_process' 2 | import { fileURLToPath } from 'node:url' 3 | 4 | import { AnyDocument, ResourceRef } from '@typestream/core-protocol/resources' 5 | import pDefer from 'p-defer' 6 | 7 | import { DumpFunction } from '../utils/data-dumper.js' 8 | import { 9 | CustomMessage, 10 | getCapturedSchemasRef, 11 | loadPipeRef, 12 | processDocumentRef, 13 | } from './runner-functions.js' 14 | 15 | const WORKER_PATH = fileURLToPath(new URL('pipe-executer.js', import.meta.url)) 16 | 17 | export class PipeController { 18 | private childProcess: ChildProcess 19 | 20 | private untilChildProcessReady = pDefer() 21 | private untilChildProcessExit = pDefer() 22 | 23 | public resourceRef: ResourceRef | undefined 24 | 25 | // RPC functions 26 | private initializeRunner: ReturnType<typeof loadPipeRef.createCallable> 27 | private processDocumentCaller: ReturnType< 28 | typeof processDocumentRef.createCallable 29 | > 30 | private getCapturedSchemasCaller: ReturnType< 31 | typeof getCapturedSchemasRef.createCallable 32 | > 33 | 34 | private dumpFunction: DumpFunction | undefined = undefined 35 | 36 | constructor(public readonly debuggingEnabled = false) { 37 | this.childProcess = fork(WORKER_PATH, { 38 | // Ignore stdin, stdout, and stderr, but set up channel for IPC 39 | stdio: ['ignore', 'ignore', 'ignore', 'ipc'], 40 | execArgv: this.debuggingEnabled ? 
['--inspect'] : [], 41 | serialization: 'advanced', 42 | }) 43 | this.attachMessageHandlers() 44 | 45 | this.initializeRunner = loadPipeRef.createCallable(this.childProcess) 46 | this.processDocumentCaller = processDocumentRef.createCallable( 47 | this.childProcess, 48 | ) 49 | this.getCapturedSchemasCaller = getCapturedSchemasRef.createCallable( 50 | this.childProcess, 51 | ) 52 | } 53 | 54 | async processDocument(doc: AnyDocument) { 55 | const data = await doc.read() 56 | const documentRef = doc.toDocumentRef() 57 | const processingRes = await this.processDocumentCaller.call({ 58 | documentRef, 59 | data, 60 | exposeErrors: this.debuggingEnabled, 61 | }) 62 | 63 | return processingRes 64 | } 65 | 66 | private attachMessageHandlers() { 67 | this.childProcess.on('message', message => 68 | this.handleCustomMessage(message as any), 69 | ) 70 | 71 | this.childProcess.on('error', error => 72 | this.untilChildProcessExit.reject(error), 73 | ) 74 | this.childProcess.on('exit', () => this.untilChildProcessExit.resolve()) 75 | } 76 | 77 | private handleCustomMessage(message: CustomMessage) { 78 | if (message.type === 'ready') { 79 | this.untilChildProcessReady.resolve() 80 | return 81 | } 82 | if (message.type === 'dump') { 83 | void this.dumpFunction?.({ 84 | data: message.data, 85 | name: message.name, 86 | skipDuplicates: message.skipDuplicates, 87 | }) 88 | } 89 | } 90 | 91 | public async loadPipe( 92 | pipeName: string, 93 | options: { 94 | enableSchemaCapturing: boolean 95 | dumpFunction?: DumpFunction 96 | enableWriting: boolean 97 | }, 98 | ) { 99 | if (this.resourceRef) 100 | throw new Error('A pipe is already loaded into this process!') 101 | await Promise.race([ 102 | this.untilChildProcessReady.promise, 103 | delay(3000).then(() => { 104 | throw new Error( 105 | 'The process is not responding. 
Try to restart tyst watch.', 106 | ) 107 | }), 108 | ]) 109 | 110 | const { resourceRef } = await this.initializeRunner.call({ 111 | pipeName, 112 | captureSchemaSamples: options.enableSchemaCapturing, 113 | enableDumpFunctionality: options.dumpFunction !== undefined, 114 | enableWriting: options.enableWriting, 115 | }) 116 | this.resourceRef = resourceRef 117 | this.dumpFunction = options.dumpFunction 118 | 119 | return resourceRef 120 | } 121 | 122 | public async getCapturedSchemas() { 123 | const schemas = await this.getCapturedSchemasCaller.call({}) 124 | return schemas 125 | } 126 | 127 | public async stop() { 128 | this.childProcess.kill() 129 | await this.untilChildProcessExit.promise 130 | } 131 | } 132 | 133 | function delay(ms: number): Promise<void> { 134 | return new Promise(resolve => setTimeout(() => resolve(), ms)) 135 | } 136 | -------------------------------------------------------------------------------- /packages/sdk/src/runner/pipe-executer.ts: -------------------------------------------------------------------------------- 1 | import { BundleSchema, PublishArgs } from '@typestream/core-protocol' 2 | import { AnyResource } from '@typestream/core-protocol/resources' 3 | 4 | import { loadAllPaths } from '../paths/load-all-paths.js' 5 | import { loadPipeBundle } from '../pipe/bundling/load-pipe-bundle.js' 6 | import { SchemaCapturer } from '../typing/schema-sample-capturer.js' 7 | import { catchButNotReally } from '../utils/catch-but-not-really.js' 8 | import { 9 | CustomMessage, 10 | getCapturedSchemasRef, 11 | loadPipeRef, 12 | processDocumentRef, 13 | } from './runner-functions.js' 14 | 15 | let enableSchemaCapturing = false 16 | let enableDump = false 17 | 18 | function main() { 19 | implementPipeLoading() 20 | 21 | sendMessage({ type: 'ready' }) 22 | } 23 | 24 | implementSchemaCapturing() 25 | implementDump() 26 | 27 | function implementPipeLoading() { 28 | loadPipeRef.implement( 29 | process, 30 | async ({ 31 | pipeName, 32 | captureSchemaSamples, 33 | enableDumpFunctionality, 34 | enableWriting, 35 | }) => { 36 | globalThis.typestreamWritingActive = enableWriting 37 | 38 | enableSchemaCapturing = captureSchemaSamples 39 | enableDump = enableDumpFunctionality 40 | 41 | const paths = await loadAllPaths(pipeName) 42 | const bundle = await loadPipeBundle(paths.pipe.bundleFileName) 43 | 44 | implementDocumentProcessing(bundle) 45 | 46 | return { resourceRef: bundle.resource.toResourceRef() } 47 | }, 48 | ) 49 | } 50 | 51 | function implementSchemaCapturing() { 52 | const schemaCapturer = new SchemaCapturer() 53 | 54 | globalThis.typestreamCaptureTypeSample = (name, data) => { 55 | if (enableSchemaCapturing) schemaCapturer.captureTypeSample({ name, data }) 56 | } 57 | 58 | getCapturedSchemasRef.implement(process, async () => { 59 | return [...schemaCapturer.schemaMap.values()] 60 | }) 61 | } 62 | 63 | function implementDump() { 64 | globalThis.typestreamWriteDump = ({ data, name, skipDuplicates }) => { 65 | if (enableDump) sendMessage({ type: 'dump', data, name, skipDuplicates }) 66 | } 67 | } 68 | 69 | function implementDocumentProcessing(bundle: BundleSchema.Bundle) { 70 | processDocumentRef.implement( 71 | process, 72 | async ({ data, documentRef, exposeErrors }) => { 73 | const documentsToPublish: PublishArgs[] = [] 74 | const doc = bundle.resource.buildDocument(documentRef, async () => data) 75 | 76 | const callBundle = () => 77 | bundle.call(doc, { 78 | publish: x => { 79 | documentsToPublish.push(x) 80 | x.resource 81 | }, 82 | }) 83 | 84 | // `catchButNotReally` is 
used to allow the debugger to jump to errors from 85 | // the pipe while still allowing us to catch and react to errors here 86 | await (exposeErrors ? catchButNotReally(callBundle) : callBundle()) 87 | 88 | return { documentsToPublish } 89 | }, 90 | ) 91 | } 92 | 93 | function sendMessage(message: CustomMessage) { 94 | process.send!(message) 95 | } 96 | 97 | void main() 98 | -------------------------------------------------------------------------------- /packages/sdk/src/runner/rpc.ts: -------------------------------------------------------------------------------- 1 | import { ChildProcess } from 'node:child_process' 2 | import { randomUUID } from 'node:crypto' 3 | 4 | import pDefer, { DeferredPromise } from 'p-defer' 5 | 6 | type RemoteRequestObject<T> = { 7 | type: 'ipc-request' 8 | name: string 9 | data: { messageId: string; data: T } 10 | } 11 | 12 | type RemoteResponseError = { 13 | name: string 14 | message: string 15 | stack?: string 16 | } 17 | 18 | type RemoteResponseObject<T> = { 19 | type: 'ipc-response' 20 | name: string 21 | messageId: string 22 | } & ( 23 | | { succeeded: true; data: T } 24 | | { succeeded: false; error: RemoteResponseError } 25 | ) 26 | 27 | const registeredHandlers = new Map< 28 | string, 29 | IpcFunctionCaller<any, any> | IpcFunctionImplementation<any, any> 30 | >() 31 | 32 | function ensureJustOneInProcess( 33 | name: string, 34 | handler: IpcFunctionCaller<any, any> | IpcFunctionImplementation<any, any>, 35 | ) { 36 | const alreadyRegistered = registeredHandlers.get(name) 37 | if (alreadyRegistered) 38 | throw new Error( 39 | `A handler (${alreadyRegistered.constructor.name}) for ${alreadyRegistered.name} is already registered in this process. Cannot register another ${handler.constructor.name}!`, 40 | ) 41 | registeredHandlers.set(name, handler) } 42 | 43 | export class IpcFunctionRef<FunctionArgs, Response> { 44 | constructor(public readonly name: string) {} 45 | 46 | public implement( 47 | peerProcess: NodeJS.Process, 48 | fn: (arg: FunctionArgs) => Promise<Response>, 49 | ) { 50 | return new IpcFunctionImplementation<FunctionArgs, Response>( 51 | this.name, 52 | peerProcess, 53 | fn, 54 | ) 55 | } 56 | public createCallable(peerProcess: ChildProcess) { 57 | return new IpcFunctionCaller<FunctionArgs, Response>( 58 | this.name, 59 | peerProcess, 60 | ) 61 | } 62 | } 63 | 64 | export class IpcDisconnectError extends Error { 65 | constructor() { 66 | super('The process exited before a response was received!') 67 | this.name = IpcDisconnectError.name 68 | } 69 | } 70 | 71 | class IpcFunctionCaller<FunctionArgs, Response> { 72 | private pendingRequests = new Map<string, DeferredPromise<Response>>() 73 | 74 | constructor(public readonly name: string, private peerProcess: ChildProcess) { 75 | ensureJustOneInProcess(name, this) 76 | if (!this.peerProcess.send) 77 | throw new Error( 78 | 'process.send() is not available. 
The passed in process must be a child process.', 79 | ) 80 | 81 | peerProcess.on('exit', () => { 82 | for (const promise of this.pendingRequests.values()) { 83 | promise.reject(new IpcDisconnectError()) 84 | } 85 | }) 86 | 87 | this.peerProcess.on( 88 | 'message', 89 | (message: RemoteResponseObject<Response>) => { 90 | if (message.name === this.name && message.type === 'ipc-response') { 91 | void this.receiveResponse(message) 92 | } 93 | }, 94 | ) 95 | } 96 | 97 | private sendMessage(message: RemoteRequestObject<FunctionArgs>) { 98 | this.peerProcess.send(message) 99 | } 100 | 101 | private receiveResponse(message: RemoteResponseObject<Response>) { 102 | const promise = this.pendingRequests.get(message.messageId) 103 | if (!promise) throw new Error('Unknown message received!') 104 | if (message.succeeded) { 105 | promise.resolve(message.data) 106 | } else { 107 | const error = deserializeError(message.error) 108 | promise.reject(error) 109 | } 110 | } 111 | 112 | async call(data: FunctionArgs): Promise<Response> { 113 | const requestId = randomUUID() 114 | const p = pDefer<Response>() 115 | this.pendingRequests.set(requestId, p) 116 | 117 | this.sendMessage({ 118 | name: this.name, 119 | type: 'ipc-request', 120 | data: { data, messageId: requestId }, 121 | }) 122 | 123 | return await p.promise 124 | } 125 | } 126 | 127 | class IpcFunctionImplementation<Request, Response> { 128 | constructor( 129 | public readonly name: string, 130 | private process: NodeJS.Process, 131 | private fn: (arg0: Request) => Promise<Response>, 132 | ) { 133 | ensureJustOneInProcess(name, this) 134 | if (!this.process.send) 135 | throw new Error( 136 | 'process.send() is not available. The passed in process must be a child process.', 137 | ) 138 | this.process.on('message', (message: RemoteRequestObject<Request>) => { 139 | if (message.name === this.name && message.type === 'ipc-request') { 140 | void this.receiveRequest(message) 141 | } 142 | }) 143 | } 144 | 145 | async receiveRequest(message: RemoteRequestObject<Request>) { 146 | try { 147 | const resp = await this.fn(message.data.data) 148 | this.sendMessage({ 149 | name: this.name, 150 | type: 'ipc-response', 151 | messageId: message.data.messageId, 152 | succeeded: true, 153 | data: resp, 154 | }) 155 | } catch (rawError) { 156 | const error = serializeError(rawError as Error) 157 | 158 | this.sendMessage({ 159 | name: this.name, 160 | type: 'ipc-response', 161 | messageId: message.data.messageId, 162 | succeeded: false, 163 | error, 164 | }) 165 | } 166 | } 167 | 168 | private sendMessage(message: RemoteResponseObject<Response>) { 169 | this.process.send!(message) 170 | } 171 | } 172 | 173 | function serializeError(error: Error): RemoteResponseError { 174 | return { 175 | name: error.name, 176 | message: error.message, 177 | stack: error.stack, 178 | } 179 | } 180 | 181 | function deserializeError(error: RemoteResponseError): Error { 182 | // eslint-disable-next-line unicorn/error-message 183 | const deserializedError = new Error() 184 | 185 | deserializedError.name = error.name 186 | deserializedError.message = error.message 187 | deserializedError.stack = error.stack 188 | 189 | return deserializedError 190 | } 191 | -------------------------------------------------------------------------------- /packages/sdk/src/runner/runner-functions.ts: -------------------------------------------------------------------------------- 1 | import { PublishArgs } from '@typestream/core-protocol' 2 | import { 3 | AnyResource, 4 | DocumentRef, 5 | ResourceRef, 6 | } from '@typestream/core-protocol/resources' 7 | 8 | import { FullSchema } from '../typing/schema-sample-capturer.js' 
9 | import { IpcFunctionRef } from './rpc.js' 10 | 11 | export const loadPipeRef = new IpcFunctionRef< 12 | { 13 | pipeName: string 14 | captureSchemaSamples: boolean 15 | enableDumpFunctionality: boolean 16 | enableWriting: boolean 17 | }, 18 | { resourceRef: ResourceRef } 19 | >('loadPipe') 20 | 21 | type DocumentProcessResult = { 22 | documentsToPublish: PublishArgs[] 23 | } 24 | 25 | export const processDocumentRef = new IpcFunctionRef< 26 | { documentRef: DocumentRef; data: any; exposeErrors: boolean }, 27 | DocumentProcessResult 28 | >('processDocument') 29 | 30 | export const getCapturedSchemasRef = new IpcFunctionRef<{}, FullSchema[]>( 31 | 'getCapturedSchemas', 32 | ) 33 | 34 | export type CustomMessage = { 35 | type: string 36 | } & ( 37 | | { type: 'ready' } 38 | | { type: 'dump'; data: any; name: string; skipDuplicates: boolean } 39 | ) 40 | -------------------------------------------------------------------------------- /packages/sdk/src/typing/generate-type-map.ts: -------------------------------------------------------------------------------- 1 | import { mkdir, readdir, writeFile } from 'node:fs/promises' 2 | import { parse } from 'node:path' 3 | 4 | import { ProjectPaths } from '../paths/project-paths.js' 5 | 6 | export async function createTypeMap(projectPaths: ProjectPaths) { 7 | await mkdir(projectPaths.inferredTypesDir, { recursive: true }) 8 | const typeFiles = await readdir(projectPaths.inferredTypesDir) 9 | 10 | const filteredTypeFiles = typeFiles.filter(x => /\.ts$/.test(x)) 11 | 12 | const keys = filteredTypeFiles.map(x => parse(x).name) 13 | 14 | const importLines = keys.map( 15 | key => `import { ${key} } from './keys/${key}.js'`, 16 | ) 17 | 18 | const interfaceProperties = keys.map(key => `${key}: ${key}`) 19 | 20 | const body = `declare global { 21 | namespace TypeStream { 22 | export interface Types { 23 | ${interfaceProperties.map(x => ` ${x}`).join('\n')} 24 | } 25 | } 26 | }` 27 | 28 | const file = [...importLines, '', body].join('\n') + '\n' 29 | 30 | await mkdir(projectPaths.typesPath, { recursive: true }) 31 | await writeFile(projectPaths.typeMapPath, file) 32 | } 33 | -------------------------------------------------------------------------------- /packages/sdk/src/typing/process-schemas.ts: -------------------------------------------------------------------------------- 1 | import { mkdir, readFile, writeFile } from 'node:fs/promises' 2 | import { join as joinPath } from 'node:path' 3 | 4 | import ct from 'chalk-template' 5 | import { Schema, mergeSchemas, isSubset } from 'genson-js' 6 | 7 | import { ProjectPaths } from '../paths/project-paths.js' 8 | import { askOptions } from '../utils/ask.js' 9 | import { createTypeMap } from './generate-type-map.js' 10 | import { FullSchema } from './schema-sample-capturer.js' 11 | import { typescriptFromSchema } from './types-from-json-schema.js' 12 | 13 | export async function processSchemas( 14 | schemas: FullSchema[], 15 | projectPaths: ProjectPaths, 16 | ) { 17 | await mkdir(projectPaths.typesPath, { recursive: true }) 18 | await mkdir(projectPaths.inferredTypesDir, { recursive: true }) 19 | 20 | for (const fullSchema of schemas) { 21 | await writeTypeToFile(projectPaths, fullSchema) 22 | } 23 | 24 | await createTypeMap(projectPaths) 25 | } 26 | 27 | async function writeTypeToFile(paths: ProjectPaths, fullSchema: FullSchema) { 28 | const typeFilesDir = paths.inferredTypesDir 29 | 30 | const typeFilePath = joinPath(typeFilesDir, `${fullSchema.name}.ts`) 31 | const schemaFilePath = joinPath( 32 | 
typeFilesDir, 33 | `${fullSchema.name}.schema.json`, 34 | ) 35 | 36 | const oldSchema = await tryLoadSchema(schemaFilePath) 37 | 38 | const newFullSchema = await mergeOrExtendSchemas(oldSchema, fullSchema) 39 | if (!newFullSchema) return 40 | 41 | const typesFile = await typescriptFromSchema(newFullSchema) 42 | 43 | const schemaFile = JSON.stringify(newFullSchema.schema, undefined, ' ') 44 | 45 | await writeFile(schemaFilePath, schemaFile) 46 | await writeFile(typeFilePath, typesFile) 47 | 48 | console.log( 49 | ct`{dim If the new types don't show up, restart your TS Server or editor.}`, 50 | ) 51 | } 52 | 53 | /** 54 | * Returns null when the schema is unchanged; otherwise returns the new or merged FullSchema. 55 | */ 56 | async function mergeOrExtendSchemas( 57 | oldSchema: Schema | undefined, 58 | newSchema: FullSchema, 59 | ) { 60 | if (oldSchema) { 61 | const noNewFields = isSubset(oldSchema, newSchema.schema) 62 | if (noNewFields) return null 63 | 64 | const answer = await askOptions( 65 | ct`The schema of "{cyan ${newSchema.name}}" got new fields. 66 | Do you want to {yellow overwrite (recommended)} or {yellow extend} the types?`, 67 | { 68 | overwrite: 'overwrite (recommended)', 69 | extend: 'extend (use with small sample size)', 70 | }, 71 | ) 72 | if (answer === 'overwrite') { 73 | console.log(` Finished replacing types for ${newSchema.name}!`) 74 | return newSchema 75 | } else if (answer === 'extend') { 76 | const results = { 77 | ...newSchema, 78 | schema: mergeSchemas([oldSchema, newSchema.schema]), 79 | } 80 | console.log(` Finished extending types for ${newSchema.name}!`) 81 | return results 82 | } 83 | } else { 84 | console.log(` Finished creating new types for ${newSchema.name}!`) 85 | return newSchema 86 | } 87 | } 88 | 89 | async function tryLoadSchema(path: string) { 90 | try { 91 | const data = await readFile(path) 92 | return JSON.parse(data.toString('utf8')) as Schema 93 | } catch { 94 | return 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /packages/sdk/src/typing/schema-sample-capturer.ts: -------------------------------------------------------------------------------- 1 | import '@typestream/core-protocol' 2 | import { assertGeneratedTypeName } from '@typestream/core-protocol/utils' 3 | import { createSchema, extendSchema, Schema } from 'genson-js' 4 | 5 | type SchemaMap = Map<string, FullSchema> 6 | 7 | export type FullSchema = { name: string; schema: Schema; samples: number } 8 | 9 | export class SchemaCapturer { 10 | public readonly schemaMap: SchemaMap = new Map() 11 | 12 | captureTypeSample({ name, data }: { name: string; data: any }) { 13 | assertGeneratedTypeName(name) 14 | 15 | if (!this.schemaMap.has(name)) 16 | this.schemaMap.set(name, { 17 | name, 18 | schema: createSchema(data), 19 | samples: 1, 20 | }) 21 | else { 22 | const fullSchema = this.schemaMap.get(name)! 23 | fullSchema.samples++ 24 | fullSchema.schema = extendSchema(fullSchema.schema, data) 25 | } 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /packages/sdk/src/typing/types-from-json-schema.ts: -------------------------------------------------------------------------------- 1 | import { compile as compileTsFromSchema } from 'json-schema-to-typescript' 2 | 3 | import { FullSchema } from './schema-sample-capturer.js' 4 | 5 | export async function typescriptFromSchema(fullSchema: FullSchema) { 6 | // The "json-schema-to-typescript" library alters the schema in some very rare cases.
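// The JSON round trip below therefore hands the compiler a deep copy, keeping the schema we persist untouched.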
7 | const copiedSchema = JSON.parse(JSON.stringify(fullSchema.schema)) 8 | 9 | const schema = await compileTsFromSchema(copiedSchema, fullSchema.name, { 10 | bannerComment: `/* eslint-disable unicorn/filename-case */ 11 | 12 | /** 13 | * Auto generated types by TypeStream. 14 | * Do not modify 15 | */`, 16 | }) 17 | 18 | // We need to cut out lines that contain `[k: string]: unknown` because we don't allow unknown properties 19 | const result = schema 20 | .split('\n') 21 | .filter(x => !x.includes('[k: string]: unknown')) 22 | .join('\n') 23 | 24 | return result 25 | } 26 | -------------------------------------------------------------------------------- /packages/sdk/src/utils/array-utils.ts: -------------------------------------------------------------------------------- 1 | export type SortDirection = 'asc' | 'desc' 2 | 3 | /** Made to be used with `.sort` (e.g. `.sort(basedOnKey('size', 'desc'))`). */ 4 | export function basedOnKey<T extends Record<string, number>>( 5 | key: keyof T, 6 | direction: SortDirection, 7 | ) { 8 | return basedOn(_ => _[key], direction) 9 | } 10 | 11 | /** Made to be used with `.sort` (e.g. `.sort(basedOn(_ => _.size, 'desc'))`). */ 12 | export function basedOn<T>( 13 | argFn: (arg: T) => number, 14 | direction: SortDirection, 15 | ) { 16 | const sortFn = 17 | direction === 'asc' 18 | ? (x: number, y: number) => x - y 19 | : (x: number, y: number) => y - x 20 | 21 | return (a: T, b: T) => sortFn(argFn(a), argFn(b)) 22 | } 23 | 24 | /** 25 | * Like `basedOn`, but with support for multiple sorting criteria: 26 | * 27 | * ``` 28 | * items.sort(basedOnMultiple([ 29 | * [_ => _.size, 'desc'], 30 | * [_ => _.createdAt, 'asc'], 31 | * ])) 32 | * ``` 33 | */ 34 | export function basedOnMultiple<T>( 35 | criteria: [(arg: T) => number, SortDirection][], 36 | ) { 37 | const sortFns = criteria.map(([argFn, direction]) => 38 | basedOn(argFn, direction), 39 | ) 40 | 41 | return (a: T, b: T) => { 42 | for (const sortFn of sortFns) { 43 | const result = sortFn(a, b) 44 | if (result !== 0) return result 45 | } 46 | 47 | return 0 48 | } 49 | } 50 | 51 | /** Made to be used with `.reduce` (`.reduce(...toSum)`).
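For example, `[1, 2, 3].reduce(...toSum)` spreads into `.reduce((sum, value) => sum + value, 0)` and evaluates to 6.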
*/ 52 | export const toSum = [(sum: number, value: number) => sum + value, 0] as const 53 | 54 | export function sumOf(numbers: number[]) { 55 | return numbers.reduce(...toSum) 56 | } 57 | -------------------------------------------------------------------------------- /packages/sdk/src/utils/ask.ts: -------------------------------------------------------------------------------- 1 | import { exit } from 'node:process' 2 | 3 | import prompt from 'prompts' 4 | 5 | import { logReplace } from './log-replace.js' 6 | 7 | export async function askYesNo(question: string) { 8 | logReplace.clear() 9 | 10 | const answer = await prompt( 11 | { 12 | type: 'toggle', 13 | name: 'value', 14 | message: question, 15 | initial: true, 16 | active: 'yes', 17 | inactive: 'no', 18 | }, 19 | { onCancel: () => exit() }, 20 | ) 21 | 22 | return answer.value as boolean 23 | } 24 | 25 | export async function askOptions<T extends string>( 26 | question: string, 27 | answers: Record<T, string>, 28 | ) { 29 | logReplace.clear() 30 | 31 | const choices = Object.entries(answers).map(([id, label]) => ({ 32 | value: id, 33 | title: label as string, 34 | })) 35 | 36 | const answer = await prompt( 37 | { 38 | type: 'select', 39 | name: 'value', 40 | message: question, 41 | choices, 42 | initial: 0, 43 | }, 44 | { onCancel: () => exit() }, 45 | ) 46 | 47 | return answer.value as T 48 | } 49 | -------------------------------------------------------------------------------- /packages/sdk/src/utils/async-gen-to-array.ts: -------------------------------------------------------------------------------- 1 | export async function asyncGeneratorToArray<T>( 2 | generator: AsyncGenerator<T>, 3 | { maxCount = Number.POSITIVE_INFINITY }: { maxCount?: number } = {}, 4 | ) { 5 | const results: T[] = [] 6 | 7 | for await (const element of generator) { 8 | results.push(element) 9 | if (results.length === maxCount) break 10 | } 11 | 12 | return results 13 | } 14 | -------------------------------------------------------------------------------- /packages/sdk/src/utils/catch-but-not-really.ts: -------------------------------------------------------------------------------- 1 | let alreadyWaitingForRejection = false 2 | 3 | /** 4 | * Arguably fairly hacky function that can be used to catch an error thrown by 5 | * an async function without the debugger (e.g. VS Code) considering the error 6 | * to be handled. 7 | * 8 | * The alternative would be to manually enable "Caught exception" 9 | * handling in the debugger settings which has the very bad side effect of 10 | * including a _lot_ of very uninteresting exceptions from libraries. 11 | * 12 | * _**Note:** Make sure to always wait for the last invocation of this function 13 | * to finish before calling it again. 
Calling it while it's waiting for another 14 | * invocation to finish will result in an exception!_ 15 | */ 16 | export function catchButNotReally<T>(fn: () => Promise<T>) { 17 | return new Promise<T>((resolve, reject) => { 18 | if (alreadyWaitingForRejection) 19 | throw new Error('Already waiting for unhandled rejection!') 20 | alreadyWaitingForRejection = true 21 | 22 | const handleRejection = (error: any) => { 23 | alreadyWaitingForRejection = false 24 | reject(error) 25 | } 26 | process.once('unhandledRejection', handleRejection) 27 | 28 | void fn().then(value => { 29 | // Remove the rejection listener if the promise resolves successfully 30 | process.off('unhandledRejection', handleRejection) 31 | 32 | alreadyWaitingForRejection = false 33 | resolve(value) 34 | }) 35 | }) 36 | } 37 | -------------------------------------------------------------------------------- /packages/sdk/src/utils/chalk-extensions.ts: -------------------------------------------------------------------------------- 1 | import chalk from 'chalk' 2 | 3 | /** 4 | * Chalk extension that makes it easy to create those bold colored status 5 | * indicator boxes like `[Done!]` or `[Failed!]`. 6 | * 7 | * @example 8 | * console.log(`${chalk.red(boldBox('Done!'))} Finished successfully!`) 9 | */ 10 | export function boldBox(contents: string) { 11 | return chalk.inverse.bold( 12 | `${chalk.hidden('[')}${contents}${chalk.hidden(']')}`, 13 | ) 14 | } 15 | -------------------------------------------------------------------------------- /packages/sdk/src/utils/data-dumper.ts: -------------------------------------------------------------------------------- 1 | import { mkdir, writeFile } from 'node:fs/promises' 2 | import { join } from 'node:path' 3 | 4 | import { Subject, throttle, throttleTime } from 'rxjs' 5 | 6 | import { ProjectPaths } from '../paths/project-paths.js' 7 | 8 | type DumpFunctionOptions = { 9 | name: string 10 | data: any 11 | skipDuplicates: boolean 12 | } 13 | 14 | export type DumpFunction = (options: DumpFunctionOptions) => void 15 | 16 | export class DataDumper { 17 | private dumpsMap = new Map< 18 | string, 19 | { 20 | filePath: string 21 | entries: string[] 22 | writingSubject: Subject<void> 23 | } 24 | >() 25 | constructor(private projectPaths: ProjectPaths) {} 26 | 27 | /** 28 | * Get all the active dump streams for logging. 29 | */ 30 | get activeDumps() { 31 | const res: { key: string; path: string }[] = [] 32 | this.dumpsMap.forEach((val, key) => { 33 | res.push({ key, path: val.filePath }) 34 | }) 35 | 36 | return res 37 | } 38 | 39 | dump({ data, name, skipDuplicates }: DumpFunctionOptions) { 40 | const filePath = join(this.projectPaths.dumpDir, `${name}.json`) 41 | 42 | if (!this.dumpsMap.has(name)) { 43 | assertDumpName(name) 44 | const writingSubject = new Subject<void>() 45 | let lastPromise = Promise.resolve() 46 | 47 | writingSubject 48 | .pipe( 49 | throttleTime(100, undefined, { leading: false, trailing: true }), 50 | throttle(() => lastPromise, { leading: false, trailing: true }), 51 | ) 52 | .subscribe(() => { 53 | lastPromise = this.entryToFile(name) 54 | }) 55 | 56 | this.dumpsMap.set(name, { 57 | filePath, 58 | entries: [], 59 | writingSubject, 60 | }) 61 | } 62 | const currentElem = this.dumpsMap.get(name)! 
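// JSON.stringify(undefined) returns undefined rather than valid JSON, so undefined values are normalized to null first.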
63 | if (data === undefined) data = null 64 | const stringifiedData = JSON.stringify(data, undefined, ' ') 65 | 66 | if (!skipDuplicates || !currentElem.entries.includes(stringifiedData)) { 67 | currentElem.entries.push(stringifiedData) 68 | } 69 | 70 | currentElem.writingSubject.next() 71 | } 72 | 73 | async entryToFile(name: string) { 74 | await mkdir(this.projectPaths.dumpDir, { recursive: true }) 75 | const currentElem = this.dumpsMap.get(name)! 76 | 77 | const joinedEntries = currentElem.entries.map(x => indent(x)).join(',\n') 78 | 79 | const fullText = `[\n${joinedEntries}\n]` 80 | 81 | await writeFile(currentElem.filePath, fullText) 82 | } 83 | } 84 | 85 | function indent(str: string) { 86 | const lines = [] 87 | for (const row of str.split(/\n/)) { 88 | lines.push(` ${row}`) 89 | } 90 | return lines.join('\n') 91 | } 92 | 93 | function assertDumpName(name: string) { 94 | if (/^[\d a-z_-]+$/i.test(name)) return 95 | throw new Error( 96 | `Dump name "${name}" is not valid. You can only use numbers, letters, spaces, dashes and underscores!`, 97 | ) 98 | } 99 | -------------------------------------------------------------------------------- /packages/sdk/src/utils/error-logger.ts: -------------------------------------------------------------------------------- 1 | import { createWriteStream, mkdirSync, WriteStream } from 'node:fs' 2 | import { join as joinPath } from 'node:path' 3 | 4 | import { DocumentRef } from '@typestream/core-protocol/resources' 5 | import { differenceInSeconds } from 'date-fns' 6 | 7 | import { ProjectPaths } from '../paths/project-paths.js' 8 | 9 | export class ErrorLogger { 10 | private writeStream: WriteStream 11 | private startTime: Date 12 | 13 | constructor(projectPaths: ProjectPaths, private pipeName: string) { 14 | this.startTime = new Date() 15 | 16 | const logsPath = joinPath(projectPaths.path, 'logs') 17 | mkdirSync(logsPath, { recursive: true }) 18 | const timeStr = this.startTime.toISOString().replaceAll(':', '-') 19 | const path = joinPath(logsPath, `${pipeName}_${timeStr}.log`) 20 | 21 | this.writeStream = createWriteStream(path, { autoClose: true }) 22 | 23 | this.writeStream.write( 24 | `[${new Date().toISOString()}] Started processing\n\n`, 25 | ) 26 | } 27 | 28 | write({ 29 | error, 30 | inputDocumentRef, 31 | }: { 32 | error: Error 33 | inputDocumentRef: DocumentRef 34 | }) { 35 | const now = new Date().toISOString() 36 | const stack = error.stack 37 | ? 
'\n' + 38 | error.stack 39 | .split('\n') 40 | .map(x => ` ${x}`) 41 | .join('\n') 42 | : '' 43 | 44 | this.writeStream.write( 45 | `[${now}] Error at: ${this.pipeName}\n Input file: ${JSON.stringify( 46 | inputDocumentRef, 47 | )}\n Message: ${error.message}${stack}\n\n`, 48 | ) 49 | } 50 | 51 | close() { 52 | const finishTime = new Date() 53 | 54 | const seconds = differenceInSeconds(finishTime, this.startTime) 55 | 56 | this.writeStream.write( 57 | `[${finishTime.toISOString()}] Finished processing in ${readableTime( 58 | seconds, 59 | )}`, 60 | ) 61 | this.writeStream.close() 62 | } 63 | } 64 | 65 | function readableTime(seconds: number) { 66 | if (seconds <= 60) { 67 | return `${Math.round(seconds)} sec` 68 | } 69 | 70 | const mins = Math.floor(seconds / 60) 71 | const truncatedSeconds = Math.round(seconds - mins * 60) 72 | 73 | if (mins <= 60) { 74 | return `${mins} min ${truncatedSeconds} sec` 75 | } 76 | 77 | const hours = Math.floor(mins / 60) 78 | const truncatedMins = Math.round(mins - hours * 60) 79 | 80 | return `${hours} h ${truncatedMins} min ${truncatedSeconds} sec` 81 | } 82 | -------------------------------------------------------------------------------- /packages/sdk/src/utils/load-project-env.ts: -------------------------------------------------------------------------------- 1 | import { existsSync } from 'node:fs' 2 | 3 | import { config } from 'dotenv-safe' 4 | 5 | export function loadProjectEnv() { 6 | // As the CLI can also be used outside an existing project, 7 | // first check whether an .env.example file exists. 8 | // If there isn't one, skip loading. 9 | if (!existsSync('.env.example')) return 10 | 11 | config() 12 | } 13 | -------------------------------------------------------------------------------- /packages/sdk/src/utils/log-replace.ts: -------------------------------------------------------------------------------- 1 | import ansiEscapes from 'ansi-escapes' 2 | import cliCursor from 'cli-cursor' 3 | 4 | /** 5 | * Our own replacement for the log-update package because it 6 | * struggles with outputs that are taller than the current terminal. 7 | * 8 | * To deal with this, we simply clear the whole terminal (including history) 9 | * and then write our own output to the terminal. 10 | * 11 | * See: 12 | * - https://github.com/vadimdemedes/ink/issues/382 13 | * - https://github.com/sindresorhus/log-update/issues/51 14 | */ 15 | export const logReplace = { 16 | write(text: string) { 17 | cliCursor.hide() 18 | process.stdout.write(ansiEscapes.clearTerminal) 19 | process.stdout.write(text) 20 | }, 21 | clear() { 22 | process.stdout.write(ansiEscapes.clearTerminal) 23 | cliCursor.show() 24 | }, 25 | } 26 | -------------------------------------------------------------------------------- /packages/sdk/src/utils/observe-async.ts: -------------------------------------------------------------------------------- 1 | import { Observable } from 'rxjs' 2 | 3 | // Taken from https://github.com/shroudedcode/apk-mitm/blob/v1.2.1/src/utils/observe-async.ts 4 | 5 | /** 6 | * Wraps an async function and produces an `Observable` that reacts to the 7 | * function resolving (`complete` notification), rejecting (`error` 8 | * notification), and calling the `next` callback (`next` notification), making 9 | * it easier to write `async`/`await`-based code that reports its progress 10 | * through an `Observable` *without* forgetting to handle errors. 
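* Every call to `next` inside `fn` surfaces as a `next` notification on the returned Observable (this is how `runPipeProgress` streams its WatchProgress updates).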
11 | */ 12 | export default function observeAsync<T>( 13 | fn: (next: (value: T) => void) => Promise<void>, 14 | ): Observable<T> { 15 | return new Observable<T>(subscriber => { 16 | fn(value => subscriber.next(value)) 17 | .then(() => subscriber.complete()) 18 | .catch(error => subscriber.error(error)) 19 | }) 20 | } 21 | -------------------------------------------------------------------------------- /packages/sdk/src/utils/observe-directory.ts: -------------------------------------------------------------------------------- 1 | import { watch } from 'chokidar' 2 | import { Observable } from 'rxjs' 3 | 4 | interface FileWatchEvent { 5 | type: 'add' | 'addDir' | 'change' | 'unlink' | 'unlinkDir' 6 | path: string 7 | } 8 | 9 | export function observeDirectory(path: string) { 10 | return new Observable<FileWatchEvent>(subscriber => { 11 | const watcher = watch(path, { 12 | ignoreInitial: true, 13 | ignored: ['**/generated-types/**'], 14 | }) 15 | 16 | watcher.on('all', (type, path) => { 17 | subscriber.next({ type, path }) 18 | }) 19 | 20 | watcher.on('error', error => subscriber.error(error)) 21 | 22 | return async () => { 23 | await watcher.close() 24 | } 25 | }) 26 | } 27 | -------------------------------------------------------------------------------- /packages/sdk/src/utils/promise-pool.ts: -------------------------------------------------------------------------------- 1 | export async function promisePool<T>({ 2 | concurrency, 3 | generator, 4 | fn, 5 | }: { 6 | generator: AsyncGenerator<T> | Generator<T> 7 | fn: (arg0: T) => Promise<void> 8 | concurrency: number 9 | }) { 10 | let stop = false 11 | 12 | const workerFn = async () => { 13 | while (!stop) { 14 | const { value, done } = await generator.next() 15 | if (done) { 16 | stop = true 17 | break 18 | } 19 | await fn(value) 20 | } 21 | } 22 | 23 | /** 24 | * Spin up `concurrency` workers that all pull items from the shared generator until it is exhausted. 25 | */ 26 | const workers: Promise<void>[] = [] 27 | for (let i = 0; i < concurrency; i++) { 28 | workers.push(workerFn()) 29 | } 30 | 31 | await Promise.all(workers) 32 | } 33 | -------------------------------------------------------------------------------- /packages/sdk/src/utils/read-dir.ts: -------------------------------------------------------------------------------- 1 | import { readdir } from 'node:fs/promises' 2 | import { join } from 'node:path' 3 | 4 | export async function* getFilesIn( 5 | directoryPath: string, 6 | { recursive = false } = {}, 7 | ): AsyncGenerator<string> { 8 | // Filters the raw generator to remove `.DS_Store` files.
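// (.DS_Store files are Finder metadata that macOS drops into browsed directories; they are never real documents.)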
--------------------------------------------------------------------------------
/packages/sdk/src/utils/observe-directory.ts:
--------------------------------------------------------------------------------
import { watch } from 'chokidar'
import { Observable } from 'rxjs'

interface FileWatchEvent {
  type: 'add' | 'addDir' | 'change' | 'unlink' | 'unlinkDir'
  path: string
}

export function observeDirectory(path: string) {
  return new Observable<FileWatchEvent>(subscriber => {
    const watcher = watch(path, {
      ignoreInitial: true,
      ignored: ['**/generated-types/**'],
    })

    watcher.on('all', (type, path) => {
      subscriber.next({ type, path })
    })

    watcher.on('error', error => subscriber.error(error))

    return async () => {
      await watcher.close()
    }
  })
}
--------------------------------------------------------------------------------
/packages/sdk/src/utils/promise-pool.ts:
--------------------------------------------------------------------------------
/**
 * Creates `concurrency`-many workers that all pull values from the shared
 * generator and process them with `fn` until the generator is exhausted.
 */
export async function promisePool<T>({
  concurrency,
  generator,
  fn,
}: {
  generator: AsyncGenerator<T> | Generator<T>
  fn: (arg0: T) => Promise<void>
  concurrency: number
}) {
  let stop = false

  const workerFn = async () => {
    while (!stop) {
      const { value, done } = await generator.next()
      if (done) {
        stop = true
        break
      }
      await fn(value)
    }
  }

  const workers: Promise<void>[] = []
  for (let i = 0; i < concurrency; i++) {
    workers.push(workerFn())
  }

  await Promise.all(workers)
}
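
// Usage sketch (added for illustration, not part of the original module):
// drain a generator with at most four tasks in flight at any time.
export async function examplePromisePoolUsage() {
  async function* numbers() {
    for (let i = 0; i < 10; i++) yield i
  }

  await promisePool({
    generator: numbers(),
    fn: async n => {
      console.log('processing', n)
    },
    concurrency: 4,
  })
}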
--------------------------------------------------------------------------------
/packages/sdk/src/utils/read-dir.ts:
--------------------------------------------------------------------------------
import { readdir } from 'node:fs/promises'
import { join } from 'node:path'

export async function* getFilesIn(
  directoryPath: string,
  { recursive = false } = {},
): AsyncGenerator<string> {
  // Filters the raw generator to remove `.DS_Store` files.
  for await (const name of getFilesInRaw({
    baseDir: directoryPath,
    recursive,
  })) {
    if (!name.includes('.DS_Store')) yield name
  }
}

async function* getFilesInRaw({
  baseDir,
  recursive = false,
  nestingPath = '',
}: {
  baseDir: string
  recursive?: boolean
  nestingPath?: string
}): AsyncGenerator<string> {
  const currentPath = join(baseDir, nestingPath)
  const entries = await readdir(currentPath, { withFileTypes: true })

  for (const entry of entries) {
    if (entry.isFile()) {
      const res = join(nestingPath, entry.name)
      yield res
    }

    if (recursive && entry.isDirectory()) {
      const newNesting = join(nestingPath, entry.name)
      yield* getFilesInRaw({
        baseDir,
        nestingPath: newNesting,
        recursive: true,
      })
    }
  }
}
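
// Usage sketch (added for illustration, not part of the original module —
// the './samples' path is made up): list every file below a directory,
// including nested ones, as paths relative to the starting directory.
export async function exampleGetFilesInUsage() {
  for await (const relativePath of getFilesIn('./samples', {
    recursive: true,
  })) {
    console.log(relativePath)
  }
}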
--------------------------------------------------------------------------------
/packages/sdk/src/utils/read-full-stream.ts:
--------------------------------------------------------------------------------
import { Readable } from 'node:stream'

export async function readFullStream(stream: Readable) {
  const chunks: Buffer[] = []
  for await (const chunk of stream) chunks.push(chunk)
  return Buffer.concat(chunks)
}
--------------------------------------------------------------------------------
/packages/sdk/src/watch/error-summary.ts:
--------------------------------------------------------------------------------
import chalk from 'chalk'

import { basedOn, toSum } from '../utils/array-utils.js'
import { boldBox } from '../utils/chalk-extensions.js'

/**
 * Class used by the `watch` command to capture errors during pipe execution
 * and print a summary of the most common errors afterwards.
 */
export class ErrorSummary {
  private errorMap: Map<string, Error[]> = new Map()

  public captureError(error: Error) {
    if (!this.errorMap.has(error.message)) {
      this.errorMap.set(error.message, [])
    }
    this.errorMap.get(error.message)!.push(error)
  }

  public isEmpty() {
    return this.errorMap.size === 0
  }

  public render({ onlyShowTop }: { onlyShowTop?: number } = {}) {
    const lines: string[] = []

    const errors = [...this.errorMap.entries()]
      .map(([message, instances]) => ({ message, instances }))
      .sort(basedOn(error => error.instances.length, 'desc'))

    const topErrorTypes = errors.slice(0, onlyShowTop)

    lines.push(
      chalk.red('The following errors occurred:\n'),
      ...topErrorTypes.flatMap(error => this.renderError(error)),
    )

    const totalErrorCount = errors
      .map(error => error.instances.length)
      .reduce(...toSum)
    const topErrorCount = topErrorTypes
      .map(error => error.instances.length)
      .reduce(...toSum)

    const additionalErrors = totalErrorCount - topErrorCount
    if (additionalErrors > 0) {
      lines.push('', chalk.red.italic(`+ ${additionalErrors} other errors`))
    }

    return lines
  }

  private renderError(errorType: { message: string; instances: Error[] }) {
    const error = errorType.instances[0]
    const errorName = error.name ?? 'UnknownError'
    const count = errorType.instances.length

    const messageLines = errorType.message.split('\n')
    const firstMessageLine = messageLines.at(0)
    const remainingMessageLines = messageLines.slice(1)

    const countString = `${count}x`

    const prefix = boldBox(countString) + ` ${chalk.bold(errorName)}:`
    // We have to manually calculate the length because the ANSI escape
    // codes inserted by chalk would distort `prefix.length`
    const prefixLength = 1 + countString.length + 2 + errorName.length + 1

    const firstLine = chalk.red(
      `${prefix} ${firstMessageLine ?? chalk.italic('Unknown error!')}`,
    )
    const remainingLines = remainingMessageLines.map(line =>
      chalk.red(`${' '.repeat(prefixLength)} ${line}`),
    )

    return [firstLine, ...remainingLines]
  }
}
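
// Usage sketch (added for illustration, not part of the original module):
// identical error messages are grouped, counted, and rendered by frequency.
export function exampleErrorSummaryUsage() {
  const summary = new ErrorSummary()
  summary.captureError(new Error('Missing field "title"'))
  summary.captureError(new Error('Missing field "title"'))
  summary.captureError(new Error('Invalid date'))

  if (!summary.isEmpty())
    console.log(summary.render({ onlyShowTop: 3 }).join('\n'))
}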
--------------------------------------------------------------------------------
/packages/sdk/src/watch/render-watch-progress.ts:
--------------------------------------------------------------------------------
import { relative } from 'node:path'

import chalk from 'chalk'
import ct from 'chalk-template'

import { boldBox } from '../utils/chalk-extensions.js'
import { WatchProgress } from './watch-progress.js'

export function renderWatchProgress(
  progress: WatchProgress,
  { error }: { error?: Error } = {},
) {
  const documentCounts = renderDocumentCounts(progress)
  const dumpFiles = renderDumpFiles(progress)

  const lines: string[] = [
    '▶ ' + chalk.bold(renderProgressingLine(progress)),
    ...(documentCounts ? ['', documentCounts] : []),
    ...(error ? ['', ...chalk.red(error.message).split('\n')] : []),
    ...(dumpFiles ? ['', dumpFiles] : []),
  ]

  if (!progress.errorSummary.isEmpty())
    lines.push('', ...progress.errorSummary.render({ onlyShowTop: 3 }))

  return ['', ...lines, ''].map(line => ` ${line}`).join('\n')
}

function renderProgressingLine(progress: WatchProgress) {
  if (progress.stage === 'BUILD') return 'Building pipe code…'
  if (progress.stage === 'DONE')
    return `Finished processing all ${progress.documentNumbers.currentDocumentNumber} documents!`

  const documentNumber = progress.documentNumbers.currentDocumentNumber

  return (
    `Processing document ${documentNumber}` +
    (progress.documentNumbers.total
      ? ` out of ${progress.documentNumbers.total}…`
      : '…')
  )
}

function renderDumpFiles(progress: WatchProgress) {
  if (!progress.dumpFiles || progress.dumpFiles.length === 0) return

  const dumpFiles = progress.dumpFiles.map(x => {
    const cwd = process.cwd()
    const path = relative(cwd, x.path)
    return ct` - {bold ${x.key}}: ${path}`
  })

  return chalk.gray(`Currently dumping into:
${dumpFiles.join('\n')}`)
}

function renderDocumentCounts(progress: WatchProgress) {
  const rawCounts = progress.documentNumbers
  if (progress.stage === 'BUILD') return ''
  if (rawCounts.failed + rawCounts.succeeded === 0)
    return chalk.gray(`${boldBox('0')} documents processed so far`)

  const counts = [
    {
      value: rawCounts.succeeded,
      label: 'succeeded',
      color: chalk.green,
    },
    { value: rawCounts.failed, label: 'failed', color: chalk.red },
    { value: rawCounts.published, label: 'results', color: chalk.blue },
  ]

  return counts
    .flatMap(count => {
      if (count.value === 0) return []
      return count.color(`${boldBox(count.value.toString())} ${count.label}`)
    })
    .join(chalk.dim(' | '))
}
--------------------------------------------------------------------------------
/packages/sdk/src/watch/run-pipe.ts:
--------------------------------------------------------------------------------
import {
  AnyDocument,
  resourceFromRef,
} from '@typestream/core-protocol/resources'

import { PipePath } from '../paths/pipe-paths.js'
import { ProjectPaths } from '../paths/project-paths.js'
import { buildPipe } from '../pipe/bundling/build.js'
import { getResourceProvider } from '../resources/providers/index.js'
import { PipeController } from '../runner/pipe-controller.js'
import { IpcDisconnectError } from '../runner/rpc.js'
import { processSchemas } from '../typing/process-schemas.js'
import { asyncGeneratorToArray } from '../utils/async-gen-to-array.js'
import { DataDumper } from '../utils/data-dumper.js'
import observeAsync from '../utils/observe-async.js'
import { ErrorSummary } from './error-summary.js'
import { WatchProgress } from './watch-progress.js'

interface PipeRunOptions {
  pipeName: string
  debuggingEnabled: boolean
  paths: {
    project: ProjectPaths
    pipe: PipePath
  }
  sampleCount: number
  abortSignal: AbortSignal
  checkSampleCounts: boolean
  captureSchemas: boolean
}

export function runPipe(options: PipeRunOptions) {
  return observeAsync<WatchProgress>(async next => {
    const progress: WatchProgress = {
      stage: 'BUILD',
      documentNumbers: {
        total: 0,
        failed: 0,
        succeeded: 0,
        published: 0,
        currentDocumentNumber: 0,
      },
      errorSummary: new ErrorSummary(),
    }
    next(progress)

    const pipeController = new PipeController(options.debuggingEnabled)
    options.abortSignal.addEventListener('abort', () => {
      void pipeController.stop()
    })
    const dataDumper = new DataDumper(options.paths.project)

    await buildPipe(options.paths.pipe)

    const resourceRef = await pipeController.loadPipe(options.pipeName, {
      enableSchemaCapturing: options.captureSchemas,
      dumpFunction: dumpOptions => {
        dataDumper.dump(dumpOptions)
        progress.dumpFiles = dataDumper.activeDumps
      },
      enableWriting: false,
    })
    const resource = resourceFromRef(resourceRef)

    const provider = getResourceProvider(resource, options.paths.project)
    await provider.cacheSamples({
      count: options.sampleCount,
      validateCounts: options.checkSampleCounts,
    })

    const samples = await asyncGeneratorToArray(
      provider.getSamples() as AsyncGenerator<AnyDocument>,
    )

    progress.stage = 'PROCESS'
    progress.documentNumbers.total = samples.length
    for (const [index, doc] of samples.entries()) {
      progress.documentNumbers.currentDocumentNumber = index + 1

      next(progress)

      try {
        const result = await pipeController.processDocument(doc)

        progress.documentNumbers.succeeded++
        progress.documentNumbers.published += result.documentsToPublish.length
      } catch (error: any) {
        if (error instanceof IpcDisconnectError) return

        progress.documentNumbers.failed++
        progress.errorSummary.captureError(error)
      }

      next(progress)
    }

    const schemas = await pipeController.getCapturedSchemas()

    await pipeController.stop()

    progress.stage = 'DONE'
    next(progress)

    await processSchemas(schemas, options.paths.project)
  })
}
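
// Usage sketch (added for illustration, not part of the original module —
// assumes `projectPaths` and `pipePath` were loaded elsewhere, e.g. by the
// `watch` command, and that `renderWatchProgress` from
// ./render-watch-progress.js is imported; option values are made up):
//
//   const controller = new AbortController()
//   runPipe({
//     pipeName: 'my-pipe',
//     debuggingEnabled: false,
//     paths: { project: projectPaths, pipe: pipePath },
//     sampleCount: 10,
//     abortSignal: controller.signal,
//     checkSampleCounts: true,
//     captureSchemas: true,
//   }).subscribe({
//     next: progress => console.log(renderWatchProgress(progress)),
//     error: error => console.error(error),
//   })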
--------------------------------------------------------------------------------
/packages/sdk/src/watch/watch-progress.ts:
--------------------------------------------------------------------------------
import { ErrorSummary } from './error-summary.js'

export type ActiveDumpFiles = { key: string; path: string }[]
export interface WatchProgress {
  stage: 'BUILD' | 'PROCESS' | 'DONE'

  documentNumbers: DocumentNumbers

  /** A summary of the errors that have occurred so far. */
  errorSummary: ErrorSummary

  /** The dump files that the project is currently writing to. */
  dumpFiles?: ActiveDumpFiles
}

interface DocumentNumbers {
  /** The number of the document currently being processed. */
  currentDocumentNumber: number

  /** The total number of documents being processed, if known. */
  total?: number

  /** How many documents have been successfully processed so far. */
  succeeded: number

  /** How many documents have failed to process so far. */
  failed: number

  /** How many documents have been published so far. */
  published: number
}
--------------------------------------------------------------------------------
/packages/sdk/tsconfig.json:
--------------------------------------------------------------------------------
{
  "compilerOptions": {
    "outDir": "dist",
    "rootDir": "src",
    "strict": true,
    "target": "ES2021",
    "module": "NodeNext",
    "skipLibCheck": true
  },
  "include": ["src/**/*"]
}
--------------------------------------------------------------------------------
/packages/tyst/.npmignore:
--------------------------------------------------------------------------------
# Necessary because npm otherwise completely ignores gitignored files even if
# they are explicitly listed under `files` in the `package.json` file
# See: https://npm.github.io/publishing-pkgs-docs/publishing/the-npmignore-file.html
--------------------------------------------------------------------------------
/packages/tyst/index.js:
--------------------------------------------------------------------------------
#!/usr/bin/env node

import chalk from 'chalk'

const isNpx = process.env.npm_execpath?.includes('npx') ?? false
const tystCommand = isNpx ? 'npx tyst' : 'tyst'

// prettier-ignore
console.log(`
${chalk.yellowBright.bold(`It seems like you're trying to use "${tystCommand}" outside of a TypeStream project!`)}

You probably want to either:

- ${chalk.bold('switch to a directory')} containing one using ${chalk.greenBright.bold('cd <path>')}
- ${chalk.bold('create a new project')} using ${chalk.greenBright.bold('npm init typestream')}

If you're still stuck, check out ${chalk.blueBright.underline('https://typestream.dev')} for help.
`)

process.exit(1)
--------------------------------------------------------------------------------
/packages/tyst/package.json:
--------------------------------------------------------------------------------
{
  "name": "tyst",
  "version": "0.0.6",
  "description": "CLI that warns people about using tyst (TypeStream CLI) outside of TypeStream projects",
  "repository": {
    "type": "git",
    "url": "https://github.com/scopashq/typestream",
    "path": "packages/tyst"
  },
  "license": "MIT",
  "author": "Scopas Technologies GmbH",
  "type": "module",
  "bin": {
    "tyst": "./index.js"
  },
  "files": [
    "index.js"
  ],
  "scripts": {
    "check-deps": "depcheck --ignores @types/node",
    "clean": "rimraf node_modules/"
  },
  "dependencies": {
    "chalk": "^5.0.1"
  },
  "devDependencies": {
    "@types/node": "16"
  },
  "engines": {
    "node": ">=16.0.0"
  }
}
--------------------------------------------------------------------------------
/tsconfig.eslint.json:
--------------------------------------------------------------------------------
{
  "compilerOptions": { "noEmit": true },
  "include": [
    "./.eslintrc.cjs",
    "./packages/cli/bin/*.js",
    "./packages/create-typestream/samples/*.ts"
  ]
}
--------------------------------------------------------------------------------