├── .circleci └── config.yml ├── .editorconfig ├── .eslintignore ├── .eslintrc.js ├── .github └── workflows │ ├── beta.yml │ ├── build.yml │ ├── gh-pages.yml │ ├── markdown.yml │ └── release.yml ├── .gitignore ├── .markdown.config.json ├── CODEOWNERS ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── docker ├── Dockerfile └── Dockerfile.cpu ├── docs ├── .nojekyll ├── GLOSSORY.md ├── INSTALL.md ├── README.md ├── _navbar.md ├── _sidebar.md ├── contributing │ ├── contribute-a-script.md │ ├── guide-to-collaborator.md │ └── guide-to-contributor.md ├── faq │ ├── index.md │ ├── pipcook-framework.md │ └── plugins.md ├── images │ ├── community_qrcode.jpg │ ├── logo.png │ └── plugin-script-map.png ├── index.html ├── manual │ ├── README.md │ ├── intro-to-boa.md │ ├── intro-to-framework.md │ ├── intro-to-pipeline.md │ ├── intro-to-script.md │ ├── pipcook-models.md │ └── pipcook-tools.md ├── meta │ └── PROJECT_GUIDE.md ├── rfcs │ ├── 0000-new-pipeline.md │ └── 0001-framework-migration.md ├── spec │ ├── dataset.md │ └── script.md ├── tutorials │ ├── component-image-classification.md │ ├── machine-learning-overview.md │ └── using-python-functions-in-nodejs.md └── zh-cn │ ├── GLOSSORY.md │ ├── INSTALL.md │ ├── README.md │ ├── _navbar.md │ ├── _sidebar.md │ ├── contributing │ ├── contribute-a-script.md │ ├── guide-to-collaborator.md │ └── guide-to-contributor.md │ ├── faq │ ├── index.md │ ├── pipcook-framework.md │ └── plugins.md │ ├── manual │ ├── intro-to-boa.md │ ├── intro-to-framework.md │ ├── intro-to-pipeline.md │ ├── intro-to-script.md │ └── pipcook-tools.md │ ├── spec │ ├── dataset.md │ └── script.md │ └── tutorials │ ├── component-image-classification.md │ ├── machine-learning-overview.md │ └── using-python-functions-in-nodejs.md ├── example └── pipelines │ ├── README-CN.md │ ├── README.md │ ├── image-classification-mobilenet.json │ ├── image-classification-resnet.json │ ├── object-detection-yolo.json │ └── text-classification-bayes.json ├── lerna.json ├── notebooks ├── pipcook_image_classification.ipynb └── pipcook_object_detection.ipynb ├── package-lock.json ├── package.json ├── packages ├── cli │ ├── .npmignore │ ├── .nycrc │ ├── package-lock.json │ ├── package.json │ ├── serve-resource │ │ ├── image │ │ │ └── index.html │ │ └── text │ │ │ └── index.html │ ├── src │ │ ├── bin │ │ │ ├── pipcook.test.ts │ │ │ └── pipcook.ts │ │ ├── constants │ │ │ ├── index.test.ts │ │ │ └── index.ts │ │ ├── runtime.test.ts │ │ ├── runtime.ts │ │ ├── standalone-impl.test.ts │ │ ├── standalone-impl.ts │ │ └── utils │ │ │ ├── cache.test.ts │ │ │ ├── cache.ts │ │ │ ├── framework.test.ts │ │ │ ├── framework.ts │ │ │ ├── index.test.ts │ │ │ ├── index.ts │ │ │ ├── plugin.test.ts │ │ │ ├── plugin.ts │ │ │ ├── post-predict.test.ts │ │ │ ├── post-predict.ts │ │ │ ├── predict-databset.test.ts │ │ │ ├── predict-dataset.ts │ │ │ ├── script.test.ts │ │ │ ├── script.ts │ │ │ ├── serve-predict.test.ts │ │ │ └── serve-predict.ts │ └── tsconfig.json ├── core │ ├── .gitignore │ ├── .npmignore │ ├── package-lock.json │ ├── package.json │ ├── src │ │ ├── artifact.ts │ │ ├── dataset-pool │ │ │ ├── format │ │ │ │ ├── coco.test.ts │ │ │ │ ├── coco.ts │ │ │ │ ├── csv.test.ts │ │ │ │ ├── csv.ts │ │ │ │ ├── index.ts │ │ │ │ ├── pascal-voc.test.ts │ │ │ │ └── pascal-voc.ts │ │ │ ├── index.test.ts │ │ │ ├── index.ts │ │ │ ├── pipeline-type │ │ │ │ ├── image-classification.test.ts │ │ │ │ ├── image-classification.ts │ │ │ │ ├── index.ts │ │ │ │ ├── object-detection.test.ts │ │ │ │ ├── object-detection.ts │ │ │ │ └── 
text-classification.ts │ │ │ └── types │ │ │ │ ├── format │ │ │ │ ├── coco.ts │ │ │ │ ├── csv.ts │ │ │ │ ├── index.ts │ │ │ │ └── pascal-voc.ts │ │ │ │ ├── index.ts │ │ │ │ └── pipeline-type │ │ │ │ ├── image-classification.ts │ │ │ │ ├── index.ts │ │ │ │ ├── object-detection.ts │ │ │ │ └── text-classification.ts │ │ ├── index.ts │ │ └── runtime.ts │ └── tsconfig.json └── costa │ ├── .gitignore │ ├── .npmignore │ ├── .nycrc │ ├── benchmark │ ├── bootstrap.js │ ├── makefile │ └── plugin-install.js │ ├── package-lock.json │ ├── package.json │ ├── src │ ├── constans.ts │ ├── index.test.ts │ ├── index.ts │ ├── types.ts │ └── utils.ts │ └── tsconfig.json ├── test └── pipelines │ ├── image-classification-mobilenet.json │ ├── image-classification-resnet.json │ ├── object-detection-yolo.json │ └── text-classification-bayes.json ├── tools ├── benchmark.sh ├── coverage.sh ├── mkdoc.sh └── run_pipeline.sh └── tsconfig.json /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2.1 2 | orbs: 3 | node: circleci/node@3.0.0 4 | 5 | jobs: 6 | benchmark: 7 | docker: 8 | - image: cimg/node:14.11.0 9 | steps: 10 | - checkout 11 | - run: bash ./tools/benchmark.sh 12 | 13 | workflows: 14 | node-bench: 15 | when: 16 | condition: 17 | equal: [master, << pipeline.git.branch >>] 18 | jobs: 19 | - benchmark 20 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | 2 | root = true 3 | 4 | [*] 5 | insert_final_newline = true 6 | -------------------------------------------------------------------------------- /.eslintignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | docs 3 | dist 4 | test 5 | pipcook_venv 6 | output 7 | example 8 | -------------------------------------------------------------------------------- /.eslintrc.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | 'env': { 3 | 'node': true, 4 | 'es6': true 5 | }, 6 | 'extends': [ 7 | 'eslint:recommended', 8 | 'plugin:@typescript-eslint/eslint-recommended', 9 | 'plugin:@typescript-eslint/recommended' 10 | ], 11 | 'rules': { 12 | 'quotes': [ 'error', 'single' ], 13 | 'no-trailing-spaces': 'error', 14 | 'prefer-const': 'warn', 15 | 'no-useless-escape': 'off', 16 | '@typescript-eslint/no-var-requires': 'off', 17 | '@typescript-eslint/camelcase': 'off', 18 | '@typescript-eslint/no-explicit-any': 'off', 19 | '@typescript-eslint/explicit-function-return-type': 'off', 20 | '@typescript-eslint/no-use-before-define': [ 21 | 'error', 22 | { 23 | 'functions': false 24 | } 25 | ], 26 | '@typescript-eslint/no-unused-vars': [ 'error' ], 27 | 'comma-spacing': [ 28 | 'error', 29 | { 30 | 'before': false, 'after': true 31 | } 32 | ], 33 | 'indent': [ 'error', 2 ], 34 | 'keyword-spacing': [ 35 | 'error', { 'before': true } 36 | ], 37 | 'array-bracket-spacing': [ 'error', 'always' ], 38 | 'space-infix-ops': 'error', 39 | 'object-curly-spacing': [ 'error', 'always' ], 40 | 'semi': [ 'error', 'always' ], 41 | 'eol-last': [ 'error', 'always' ], 42 | 'comma-dangle': [ 'error', 'never' ], 43 | 'no-multi-spaces': 'error', 44 | 'no-multiple-empty-lines': 'error', 45 | 'no-irregular-whitespace': 'error', 46 | 'arrow-parens': [ 'error', 'always' ], 47 | 'arrow-spacing': [ 'error', { before: true, after: true } ], 48 | 'block-spacing': 'error', 49 | 'brace-style': [ 'error', '1tbs',
{ allowSingleLine: true } ], 50 | 'comma-style': 'error', 51 | 'no-unused-vars': [ 'off' ] // Use @typescript-eslint/no-unused-vars instead, otherwise variables would be checked twice 52 | } 53 | }; 54 | -------------------------------------------------------------------------------- /.github/workflows/beta.yml: -------------------------------------------------------------------------------- 1 | name: Beta Build Packages 2 | on: 3 | schedule: 4 | - cron: '0 0 * * *' 5 | 6 | jobs: 7 | release: 8 | name: Release 9 | runs-on: ubuntu-latest 10 | steps: 11 | - name: Delete branch 12 | uses: dawidd6/action-delete-branch@v3 13 | with: 14 | github_token: ${{ secrets.GITHUB_TOKEN }} 15 | branches: beta 16 | 17 | - name: Create Beta Branch 18 | uses: peterjgrainger/action-create-branch@v1.0.0 19 | env: 20 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 21 | with: 22 | branch: 'beta' 23 | 24 | - name: Checkout beta 25 | uses: actions/checkout@v2 26 | with: 27 | ref: beta 28 | - name: Activity check 29 | run: | 30 | curl -sL https://api.github.com/repos/${{github.repository}}/commits?since=yesterday | jq -r '.[0]' > $HOME/commit.json 31 | date="$(jq -r '.commit.committer.date' $HOME/commit.json)" 32 | echo "Last commit is made @ $date" 33 | 34 | if [ -n "${date}" ] && [ "${date}" != "null" ]; then 35 | echo "UPDATED=true" >> $GITHUB_ENV 36 | fi 37 | shell: bash 38 | 39 | - name: Setup Node.js 40 | uses: actions/setup-node@v1 41 | if: env.UPDATED == 'true' 42 | with: 43 | node-version: 14 44 | 45 | - name: Install Dependencies 46 | if: env.UPDATED == 'true' 47 | run: | 48 | npm install 49 | npm run build 50 | 51 | - name: Publish npm beta packages 52 | if: env.UPDATED == 'true' 53 | run: | 54 | sha_short="$(git rev-parse --short HEAD)" 55 | echo "//registry.npmjs.org/:_authToken=${{ secrets.npm_token }}" > ~/.npmrc 56 | git config --global user.name 'pipcook' 57 | git config --global user.email 'queyue.crk@alibaba-inc.com' 58 | git add .
59 | git commit -m "bump beta version" --allow-empty 60 | npm run beta-release-tag -- --preid "${sha_short}-beta" 61 | npm run beta-release 62 | env: 63 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 64 | NODE_AUTH_TOKEN: ${{secrets.npm_token}} 65 | NPM_AUTH_TOKEN: ${{ secrets.npm_token }} 66 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: build 2 | on: 3 | push: 4 | branches: 5 | - main 6 | paths-ignore: 7 | - 'docker/**' 8 | - 'docs/**' 9 | - 'notebooks/**' 10 | - '.github/markdown.yml' 11 | - '*.md' 12 | pull_request: 13 | types: [ opened, synchronize, reopened, ready_for_review ] 14 | branches: 15 | - main 16 | paths-ignore: 17 | - 'docker/**' 18 | - 'docs/**' 19 | - 'notebooks/**' 20 | - '.github/markdown.yml' 21 | - '*.md' 22 | 23 | jobs: 24 | universal: 25 | name: Build on node ${{ matrix.node_version }} and ${{ matrix.os }} 26 | if: ${{ github.event_name == 'push' || github.event.pull_request.draft == false }} 27 | runs-on: ${{ matrix.os }} 28 | timeout-minutes: 30 29 | strategy: 30 | matrix: 31 | node_version: ['14', '16'] 32 | os: [ubuntu-latest, macOS-latest, windows-latest] 33 | steps: 34 | - name: Cancel previous runs 35 | uses: imgcook/cancel-workflow-action@81524cf38ed0e3a5865a550dde6118d26b7a5ede 36 | with: 37 | access_token: ${{ github.token }} 38 | exception: main 39 | - uses: actions/checkout@v2 40 | - name: Restore node_modules 41 | uses: actions/cache@v2 42 | with: 43 | path: | 44 | node_modules 45 | packages/*/node_modules 46 | key: ${{ runner.os }}-${{ matrix.node_version }}-nodepkg-${{ hashFiles('**/package-lock.json') }} 47 | restore-keys: | 48 | ${{ runner.os }}-nodepkg-${{ matrix.node_version }} 49 | ${{ runner.os }}-nodepkg 50 | - name: Using Node.js ${{ matrix.node_version }} 51 | uses: actions/setup-node@v1 52 | with: 53 | node-version: ${{ matrix.node_version }} 54 | - name: Building Pipcook 55 | run: | 56 | npm install 57 | npm run build 58 | du -h -d 1 59 | - name: Running unit tests 60 | if: ${{ runner.os != 'Linux' || matrix.node_version != 14 }} 61 | run: | 62 | npm run test 63 | - name: Run cov 64 | if: ${{ runner.os == 'Linux' && matrix.node_version == 14 }} 65 | run: | 66 | npm run cov && npm run cov:report 67 | - name: Coveralls 68 | if: ${{ runner.os == 'Linux' && matrix.node_version == 14 }} 69 | uses: coverallsapp/github-action@master 70 | with: 71 | github-token: ${{ secrets.GITHUB_TOKEN }} 72 | - name: Run bayes 73 | run: | 74 | node ./packages/cli/dist/bin/pipcook.js train ./test/pipelines/text-classification-bayes.json -o my-workspace -d -m http://pc-github.oss-us-west-1.aliyuncs.com/framework/ 75 | node ./packages/cli/dist/bin/pipcook.js predict ./my-workspace -s testword -d -m http://pc-github.oss-us-west-1.aliyuncs.com/framework/ 76 | - name: Run mobilenet 77 | run: | 78 | node ./packages/cli/dist/bin/pipcook.js train ./test/pipelines/image-classification-mobilenet.json -o my-workspace -d -m http://pc-github.oss-us-west-1.aliyuncs.com/framework/ 79 | - name: Run resnet 80 | run: | 81 | node ./packages/cli/dist/bin/pipcook.js train ./test/pipelines/image-classification-resnet.json -o my-workspace -d -m http://pc-github.oss-us-west-1.aliyuncs.com/framework/ 82 | - name: Run yolo 83 | run: | 84 | node ./packages/cli/dist/bin/pipcook.js train ./test/pipelines/object-detection-yolo.json -o my-workspace -d -m http://pc-github.oss-us-west-1.aliyuncs.com/framework/ 85 | 
-------------------------------------------------------------------------------- /.github/workflows/gh-pages.yml: -------------------------------------------------------------------------------- 1 | name: gh-pages 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | 8 | jobs: 9 | deployment: 10 | runs-on: macOS-latest 11 | steps: 12 | - uses: actions/checkout@v2 13 | - name: Use Node.js 14 | uses: actions/setup-node@v1 15 | with: 16 | node-version: 14 17 | - name: Build documentation 18 | run: | 19 | npm install 20 | npm run build 21 | npm run typedoc 22 | - name: Deploy to GitHub Pages 23 | uses: peaceiris/actions-gh-pages@v3 24 | with: 25 | github_token: ${{ secrets.GITHUB_SECRET_TOKEN }} 26 | publish_dir: ./docs 27 | publish_branch: gh-pages 28 | -------------------------------------------------------------------------------- /.github/workflows/markdown.yml: -------------------------------------------------------------------------------- 1 | name: check markdown files 2 | on: 3 | push: 4 | branches: 5 | - main 6 | pull_request: 7 | types: [ opened, synchronize, reopened, ready_for_review ] 8 | branches: 9 | - main 10 | 11 | jobs: 12 | markdown-link-check: 13 | if: ${{ github.event_name == 'push' || github.event.pull_request.draft == false }} 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: actions/checkout@v2 17 | - uses: gaurav-nelson/github-action-markdown-link-check@v1 18 | with: 19 | use-quiet-mode: 'no' 20 | use-verbose-mode: 'yes' 21 | config-file: '.markdown.config.json' 22 | folder-path: 'docs' 23 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Publish Packages 2 | on: 3 | push: 4 | tags: 5 | - 'v*' 6 | 7 | jobs: 8 | release: 9 | name: Release 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Checkout Repo 13 | uses: actions/checkout@v2 14 | 15 | - name: Setup Node.js 16 | uses: actions/setup-node@v1 17 | with: 18 | node-version: 14 19 | 20 | - name: Install Dependencies 21 | run: | 22 | npm install 23 | npm run build 24 | 25 | - name: Get release version 26 | id: get_version 27 | run: echo "RELEASE_VERSION=$(echo ${GITHUB_REF:11})" >> $GITHUB_ENV 28 | 29 | - name: Publish check 30 | run: | 31 | ver=$RELEASE_VERSION 32 | check_pkg() { 33 | curl -sL "http://registry.npmjs.com/@pipcook/$1" | jq -r '.versions."'$2'".version' 34 | } 35 | 36 | check_image() { 37 | curl -sL "https://hub.docker.com/v2/repositories/pipcook/pipcook/tags/$1" | jq -r '.name' 38 | } 39 | 40 | if 41 | [ $(check_pkg pipcook-cli $ver) == $ver ] && 42 | [ $(check_pkg daemon $ver) == $ver ] 43 | then 44 | echo "NPM_PUBLISHED=true" >> $GITHUB_ENV 45 | fi 46 | 47 | if [ $(check_image $ver) == $ver ]; then 48 | echo "DOCKER_PUBLISHED=true" >> $GITHUB_ENV 49 | fi 50 | 51 | - name: Publish npm packages 52 | if: env.NPM_PUBLISHED != 'true' 53 | run: | 54 | echo "//registry.npmjs.org/:_authToken=${{ secrets.npm_token }}" > ~/.npmrc 55 | git config --global user.name 'pipcook' 56 | git config --global user.email 'queyue.crk@alibaba-inc.com' 57 | git add . 
58 | git commit -m "bump version" --allow-empty 59 | npm run release 60 | env: 61 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 62 | NODE_AUTH_TOKEN: ${{secrets.npm_token}} 63 | NPM_AUTH_TOKEN: ${{ secrets.npm_token }} 64 | 65 | - name: Publish to Docker registry 66 | uses: docker/build-push-action@v1 67 | if: env.DOCKER_PUBLISHED != 'true' 68 | with: 69 | username: ${{ secrets.DOCKER_USERNAME }} 70 | password: ${{ secrets.DOCKER_PASSWORD }} 71 | repository: pipcook/pipcook 72 | dockerfile: docker/Dockerfile 73 | build_args: VER=${{ env.RELEASE_VERSION }} 74 | tags: "latest,${{ env.RELEASE_VERSION }}" 75 | 76 | 77 | - name: Sync from dockerhub to Aliyun 78 | env: 79 | ALI_DOCKER_USERNAME: ${{ secrets.ALI_DOCKER_USERNAME }} 80 | ALI_DOCKER_PASSWORD: ${{secrets.ALI_DOCKER_PASSWORD}} 81 | DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }} 82 | DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }} 83 | run: | 84 | wget https://github.com/AliyunContainerService/image-syncer/releases/download/v1.2.0/image-syncer-v1.2.0-linux-amd64.tar.gz 85 | tar -zxvf image-syncer-v1.2.0-linux-amd64.tar.gz 86 | 87 | echo {\"registry.cn-hangzhou.aliyuncs.com\": {\"username\": \"$ALI_DOCKER_USERNAME\", \"password\": \"$ALI_DOCKER_PASSWORD\"} \ 88 | , \"registry.hub.docker.com\": {\"username\":\"$DOCKER_USERNAME\", \"password\":\"$DOCKER_PASSWORD\"}} >> auth.json 89 | 90 | echo {\"pipcook/pipcook:latest\": \"registry.cn-hangzhou.aliyuncs.com/pipcook/pipcook:latest\", \ 91 | \"pipcook/pipcook:$RELEASE_VERSION\": \"registry.cn-hangzhou.aliyuncs.com/pipcook/pipcook:$RELEASE_VERSION\"} >> images.json 92 | 93 | ./image-syncer --auth=./auth.json --images=./images.json --namespace=pipcook --registry=registry.cn-hangzhou.aliyuncs.com --retries=3 94 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | output 3 | /lerna-debug.log 4 | pipcook-pipeline-* 5 | .DS_STORE 6 | .vscode/ 7 | dist 8 | tsconfig.tsbuildinfo 9 | samples/ 10 | samples*/ 11 | .pipcook-log/ 12 | pipcook_venv 13 | .temp 14 | detectron2 15 | .temp1 16 | pipcook-output/ 17 | docs/typedoc 18 | .eslintcache 19 | packages/pipboard/build 20 | .eslintcache 21 | ENV 22 | .test 23 | .pip 24 | packages/cli/assets/server/** 25 | packages/daemon/.github/** 26 | .pipcook 27 | __pycache__ 28 | .history 29 | *.pb 30 | coverage 31 | .nyc_output 32 | -------------------------------------------------------------------------------- /.markdown.config.json: -------------------------------------------------------------------------------- 1 | { 2 | "ignorePatterns": [{ 3 | "pattern": "^/.*" 4 | }, { 5 | "pattern": "*/index.html" 6 | }] 7 | } 8 | -------------------------------------------------------------------------------- /CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @FeelyChau @yorkie 2 | *.ts @WenheLI @yorkie 3 | 4 | /docs/ @yorkie 5 | /tools/ @FeelyChau 6 | /test/ @FeelyChau 7 | /notebooks/ @rickycao-qy 8 | 9 | /packages/cli/ @rickycao-qy 10 | /packages/core/ @WenheLI @FeelyChau 11 | /packages/costa/ @FeelyChau @yorkie 12 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Alibaba Open Source Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to 
making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at opensource@alibaba-inc.com. All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 
67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 72 | 73 | [homepage]: https://www.contributor-covenant.org -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | We are excited that you are interested in contributing to Pipcook. Before submitting your contribution, please take a moment to read through these few small guidelines. 3 | 4 | ## Reporting Issues 5 | - We use GitHub issues to manage our issues. We use statuses to mark the progress of our issues. 6 | 7 | - Try to search for your issue; it may have already been asked, answered, or even fixed in the development branch. 8 | 9 | - Check if the issue is reproducible with the latest stable version of Pipcook. If you are using a pre-release, please indicate the specific version you are using. 10 | 11 | - You are required to clearly describe the steps necessary to reproduce the issue you are running into. If an issue lacks a clear description for more than 5 days after we ask for one, we will close it. 12 | 13 | - If your issue is resolved but still open, don’t hesitate to close it. In case you found a solution by yourself, it could be helpful to explain how you fixed it. 14 | 15 | ## Pull Request Guidelines 16 | - Only code that's ready for release should be committed to the main branch. All development should be done in dedicated branches. 17 | - Check out a **new** topic branch from the main branch, and merge back against the main branch. 18 | - If adding a new feature: 19 | - Add an accompanying test case. 20 | - Provide a convincing reason to add this feature. Ideally you should open a suggestion issue first and have it greenlighted before working on it. 21 | - If fixing a bug: 22 | - If you are resolving a special issue, add `(fix #xxxx[,#xxx])` (#xxxx is the issue id) in your title for a better release log. 23 | - Provide a detailed description of the bug in the PR. Live demo preferred. 24 | - Add appropriate test coverage if applicable. 25 | 26 | 27 | ## Git Commit Specification 28 | - Your commit messages must follow our git commit specification. 29 | - We will check your commit message; if it does not conform to the specification, the commit will be automatically refused, so make sure you have read the specification above. 30 | 31 | ## Providing Feedback 32 | We are happy to hear any feedback and to answer any questions.
You can join our Dingding Group or ask away on Stack Overflow using the tag Pipcook. -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:11.2.2-cudnn8-runtime-ubuntu18.04 2 | ARG VER=latest 3 | 4 | LABEL version=${VER} 5 | LABEL description="docker image for pipcook runtime" 6 | 7 | ENV TF_FORCE_GPU_ALLOW_GROWTH=true 8 | 9 | WORKDIR /root/ 10 | RUN apt-get update && apt-get install -y curl wget python git libglib2.0-0 libsm6 libxrender-dev 11 | 12 | RUN curl -sL https://deb.nodesource.com/setup_14.x | bash - 13 | RUN apt-get install -y nodejs 14 | 15 | RUN npm install @pipcook/cli@${VER} -g --unsafe-perm 16 | ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/usr/local/cuda-11.2/targets/x86_64-linux/lib/ 17 | -------------------------------------------------------------------------------- /docker/Dockerfile.cpu: -------------------------------------------------------------------------------- 1 | FROM node:14 2 | ARG VER=latest 3 | 4 | LABEL version=${VER} 5 | LABEL description="docker image for pipcook runtime without gpu" 6 | 7 | WORKDIR /root/ 8 | 9 | RUN npm install @pipcook/cli@${VER} -g --unsafe-perm 10 | -------------------------------------------------------------------------------- /docs/.nojekyll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alibaba/pipcook/dcac2cfdee76fa999befefbcee29f325247ef0a2/docs/.nojekyll -------------------------------------------------------------------------------- /docs/GLOSSORY.md: -------------------------------------------------------------------------------- 1 | # Glossary 2 | 3 | This glossary hopes to definitively represent the tacit and explicit conventions applied in the Pipcook toolkit, while providing a reference for users and contributors. It aims to describe the concepts and either detail their corresponding API or link to other relevant parts of the documentation which do so. By linking to glossary entries from the API Reference and User Guide, we may minimize redundancy and inconsistency. 4 | 5 | ### Pipboard 6 | 7 | The abbreviation of Pipcook board. On Pipboard, you can manage and operate your pipelines and plug-ins through the Web interface. 8 | 9 | ### Pipcook 10 | 11 | The Pipcook project, generally https://github.com/alibaba/pipcook. 12 | 13 | ### Pipcook Daemon 14 | 15 | It is responsible for the management and execution of pipeline components. It provides remote access to [Pipcook Tools][] and [Pipboard][] through HTTP, and also allows other clients to integrate with the Pipcook Daemon over HTTP. 16 | 17 | ### Pipcook Script 18 | 19 | Scripts are the Lego blocks of a pipeline. By selecting different scripts, you can quickly complete different pipelines to train different models. 20 | 21 | ### Pipcook Tools 22 | 23 | The abbreviation of the Pipcook command-line tool, installed via `npm install @pipcook/cli`. 24 | 25 | ### Pipeline 26 | 27 | In computing, a pipeline, also known as a data pipeline, is a set of data processing elements connected in series, where the output of one element is the input of the next one. The elements of a pipeline are often executed in parallel or in time-sliced fashion.
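To make the definition concrete, here is an illustrative sketch in plain JavaScript (it is not the Pipcook API, just the general concept) of elements connected in series, where each element's output is the next element's input:

```js
// Each stage is one data processing element; runPipeline connects them in series.
const stages = [
  (text) => text.trim(),
  (text) => text.toLowerCase(),
  (text) => text.split(/\s+/)
];

// The output of one stage becomes the input of the next one.
const runPipeline = (input) => stages.reduce((data, stage) => stage(data), input);

console.log(runPipeline('  Hello Pipcook World  '));
// => [ 'hello', 'pipcook', 'world' ]
```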
28 | 29 | [Pipcook Tools]: #pipcook-tools 30 | [Pipboard]: #pipboard 31 | -------------------------------------------------------------------------------- /docs/INSTALL.md: -------------------------------------------------------------------------------- 1 | # Installing Pipcook 2 | 3 | There are different ways to install [Pipcook][]: 4 | 5 | - [Install via NPM][]. This is the best approach for most users. It will provide a stable version, and pre-built packages are available for most platforms. 6 | - [Build from source][]. This is best for users who want the latest-and-greatest features and aren’t afraid of running brand-new code. This is also needed for users who wish to contribute to the project. 7 | 8 | Before starting the installation, please make sure your environment meets the following requirements: 9 | 10 | - macOS, Linux, Windows 11 | - Node.js >= 12.17 or >= 14.0.0 12 | 13 | ## Install via NPM 14 | 15 | Installing [Pipcook][] via NPM is easy, just run: 16 | 17 | ```sh 18 | $ npm install -g @pipcook/cli 19 | ``` 20 | 21 | Then check that it is installed via `pipcook --help`. 22 | 23 | ## Install via Docker 24 | 25 | You can also install Pipcook with Docker. Just run this command: 26 | 27 | ```sh 28 | $ docker pull pipcook/pipcook:latest 29 | ``` 30 | 31 | After pulling successfully, run the command below to start: 32 | 33 | ```sh 34 | $ docker run -it pipcook/pipcook:latest /bin/bash 35 | ``` 36 | 37 | ## Troubleshooting 38 | 39 | If you have any installation problems, please report them to the [issue tracker](https://github.com/alibaba/pipcook/issues/new). 40 | 41 | [Install via NPM]: #install-via-npm 42 | [Install via Docker]: #install-via-docker 43 | [Build from source]: contributing/guide-to-contributor#download-source 44 | [Pipcook]: https://github.com/alibaba/pipcook 45 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # Pipcook 2 | 3 | The Pipcook Project is an open-source toolkit to enable and accelerate the intelligentization of front-end engineering for Web developers. 4 | 5 | # Usage 6 | Using Pipcook for machine learning development is very simple. It only takes four steps: install, train, test, and deploy. 7 | 8 | Install the [Pipcook][] command-line tool: 9 | 10 | ```shell 11 | $ npm install -g @pipcook/cli 12 | ``` 13 | 14 | Then train from any one of these [pipelines](./example/pipelines/); we take image classification as an example: 15 | 16 | ```shell 17 | $ pipcook train https://cdn.jsdelivr.net/gh/alibaba/pipcook@main/example/pipelines/image-classification-mobilenet.json -o ./output 18 | ``` 19 | The dataset specified by the pipeline includes two categories of images: `avatar` and `blurBackground`. 20 | After training, we can predict the category of an image: 21 | 22 | ```shell 23 | $ pipcook predict ./output/image-classification-mobilenet.json -s ./output/data/validation/blurBackground/71197_223__30.7_36.jpg 24 | ✔ Origin result:[{"id":1,"category":"blurBackground","score":0.9998120665550232}] 25 | ``` 26 | 27 | The input is a `blurBackground` image from the validation dataset, and the model determines that its category is `blurBackground`. 28 | 29 | Want to deploy it?
30 | ```shell 31 | $ pipcook serve ./output 32 | ℹ preparing framework 33 | ℹ preparing scripts 34 | ℹ preparing artifact plugins 35 | ℹ initializing framework packages 36 | Pipcook has served at: http://localhost:9091 37 | ``` 38 | 39 | Then you can open the browser and try your image classification server. 40 | 41 | ## Why Pipcook 42 | 43 | With the mission of enabling Web engineers to utilize the power of machine learning without any prerequisites, and the vision of leading the front-end technical field toward intelligence, [Pipcook][] aims to become the toolkit for the cross-cutting area of machine learning and front-end interaction. 44 | 45 | We design Pipcook's machine learning APIs truly for front-end applications, focusing on the front-end area and developing from the Web engineer's point of view. With the principle of being friendly to the Web, we will push the whole area forward with machine learning engineering. 46 | 47 | ## What's Pipcook 48 | 49 | The project provides subprojects including a machine learning pipeline framework, management tools, and a JavaScript runtime for machine learning; these can also be used as building blocks in conjunction with other projects. 50 | 51 | ### Principles 52 | 53 | [Pipcook][] is an open-source project guided by strong principles, aiming to be modular and flexible in user experience. It is open to the community to help set its direction. 54 | 55 | - **Modular** the project includes a set of projects that have well-defined functions and APIs that work together. 56 | - **Swappable** the project includes enough modules to build what Pipcook has done, but its modular architecture ensures that most of the modules can be swapped out for different implementations. 57 | 58 | ### Audience 59 | 60 | [Pipcook][] is intended for Web engineers looking to: 61 | 62 | - learn what machine learning is. 63 | - train their models and serve them. 64 | - optimize their own models for better evaluation results, like higher accuracy for image classification. 65 | 66 | > If any of the above describes you, just try it via the [installation guide](INSTALL.md). 67 | 68 | ### Subprojects 69 | 70 | __Pipcook Pipeline__ 71 | 72 | It's used to represent ML pipelines consisting of Pipcook scripts. This layer ensures the stability and scalability of the whole system and uses a [Script](manual/intro-to-script.md) mechanism to support rich functions including data source, data flow, training, and validation. 73 | 74 | A Pipcook Pipeline is generally composed of several scripts. Through different scripts and configurations, the final output is a directory which contains the trained model. 75 | 76 | __Pipcook Bridge to Python__ 77 | 78 | For JavaScript engineers, the most difficult part is the lack of a mature machine learning toolset in the ecosystem. In Pipcook, a module called **Boa** provides access to Python packages by bridging the interface of [CPython][] using N-API. 79 | 80 | With it, developers can use packages such as `numpy`, `scikit-learn`, `jieba`, `tensorflow`, or any other package from the Python ecosystem in the Node.js runtime through JavaScript. 81 | 82 | ## The Next 83 | 84 | Can't wait to start a [Pipcook][] project?
You can follow the guidance below to proceed to the next step: 85 | 86 | - [Learn how to install Pipcook?](INSTALL.md) 87 | - [Learn machine learning](tutorials/machine-learning-overview.md) 88 | - [Learn Pipcook from Pipeline](manual/intro-to-pipeline.md) 89 | - [Learn Pipcook from Boa](manual/intro-to-boa.md) 90 | - [Learn Pipcook Tools](manual/pipcook-tools.md) 91 | 92 | [Pipcook]: https://github.com/alibaba/pipcook 93 | [CPython]: https://github.com/python/cpython 94 | -------------------------------------------------------------------------------- /docs/_navbar.md: -------------------------------------------------------------------------------- 1 | - API Reference 2 | - [Runtime](typedoc/runtime/index.html) 3 | - [Script](typedoc/script/index.html) 4 | - Translations 5 | - [English](/) 6 | - [中文](/zh-cn/) 7 | -------------------------------------------------------------------------------- /docs/_sidebar.md: -------------------------------------------------------------------------------- 1 | - [What's Pipcook](/README.md) 2 | - [Install](/INSTALL.md) 3 | - User Manual 4 | - [Introduction to Pipeline](/manual/intro-to-pipeline.md) 5 | - [Introduction to Script](/manual/intro-to-script.md) 6 | - [Introduction to Boa](/manual/intro-to-boa.md) 7 | - [Pipcook Tools](/manual/pipcook-tools.md) 8 | - Tutorials 9 | - [Machine Learning Overview](/tutorials/machine-learning-overview.md) 10 | 11 | 12 | - [Using Python functions in Node.js](/tutorials/using-python-functions-in-nodejs.md) 13 | - [Classify images of UI components](/tutorials/component-image-classification.md) 14 | - Contributing 15 | - [Contributor guide](/contributing/guide-to-contributor.md) 16 | - [Collaborator guide](/contributing/guide-to-collaborator.md) 17 | - [Contribute a script](/contributing/contribute-a-script.md) 18 | - Specification 19 | - [Script Specification](/spec/script.md) 20 | - [Dataset Specification](/spec/dataset.md) 21 | - [FAQ](/faq/index.md) 22 | - [Pipcook Framework](/faq/pipcook-framework.md) 23 | - [Plugins](/faq/plugins.md) 24 | - [Glossary](/GLOSSORY.md) 25 | -------------------------------------------------------------------------------- /docs/contributing/guide-to-contributor.md: -------------------------------------------------------------------------------- 1 | # Contributor Guide 2 | 3 | Pipcook is a community-driven open-source project. We do our best to make every bug fix, every new feature, and the way this project evolves visible and transparent to everyone in this community. 4 | 5 | Therefore, we try to keep everything from source code to documentation friendly to contributors, and we have laid out some paths to make it easier to participate in Pipcook. If you want to get involved, you can follow them. 6 | 7 | - If you are going to browse source code only, go to [GitHub](https://github.com/alibaba/pipcook). 8 | - If you are a rookie with no experience in contributing to any open-source project, we have organized [good first issue][] tasks for you, all of which are relatively simple and easy to start with. 9 | - If you want to learn machine learning through contributing to this project, you can try our [good first model][] to help us do some model implementation and migration tasks (rest assured, you only need to complete the call to the Python ecosystem through [Boa][]; see the sketch after this list). 10 | - Otherwise, discussions on any of our issues are open to everyone, and you are welcome to contribute your ideas.
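As referenced in the list above, here is a minimal sketch of what calling a Python package through [Boa][] looks like from Node.js; it assumes `numpy` is available in the Python environment that Boa provisions:

```js
const boa = require('@pipcook/boa');

// Import a Python module into the Node.js runtime.
const np = boa.import('numpy');

// boa.kwargs maps a plain object to Python keyword arguments.
const x = np.array([ 1, 2, 3, 4 ], boa.kwargs({ dtype: np.float32 }));

console.log(np.mean(x)); // 2.5
```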
11 | 12 | ## Submit a patch 13 | 14 | Next, let's take a look at how to submit patches to Pipcook. 15 | 16 | ### Requirements 17 | 18 | - macOS / Linux / Windows 19 | - Node.js >= 12.17 || >= 14.0.0 20 | 21 | ### Download source 22 | 23 | Clone the repository from GitHub: 24 | 25 | ```bash 26 | $ git clone git@github.com:alibaba/pipcook.git 27 | ``` 28 | 29 | ### Build from source 30 | 31 | Then install the requirements and build: 32 | 33 | ```bash 34 | $ npm install 35 | $ npm run build 36 | ``` 37 | 38 | We provide a way to use [tuna mirror](https://mirrors.tuna.tsinghua.edu.cn/) for downloading Python and packages: 39 | 40 | ```sh 41 | $ BOA_TUNA=1 npm install 42 | ``` 43 | 44 | Or you could specify your custom miniconda mirror and Python index page: 45 | 46 | ```sh 47 | $ export BOA_CONDA_MIRROR=https://mirrors.tuna.tsinghua.edu.cn/anaconda/miniconda # this is for miniconda 48 | $ export BOA_CONDA_INDEX=https://pypi.tuna.tsinghua.edu.cn/simple # this is for pip 49 | $ npm install 50 | ``` 51 | 52 | ### Test 53 | 54 | Run all the tests with the following command: 55 | 56 | ```bash 57 | $ npm test 58 | ``` 59 | 60 | And run the tests for a single package: 61 | 62 | ```bash 63 | $ ./node_modules/.bin/lerna run test --scope <package-name> 64 | ``` 65 | 66 | ### Pipeline 67 | 68 | ```bash 69 | $ sh tools/run_pipeline.sh <pipeline_name> 70 | ``` 71 | 72 | The `pipeline_name` is the name of the pipeline file under "test/pipelines", such as: 73 | 74 | - "text-bayes-classification" 75 | - "mnist-image-classification" 76 | - "databinding-image-classification" 77 | 78 | ### Database 79 | 80 | If your modification involves the `Database` structure, you need to write a migration script under `packages/daemon/src/migrations` (a minimal sketch appears at the end of this guide). 81 | For more detail about it, please refer to [Migration](https://sequelize.org/master/manual/migrations.html). 82 | 83 | ### Push and create a pull request 84 | 85 | After the local tests pass, you can push the code and create a pull request: 86 | 87 | ```sh 88 | $ git push git@github.com:<username>/pipcook.git 89 | ``` 90 | 91 | ## Internal documentations 92 | 93 | ### Plugin Specification 94 | 95 | You can refer [here](../spec/plugin.md) for the Plugin Specification. 96 | 97 | We have defined a set of interfaces for each plugin. Each type of plugin must be implemented strictly according to the interfaces. The detailed information is as follows: 98 | 99 | - [Data Collect](../spec/plugin/0-data-collect.md) 100 | - [Data Access](../spec/plugin/1-data-access.md) 101 | - [Data Process](../spec/plugin/2-data-process.md) 102 | - [Model Load](../spec/plugin/3-model-define.md) 103 | - [Model Train](../spec/plugin/4-model-train.md) 104 | - [Model Evaluate](../spec/plugin/5-model-evaluate.md) 105 | 106 | ### Dataset Specification 107 | 108 | For data reading and processing involved in the development, please refer to our [Dataset Specification](../spec/dataset.md).
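Returning to the Database note above, the shape of a migration script is sketched below; the table and column names here are hypothetical, so see the Sequelize migration docs linked above for the real conventions:

```js
'use strict';

// Hypothetical migration: add a nullable "description" column to a
// "pipelines" table on upgrade, and remove it again on rollback.
module.exports = {
  up: async (queryInterface, Sequelize) => {
    await queryInterface.addColumn('pipelines', 'description', {
      type: Sequelize.STRING,
      allowNull: true
    });
  },
  down: async (queryInterface) => {
    await queryInterface.removeColumn('pipelines', 'description');
  }
};
```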
109 | 110 | [good first issue]: https://github.com/alibaba/pipcook/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22 111 | [good first model]: https://github.com/alibaba/pipcook/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+model%22 112 | [Boa]: https://github.com/alibaba/pipcook/tree/master/packages/boa 113 | -------------------------------------------------------------------------------- /docs/faq/index.md: -------------------------------------------------------------------------------- 1 | # Frequently Asked Questions 2 | 3 | ## Questions about the Pipcook framework 4 | 5 | - [Where should I put the Pipcook JSON config file?](./pipcook-framework.md#q1) 6 | 7 | - [Why is it so slow to install Pipcook?](./pipcook-framework.md#q2) 8 | 9 | - [Can I use Pipcook in Electron?](./pipcook-framework.md#q3) 10 | 11 | - [Does Pipcook support the Windows platform?](./pipcook-framework.md#q4) 12 | 13 | ## Questions about Pipcook plugins (v1.x) 14 | 15 | - [How can I specify which GPU card to use for yolov5 training?](./plugins.md#q1) 16 | 17 | - [How can I tell plugins not to use the GPU even when you have the right GPU and CUDA environment for yolov5 training?](./plugins.md#q2) 18 | -------------------------------------------------------------------------------- /docs/faq/pipcook-framework.md: -------------------------------------------------------------------------------- 1 | # Frequently Asked Questions 2 | 3 | ## Where should I put the Pipcook JSON config file? 4 | 5 | Pipcook runs a daemon behind the scenes and provides its service to the user via the command-line tool or Pipboard. There is no restriction on the current working directory or where you should put your config file. You can run `pipcook run <url>` at any location, where url is the path of your config file, which can be either a local path or a remote URL. 6 | 7 | ## Why is it so slow to install Pipcook? 8 | 9 | Currently the installation of Pipcook and its plugins relies on the npm registry and the pip (Python) registry. These default registries may have a slow connection in your region. You can specify `pipcook init -c <npm-client>`, for example `pipcook init -c cnpm`, to change your npm client. Meanwhile, you can use `pipcook init --tuna` to use the tuna pip registry. 10 | 11 | ## Can I use Pipcook in Electron? 12 | 13 | Theoretically, as long as the environment supports Node.js >= 12.17 or >= 14.0.0 and the corresponding N-API, you can run Pipcook smoothly. Meanwhile, Pipcook will support producing WASM models so that you can easily integrate a model into your system. 14 | 15 | ## Does Pipcook support the Windows platform? 16 | 17 | Not yet. We will support Windows soon. 18 | -------------------------------------------------------------------------------- /docs/faq/plugins.md: -------------------------------------------------------------------------------- 1 | # Frequently Asked Questions (v1.x) 2 | 3 | ### How can I specify which GPU card to use for yolov5 training? 4 | 5 | You can just set the environment variable `$CUDA_VISIBLE_DEVICES` to your GPU card number to achieve this. If this environment variable is not set, plugins will just use GPU:0 by default. Notice that the environment variable should be set on the daemon (server) side before running the daemon. 6 | 7 | ### How can I tell plugins not to use the GPU even when you have the right GPU and CUDA environment for yolov5 training? 8 | 9 | By default, plugins will use the GPU if the environment is good for GPU training.
If you want to disable this feature, just set `export CUDA_VISIBLE_DEVICES=""`. Notice that the environment variable should be set on the daemon (server) side before running the daemon. 10 | -------------------------------------------------------------------------------- /docs/images/community_qrcode.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alibaba/pipcook/dcac2cfdee76fa999befefbcee29f325247ef0a2/docs/images/community_qrcode.jpg -------------------------------------------------------------------------------- /docs/images/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alibaba/pipcook/dcac2cfdee76fa999befefbcee29f325247ef0a2/docs/images/logo.png -------------------------------------------------------------------------------- /docs/images/plugin-script-map.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alibaba/pipcook/dcac2cfdee76fa999befefbcee29f325247ef0a2/docs/images/plugin-script-map.png -------------------------------------------------------------------------------- /docs/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Pipcook, JavaScript application framework for machine learning and its engineering 6 | 7 | 8 | 9 | 10 | 11 | 12 |
13 | 62 | 63 | 64 | 65 | 66 | -------------------------------------------------------------------------------- /docs/manual/README.md: -------------------------------------------------------------------------------- 1 | # Pipcook Manual 2 | -------------------------------------------------------------------------------- /docs/manual/intro-to-framework.md: -------------------------------------------------------------------------------- 1 | # Pipcook Framework 2 | 3 | Pipcook uses scripts that don't bundle some of the heavier dependencies like `@tensorflow/tfjs`, so how do we use them in our scripts? 4 | In fact, Pipcook packages these dependencies in a so-called `framework`, which is actually a set of packages related to the platform and Node.js version, as in the following pipeline: 5 | 6 | ```json 7 | { 8 | "specVersion": "2.0", 9 | "type": "ObjectDetection", 10 | "datasource": "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/object-detection-yolo/build/datasource.js?url=https://pc-github.oss-us-west-1.aliyuncs.com/dataset/object-detection-yolo-min.zip", 11 | "dataflow": [ 12 | "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/object-detection-yolo/build/dataflow.js?size=416&size=416" 13 | ], 14 | "model": "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/object-detection-yolo/build/model.js", 15 | "artifact": [{ 16 | "processor": "pipcook-artifact-zip@0.0.2", 17 | "target": "./object-detection-yolo-model.zip" 18 | }], 19 | "options": { 20 | "framework": "tfjs@3.8", 21 | "train": { 22 | "epochs": 10, 23 | "gpu": true 24 | } 25 | } 26 | } 27 | ``` 28 | 29 | This pipeline uses the framework `tfjs@3.8`, which contains `@tensorflow/tfjs-node@3.8` and `@tensorflow/tfjs-node-gpu@3.8`. Note that the script does not need to reference these two packages, but rather `@tensorflow/tfjs`. When the framework loads, it determines whether `@tensorflow/tfjs-node-gpu@3.8` needs to be set as the backend based on the `train.gpu` option. Since macOS does not support CUDA, the framework also smoothes out platform differences: `train.gpu` will not take effect on macOS systems. This capability is implemented by initialization scripts in the framework, which brings us to the framework's structure: each framework contains a framework description file, a framework initialization script, and several dependency folders. As an example, the `tfjs@3.8` package has the following directory structure: 30 | 31 | 32 | ```sh 33 | ├── framework.json 34 | ├── index.js 35 | └── node_modules 36 | ``` 37 | 38 | Where `framework.json` is the framework's description file, `index.js` is the framework's initialization script, and `node_modules` contains the dependency folder that the framework will provide.
39 | The content of `framework.json` is as follows: 40 | 41 | ```json 42 | { 43 | "name": "tfjs", 44 | "version": "3.8", 45 | "packages": [ 46 | { 47 | "name": "@tensorflow/tfjs-node", 48 | "version": "3.8.0", 49 | "type": "js" 50 | }, 51 | { 52 | "name": "@tensorflow/tfjs-node-gpu", 53 | "version": "3.8.0", 54 | "type": "js" 55 | } 56 | ] 57 | } 58 | ``` 59 | 60 | The initialization script exports an initialization function that will be called each time the pipeline reaches the framework initialization phase, passing in the `options` field of the pipeline file, as in the following example: 61 | 62 | ```js 63 | const os = require('os'); 64 | 65 | module.exports = { 66 | initialize(opts) { 67 | if ( 68 | opts && opts.train 69 | && ( 70 | opts.train.gpu === 'true' 71 | || opts.train.gpu === true 72 | ) 73 | ) { 74 | if (os.platform() !== 'darwin') { 75 | require('@tensorflow/tfjs-node-gpu'); 76 | console.log('gpu enabled'); 77 | } else { 78 | require('@tensorflow/tfjs-node'); 79 | console.warn('platform darwin does not support gpu'); 80 | } 81 | } else { 82 | require('@tensorflow/tfjs-node'); 83 | console.log('gpu disabled'); 84 | } 85 | } 86 | } 87 | ``` 88 | 89 | In addition, the backend of `tfjs` has different binary libraries for different platforms, so Pipcook will choose to download different packages depending on the environment; for example, on macOS with Node.js v12.22, the actual framework file downloaded is `https://pipcook-cloud.oss-cn-hangzhou.aliyuncs.com/framework/node12-py37/tfjs%403.8-darwin-x64-v8.zip`, while on Linux with Node.js v14.0, it will download `https://pipcook-cloud.oss-cn-hangzhou.aliyuncs.com/framework/node14-py37/tfjs%403.8-linux-x64-v6.zip`. Of course, if the script depends on a custom framework, you can also use it directly by filling in the `framework` option with the full URL, or by creating your own framework mirror and specifying the framework mirror address with the `-m` argument of the `train`, `predict` and `serve` commands. 90 | A complete framework mirror directory structure is as follows: 91 | 92 | 93 | 94 | ```sh 95 | ├── node14-py37/{framework-name}@{version}-{os}-{arch}-{napi-version}.zip 96 | └── node12-py37/{framework-name}@{version}-{os}-{arch}-{napi-version}.zip 97 | ``` 98 | 99 | The `py37` in the path is the version of Python supported by the referenced `Boa`, which currently only supports v3.7. 100 | -------------------------------------------------------------------------------- /docs/manual/intro-to-pipeline.md: -------------------------------------------------------------------------------- 1 | # Introduction to Pipeline 2 | 3 | In Pipcook, we use Pipeline to represent the training process of a model, so in general, what kind of pipeline is needed to train a model?
The developer can use a JSON file to describe the modeling pipeline, from sample collection and model definition to training and model evaluation: 4 | 5 | ```js 6 | { 7 | "specVersion": "2.0", 8 | "datasource": "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/image-classification-mobilenet/build/datasource.js?url=http://ai-sample.oss-cn-hangzhou.aliyuncs.com/image_classification/datasets/imageclass-test.zip", 9 | "dataflow": [ 10 | "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/image-classification-mobilenet/build/dataflow.js?size=224&size=224" 11 | ], 12 | "model": "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/image-classification-mobilenet/build/model.js", 13 | "artifact": [{ 14 | "processor": "pipcook-artifact-zip@0.0.2", 15 | "target": "/tmp/mobilenet-model.zip" 16 | }], 17 | "options": { 18 | "framework": "tfjs@3.8", 19 | "train": { 20 | "epochs": 20, 21 | "validationRequired": true 22 | } 23 | } 24 | } 25 | ``` 26 | 27 | As shown above, a Pipeline consists of three types of scripts (`datasource`, `dataflow` and `model`), as well as the build plugins in `artifact` and the Pipeline options in `options`. 28 | Each Pipcook script passes parameters via a URI query, and the parameters of the model script can also be defined via `options.train`. 29 | `artifact` defines a set of build plugins, each of which will be called in turn after training completes, allowing the output model to be transformed, packaged, deployed, etc. 30 | `options` contains the framework definition and the definition of training parameters. 31 | Then, Pipcook prepares the environment, runs the scripts, and finally outputs and processes the model based on the URIs and parameters defined in this JSON file. 32 | 33 | > See [Introduction to Script](./intro-to-script.md) for more details about Pipcook scripts. 34 | 35 | > The scripts of a pipeline support the `http`, `https` and `file` protocols. 36 | 37 | Next, when we have defined such a pipeline, we can run it through Pipcook. 38 | 39 | ## Preparation 40 | 41 | Follow the [Pipcook Tools Installation](./pipcook-tools.md) to get Pipcook ready. 42 | 43 | ## Run Pipeline 44 | 45 | Save the above pipeline JSON to disk, and run: 46 | 47 | ```sh 48 | $ pipcook run /path/to/your/pipeline-config.json 49 | ``` 50 | 51 | Or serve it from a static resource server: 52 | 53 | ```sh 54 | $ pipcook run https://host/path/to/your/pipeline-config.json 55 | ``` 56 | 57 | After execution, the trained model files are generated in a folder named with the current timestamp under the current [working directory](https://linux.die.net/man/3/cwd), and the model files are compressed by the build plugin `pipcook-artifact-zip` into a zip file and saved to the `/tmp` directory. 58 | 59 | ``` 60 | ├── pipeline-config.json 61 | ├── cache 62 | ├── data 63 | ├── framework 64 | ├── model 65 | └── scripts 66 | ``` 67 | 68 | The directory named `model` holds the model files; the ability to use the model will be added in the next release. 69 | -------------------------------------------------------------------------------- /docs/manual/intro-to-script.md: -------------------------------------------------------------------------------- 1 | # Pipcook Scripts 2 | 3 | In Pipcook, each Pipeline represents a specific machine learning task, so how do we define a workflow? Pipcook uses scripts to define and configure the different phases in a Pipeline.
A Pipcook script is a JavaScript file that exports a specific method, and comes in 3 different types: `datasource`, `dataflow` and `model`, as defined [here](../spec/script.md). A text classification task, for example, could be composed of the following scripts: 4 | 5 | - `datasource` The datasource script is used to download the sample data and provide the data access interface. 6 | - `dataflow` converts the format of the downloaded dataset to a format acceptable to the model that follows (not needed in this example). 7 | - `model` defines the model for text classification, a [naive Bayes classifier](https://en.wikipedia.org/wiki/Naive_Bayes_classifier), obtains samples for model training through the sample data interface, and evaluates accuracy. 8 | 9 | > The source code of the above Pipeline is defined [here](https://github.com/alibaba/pipcook/blob/main/example/pipelines/text-classification-bayes.json). 10 | 11 | With the above example, for a text classification task we follow a machine learning workflow that executes different types of subtasks in order, where each subtask corresponds to a user-defined script, and the user can quickly tune the Pipeline for the whole task at a low cost. 12 | 13 | > The available official scripts are [here](https://github.com/imgcook/pipcook-script). 14 | -------------------------------------------------------------------------------- /docs/manual/pipcook-models.md: -------------------------------------------------------------------------------- 1 | # Introduction to Pipcook models 2 | 3 | Pipcook now supports two types of model: Node.js and WASM. In this manual, we will dive into these two types of models and show users how to use them. 4 | 5 | ## nodejs 6 | 7 | ### Background 8 | 9 | Node.js models are powered by Boa, a Python-JS bridge that allows users to directly run Python modules with JavaScript syntax. 10 | 11 | The common folder structure for such a model looks like: 12 | ``` 13 | ├── boapkg.js 14 | ├── index.js 15 | ├── metadata.json 16 | ├── model 17 | └── package.json 18 | ``` 19 | 20 | The black magic here is to use Boa to connect JavaScript and Python. This allows users to tap the flourishing Python ecosystem, with a powerful PC serving as the backend, from Node.js. 21 | 22 | But the trade-off is a heavy runtime and a long installation time. 23 | 24 | ### How to use 25 | 26 | To use the Node.js model, the following steps are needed: 27 | 28 | ```bash 29 | $ cd output/nodejs 30 | $ npm install # To install deps 31 | ``` 32 | 33 | Then just treat `output/nodejs` as an npm package with a `predict` function. You can include it in any Node.js runtime and use the following code to call the model: 34 | ```js 35 | const model = require('./nodejs/index'); 36 | const result = await model.predict([1, 2, 3, 4]); 37 | ``` 38 | 39 | ## WASM 40 | 41 | ### Background 42 | 43 | To give a more portable and user-friendly model solution, Pipcook uses [TVM](https://tvm.apache.org/docs) to compile a given model to WASM format. In this way, the model can run natively in both the browser and Node.js. However, since the WebGPU standard is not stable yet, Pipcook does not target the compiled model to GPU. In other words, **the WASM format only works on CPU right now**.
44 | 45 | The generated folder structure looks like: 46 | 47 | ``` 48 | ├── browser.js 49 | ├── node.js 50 | ├── model.wasi.js 51 | ├── model.wasi.wasm 52 | ├── modelDesc.json 53 | ├── modelParams.parmas 54 | ├── modelSpec.json 55 | └── tvmjs.bundle.js 56 | ``` 57 | 58 | ### How to use 59 | 60 | The entry files are `browser.js` and `node.js`; as the names suggest, they are prepared for the browser environment and the Node.js environment respectively. 61 | To run the model, users just need to include the corresponding entry file and call the `predict` function. 62 | 63 | Node.js: 64 | ```js 65 | const model = require('./node'); 66 | const data = [0, 1, 2, 3]; // Mock data, the real data layout depends on the model's definition 67 | const res = model.predict(data); // return type is Float32Array 68 | ``` 69 | 70 | Browser: 71 | ```js 72 | const model = require('./browser.js'); 73 | const data = [0, 1, 2, 3]; // Mock data, the real data layout depends on the model's definition 74 | const res = model.predict(data); // return type is Float32Array 75 | ``` 76 | -------------------------------------------------------------------------------- /docs/manual/pipcook-tools.md: -------------------------------------------------------------------------------- 1 | # Pipcook Tools 2 | 3 | Pipcook Tools is a command-line tool provided by Pipcook for developers. It helps you run and manage your pipelines. 4 | 5 | ## Installation 6 | 7 | ```sh 8 | $ npm install @pipcook/cli -g 9 | ``` 10 | 11 | Follow [Install](../INSTALL.md) for other installation guides. 12 | 13 | 14 | ## User's Guide 15 | 16 | To run a Pipeline from a URI, simply execute the following command: 17 | 18 | ```sh 19 | $ pipcook run protocol://location/to/your/pipeline-config.json 20 | ``` 21 | 22 | The supported pipeline file protocols are: `http:`, `https:`, `file:`. `file:` is the default protocol if not defined. 23 | 24 | More run options can be obtained with the following command: 25 | 26 | ```sh 27 | $ pipcook run --help 28 | ``` 29 | 30 | > For more information on writing a pipeline, please see [here](./intro-to-pipeline.md). 31 | 32 | ## Cache Management 33 | 34 | When a Pipeline runs via the `pipcook run` command, any script or framework fetched over a non-`file` protocol will be 35 | saved to a cache directory to speed up the next run. 36 | 37 | If you want to remove these cache files manually, you can execute the following: 38 | ```sh 39 | $ pipcook clean 40 | ``` 41 | -------------------------------------------------------------------------------- /docs/rfcs/0001-framework-migration.md: -------------------------------------------------------------------------------- 1 | - Start Date: 2020-11-25 2 | - Target Major Version: 2.0 3 | - Reference Issues: (leave this empty) 4 | - Implementation PR: (leave this empty) 5 | - Author: @FeelyChau @SirM2z 6 | 7 | # Summary 8 | 9 | We have accumulated a lot of technical debt while iterating on 1.x, and we hope to solve these problems in 2.0. 10 | 11 | # Test 12 | 13 | Unit tests and integration tests have very different running costs, but 1.x does not classify the tests well: unit and integration tests are mixed together, which reduces the efficiency of continuous integration and makes tests harder to write. In 2.0, we will make a clear division between unit testing and integration testing. 14 | 15 | ## Unit test 16 | 17 | Unit testing is the testing of functions.
Before writing unit tests for a function, we should clarify the input and output boundaries of the function under test and how it handles exceptions, then cover those boundaries precisely and keep the tests cheap to run, so that any change can be verified at any time. 18 | 19 | In this way, we can ensure that the function works as designed. We wrote many unit tests in 1.x, but some problems remain: 20 | 21 | * **insufficient coverage**: `cli` is not covered, and the coverage rate in the other projects is 88% 22 | * **insufficient case design**: there is no complete test design for each unit 23 | * **too many test frameworks**: `boa` uses `Tape`, `daemon` uses `Mocha`, and the other projects use `Jasmine` 24 | 25 | It is necessary to solve the above problems. The specific objectives and measures: 26 | 27 | * the unit test coverage rate should be increased to more than **95%** 28 | * the input and output boundaries of functions should be defined, tested and covered, and coverage of `cli` should be enabled 29 | * the test framework should be unified on `Ava`. Unit test cases should be as free of IO and side effects as possible; Ava's parallel running mechanism will force us to write more efficient test code 30 | 31 | Switching the test framework to `Ava` will bring some refactoring work: 32 | 33 | ```js 34 | // ava 35 | import test from 'ava'; 36 | import * as fs from 'fs-extra'; 37 | import * as sinon from 'sinon'; 38 | import { shuffle } from './public'; 39 | 40 | test('array shuffle', (t) => { 41 | const array = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 ]; 42 | shuffle(array); 43 | t.notDeepEqual(array, [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 ]); 44 | t.deepEqual(array.sort(), [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 ]); 45 | }); 46 | 47 | // serial hook 48 | test.serial.afterEach(() => { 49 | sinon.restore(); 50 | }); 51 | 52 | // serial cases: the callbacks are async because they await the stubbed IO 53 | test.serial('test a', async (t) => { 54 | sinon.stub(fs, 'readJson').resolves({}); 55 | t.deepEqual(await fs.readJson('mockFileName.json'), {}); 56 | }); 57 | 58 | test.serial('test b', async (t) => { 59 | sinon.stub(fs, 'readJson').resolves(undefined); 60 | t.is(await fs.readJson('mockFileName.json'), undefined); 61 | }); 62 | 63 | test.todo('some todo cases'); 64 | ``` 65 | 66 | ```js 67 | // jasmine 68 | import { shuffle } from './public'; 69 | 70 | describe('public utils', () => { 71 | it('test if the array is shuffled', () => { 72 | const array = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 ]; 73 | shuffle(array); 74 | expect(array).not.toEqual([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 ]); 75 | expect(array.sort()).toEqual([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 ]); 76 | }); 77 | }); 78 | ``` 79 | 80 | ## Integration test 81 | 82 | The integration tests in 1.x depend on workflow configuration files. Strictly speaking, this is not a rigorous integration testing method, and it is hard to maintain. In addition, we assert nothing about the running process except the exit code.
This situation will be improved in 2.0 as follows: 83 | 84 | * define integration test cases 85 | * use or develop an integration test framework suitable for Pipcook 86 | * extend the run environment 87 | 88 | # Framework migration for Daemon 89 | 90 | The framework of the daemon will be moved from the original framework to `loopback 4` for the following reasons: 91 | 92 | * automatic code generation capability 93 | * framework maturity: the TypeScript version cannot be configured in the original framework, so the overall size of the pipcook package cannot be trimmed, and the test framework is constrained and cannot be switched 94 | * i18n 95 | 96 | # Framework migration for Pipboard 97 | 98 | The pipboard UI library will be migrated from icejs to [ant.design](https://ant.design/) for the following reasons: 99 | - icejs has no English documentation, so it is not friendly to foreign contributors 100 | - The [ant.design](https://ant.design/) library is more widely used and more familiar to community contributors 101 | - It reduces the complexity of the framework and makes it easier to maintain. icejs contains some complex features that are unnecessary for pipboard, such as MPA (Multi-page application), SSR (Server-side rendering), permission routing, etc. 102 | 103 | The bundler will also be migrated from `webpack` to `parcel`, mainly because parcel requires less configuration than webpack and is easier to use. 104 | -------------------------------------------------------------------------------- /docs/spec/dataset.md: -------------------------------------------------------------------------------- 1 | # Dataset Specification 2 | 3 | Datasets are an important part of machine learning: subsequent models are built on top of them, so datasets need to be managed. The following is the standard format of the dataset that Pipcook should save after the data is collected through the `datasource` script. 4 | 5 | For different dataset formats, the `datasource` script is used to smooth over the differences. 6 | 7 | #### Image 8 | 9 | PascalVOC Dataset format, the detailed directory is as follows: 10 | 11 | ``` 12 | 📂dataset 13 | ┣ 📂annotations 14 | ┃ ┣ 📂train 15 | ┃ ┃ ┣ 📜... 16 | ┃ ┃ ┗ 📜${image_name}.xml 17 | ┃ ┣ 📂test 18 | ┃ ┗ 📂validation 19 | ┗ 📂images 20 | ┣ 📜... 21 | ┗ 📜${image_name}.jpg 22 | ``` 23 | 24 | Or, represented in XML (standard PascalVOC annotation layout): 25 | 26 | ```xml 27 | <annotation> 28 | <folder>folder path</folder> 29 | <filename>image name</filename> 30 | <size> 31 | <width>width</width> 32 | <height>height</height> 33 | </size> 34 | <object> 35 | <name>category name</name> 36 | <bndbox> 37 | <xmin>left</xmin> 38 | <ymin>top</ymin> 39 | <xmax>right</xmax> 40 | <ymax>bottom</ymax> 41 | </bndbox> 42 | </object> 43 | </annotation> 44 | 45 | ``` 46 | 47 | #### Text 48 | 49 | The text category should be a CSV file. The first column is the text content, and the second column is the category name. The delimiter is ',' without a header. 50 | 51 | ```csv 52 | prod1, type1 53 | prod2, type2 54 | prod3, type2 55 | prod4, type1 56 | ``` 57 | -------------------------------------------------------------------------------- /docs/spec/script.md: -------------------------------------------------------------------------------- 1 | # Script Specification 2 | 3 | [Pipcook][] uses scripts to accomplish tasks in a specific machine learning lifecycle, which keeps the framework simple, stable, and efficient. 4 | 5 | At the same time, through a set of script specifications defined by [Pipcook][], we also allow anyone to develop scripts, which ensures the scalability of [Pipcook][]. Theoretically, through scripts, we can achieve all kinds of machine learning tasks.
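For intuition, here is a condensed sketch of what a script module can look like. This is illustrative only, not the exact Pipcook API; the precise typed entries are listed in the next section.

```js
// Illustrative sketch of a model script (hypothetical, simplified shapes):
// a model script exposes a `train` entry and a `predict` entry.
module.exports = {
  // Read samples from the upstream datasource/dataflow script,
  // fit the model, then persist it through the runtime context.
  async train(dataset, options, context) {
    // const sample = await dataset.train.next(); ...
  },
  // Load the persisted model and return predictions for the input.
  async predict(samples, context) {
    // return samples.map(() => ({ category: 'unknown', score: 1 }));
  }
};
```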
6 | 7 | 8 | ## Script Category 9 | 10 | We have defined the following script categories for the machine learning lifecycle. 11 | 12 | - datasource: [`DatasourceEntry: (options: Record<string, string>, context: ScriptContext) => Promise<DatasetPool>`](https://alibaba.github.io/pipcook/typedoc/script/index.html#datasourceentry) Download data from data sources and provide data access interfaces. 13 | - dataflow: [`DataflowEntry: (api: DatasetPool, options: Record<string, string>, context: ScriptContext) => Promise<DatasetPool>`](https://alibaba.github.io/pipcook/typedoc/script/index.html#dataflowentry) Get the data from the datasource, process it and let the next dataflow script or model script get the processed data by returning the data access interface. 14 | - model: [`{ train: ModelEntry, predict: PredictEntry }`](https://alibaba.github.io/pipcook/typedoc/script/interfaces/extmodelentry.html) Get sample data from the dataflow or datasource scripts, train, validate, and output the model file; `predict` produces predictions from the input. 15 | 16 | ## Developing 17 | 18 | Check [this contributing documentation](../contributing/contribute-a-script.md) to learn how to develop a new script. 19 | 20 | [Pipcook]: https://github.com/alibaba/pipcook 21 | [Pipcook Script]: ../../GLOSSORY.md#pipcook-script 22 | [Pipcook Tools]: ../../manual/pipcook-tools.md 23 | [PyPI]: https://pypi.org 24 | -------------------------------------------------------------------------------- /docs/zh-cn/GLOSSORY.md: -------------------------------------------------------------------------------- 1 | # 术语表 2 | 3 | 术语表用于表达 Pipcook 中若干默认的、隐式的约定,同时为用户和贡献者提供参考。它旨在描述概念,并详细说明其相应的 API 或文档的其他相关部分。通过从接口文档或用户手册中的链接跳转过来并查阅术语含义,可以最大程度地减少阅读中的不一致。 4 | 5 | ### Pipboard 6 | 7 | Pipcook Board 的缩写,在 Pipboard 上,您可以通过 Web 来管理你的 Pipeline 和插件。 8 | 9 | ### Pipcook 10 | 11 | 指 Pipcook 项目,一般来说指向 GitHub(https://github.com/alibaba/pipcook) 地址。 12 | 13 | ### Pipcook Daemon 14 | 15 | 用于管理 Pipeline 及其执行,它通过 HTTP 提供对 [Pipcook Tools][] 和 [Pipboard][] 的远程访问,同时也支持其他客户端通过 HTTP 集成 Pipcook Daemon。 16 | 17 | ### Pipcook script 18 | 19 | 在 Pipeline 中,脚本就像乐高积木,用户选择不同的脚本,就可以快速完成不同的 Pipeline 的搭建,并训练出不同的模型。 20 | 21 | ### Pipcook Tools 22 | 23 | Pipcook 命令行工具的简称,通过 `npm install -g @pipcook/cli` 安装。 24 | 25 | ### Pipeline 26 | 27 | 在计算机系统中,Pipeline(也称为数据流)是一组串联连接的数据处理节点,其中一个节点的输出是下一个节点的输入。Pipeline 的节点通常以并行或按时间分割的方式执行。 28 | 29 | [Pipcook Tools]: #pipcook-tools 30 | [Pipboard]: #pipboard 31 | -------------------------------------------------------------------------------- /docs/zh-cn/INSTALL.md: -------------------------------------------------------------------------------- 1 | # 安装 2 | 3 | 现在有以下两种不同的方式来安装 [Pipcook][]: 4 | 5 | - [通过 NPM 安装][] 对于大多数用户来说,这是最好的方法。它将提供一个稳定的版本,并且预编译的软件包可用于大多数平台。 6 | - [通过源码安装][] 这种方式最适合需要最新和最强大功能而又不怕运行全新代码的用户,希望为该项目做出贡献的开发者也需要这样做。 7 | 8 | 在开始安装之前,需要保证下面的环境: 9 | 10 | - macOS、Linux、Windows 11 | - Node.js 12.17 以上 / 14.0.0 以上 12 | 13 | ## 通过 NPM 安装 14 | 15 | 安装 [Pipcook][] 只需运行下面的命令即可: 16 | 17 | ```sh 18 | $ npm install -g @pipcook/cli 19 | ``` 20 | 21 | 然后通过 `pipcook --help` 来检查安装是否成功。 22 | 23 | ## 通过 Docker 安装 24 | 25 | 我们提供了阿里源 docker 镜像,您可以运行如下命令安装: 26 | 27 | ```sh 28 | $ docker pull registry.cn-beijing.aliyuncs.com/pipcook/pipcook:latest 29 | ``` 30 | 31 | 安装完成之后,可以运行如下命令启动 docker: 32 | 33 | ```sh 34 | $ docker run -it registry.cn-beijing.aliyuncs.com/pipcook/pipcook:latest /bin/bash 35 | ``` 36 | 37 | ## 疑难排查 38 | 39 | 如果你有任何安装方面的问题,请反馈到我们的 [issue tracker](https://github.com/alibaba/pipcook/issues/new)。 40 | 41 | [通过 NPM 安装]: #通过-NPM-安装 42 | [通过源码安装]: contributing/guide-to-contributor#download-source 43 |
[Pipcook]: https://github.com/alibaba/pipcook 44 | -------------------------------------------------------------------------------- /docs/zh-cn/README.md: -------------------------------------------------------------------------------- 1 | # Pipcook 2 | 3 | [Pipcook][] 项目是一个开源工具集,它能让 Web 开发者更好地使用机器学习,从而开启和加速前端智能化时代! 4 | 5 | ## 用法 6 | 使用 Pipcook 进行机器学习开发非常简单,只需 4 步:安装、训练、测试和部署。 7 | 8 | 安装 Pipcook-cli: 9 | 10 | ```sh 11 | $ npm install -g @pipcook/cli 12 | ``` 13 | 14 | 从[内置 pipeline](https://github.com/alibaba/pipcook/tree/main/example/pipelines) 选择一个进行训练,比如图片分类: 15 | 16 | ```sh 17 | $ pipcook train https://cdn.jsdelivr.net/gh/alibaba/pipcook@main/example/pipelines/image-classification-mobilenet.json -o output 18 | ``` 19 | 20 | Pipeline 中指定的数据集包含了两类图片,分别是 `avatar` 和 `blurBackground`。 21 | 在训练结束之后,我们可以使用训练结果进行预测: 22 | 23 | ```sh 24 | $ pipcook predict ./output/image-classification-mobilenet.json -s ./output/data/test/blurBackground/4572_58__1500.94_453.jpg 25 | Origin result:[{"id":1,"category":"blurBackground","score":1}] 26 | ``` 27 | 28 | 模型的输入是一张验证数据集的图片,分类是 `blurBackground`,预测的结果显示当前输入的图片分类为 `blurBackground`,可信度为 1。 29 | 30 | 想要部署服务? 31 | 32 | ```sh 33 | $ pipcook serve ./output 34 | ℹ preparing framework 35 | ℹ preparing scripts 36 | ℹ preparing artifact plugins 37 | ℹ initializing framework packages 38 | Pipcook has served at: http://localhost:9091 39 | ``` 40 | 41 | 接下来,打开浏览器并访问 `http://localhost:9091` 就可以访问到你的图片分类服务了。 42 | 43 | ## 为什么要开发 Pipcook 44 | 45 | 它旨在使 Web 工程师能够在零门槛的前提下使用机器学习,并拥有将前端技术领域带到智能领域的视角。[Pipcook][] 的目标就是成为机器学习和前端交互的跨领域工具包。 46 | 47 | 我们将完全基于前端应用程序来设计 Pipcook API,并专注于前端领域,以真实解决 Web 工程师使用机器学习的痛点来开发 Pipcook。以面向 Web 友好为原则,来推动机器学习工程和前端工程师的融合。 48 | 49 | ## 简单的介绍 50 | 51 | [Pipcook][] 项目提供了若干独立的子项目,包括机器学习工作流框架、命令行管理工具、机器学习的 JavaScript 运行时。你也可以在其他项目中使用这些框架来搭建你所需要的系统。 52 | 53 | ### 设计原则 54 | 55 | 在 [Pipcook][] 中,我们遵循一些基本的设计原则,来保证整个软件是模块化和灵活的,这些原则也能帮助社区来对 [Pipcook][] 未来的方向作出指导。 56 | 57 | - **模块化** 项目中包含了一些子项目,它们自身都必须保证是良好定义的。 58 | - **可更换** 项目中包含了足够的模块来构建现在的 Pipcook,不过通过模块化的架构和规范,开发者可以按照自己的需要将部分模块切换为其他的实现方式。 59 | 60 | ### 受众 61 | 62 | [Pipcook][] 面向以下的 Web 工程师: 63 | 64 | - 想要学习机器学习 65 | - 想要训练和部署自己的模型 66 | - 想要优化模型的性能,比如针对一个图片分类模型,有一个更高的准确度 67 | 68 | > 如果你满足上面条件之一,那么就尝试从[安装](INSTALL.md)开始吧。 69 | 70 | ### 子项目 71 | 72 | __Pipcook Pipeline__ 73 | 74 | 它用于表达机器学习的工作流,其中包含了 Pipcook Script,在这一层,我们需要保证整个系统的稳定性和拓展性,同时使用 [Script](manual/intro-to-script.md) 机制来支持丰富的数据源、数据流、训练和验证。 75 | 76 | 一条 Pipcook Pipeline 由多个 script 组成,通过配置不同的脚本以及参数,最终会输出一个目录,其中包含了训练好的模型。 77 | 78 | __Pipcook Bridge to Python__ 79 | 80 | 对于 JavaScript 工程师来说,开始机器学习最困难的一点就是缺乏一套成熟的工具集。在 Pipcook 中,我们提供了 **Boa**,它使用 N-API 将 [CPython][] 集成在了 Node.js 环境,从而让开发者能够通过 JavaScript 访问到 Python 生态来解决这个痛点。 81 | 82 | 通过它,开发者可以毫无顾虑地在 Node.js 中使用诸如 `numpy`、`scikit-learn`、`jieba` 或 `tensorflow` 这样的 Python 包。 83 | 84 | ## 下一步 85 | 86 | 看到这里,已经按捺不住想要使用 [Pipcook][] 了吗?可以按照下面的介绍开始你下一步的学习之旅: 87 | 88 | - [如何安装](INSTALL.md) 89 | - [什么是机器学习](tutorials/machine-learning-overview.md) 90 | - [如何使用 Pipeline](manual/intro-to-pipeline.md) 91 | - [如何使用 Boa](manual/intro-to-boa.md) 92 | - [如何使用 Pipcook Tools](manual/pipcook-tools.md) 93 | 94 | [Pipcook]: https://github.com/alibaba/pipcook 95 | [CPython]: https://github.com/python/cpython 96 | -------------------------------------------------------------------------------- /docs/zh-cn/_navbar.md: -------------------------------------------------------------------------------- 1 | - API 文档 2 | - [Runtime](typedoc/runtime/index.html) 3 | - [Script](typedoc/script/index.html) 4 | - 选择语言 5 | - [English](/) 6 |
- [中文](/zh-cn/) 7 | 8 | -------------------------------------------------------------------------------- /docs/zh-cn/_sidebar.md: -------------------------------------------------------------------------------- 1 | - [什么是 Pipcook](/zh-cn/README.md) 2 | - [安装](/zh-cn/INSTALL.md) 3 | - 用户手册 4 | - [Pipeline](/zh-cn/manual/intro-to-pipeline.md) 5 | - [脚本](/zh-cn/manual/intro-to-script.md) 6 | - [Boa 使用指南](/zh-cn/manual/intro-to-boa.md) 7 | - [命令行工具](/zh-cn/manual/pipcook-tools.md) 8 | - 教程 9 | - [开始机器学习](/zh-cn/tutorials/machine-learning-overview.md) 10 | - [在 Node.js 中使用 Python](/zh-cn/tutorials/using-python-functions-in-nodejs.md) 11 | - [分类图片中的前端组件](/zh-cn/tutorials/component-image-classification.md) 12 | - 如何贡献 13 | - [贡献者指南](/zh-cn/contributing/guide-to-contributor.md) 14 | - [维护者指南](/zh-cn/contributing/guide-to-collaborator.md) 15 | - [贡献脚本](/zh-cn/contributing/contribute-a-script.md) 16 | - 规范 17 | - [脚本](/zh-cn/spec/script.md) 18 | - [数据集](/zh-cn/spec/dataset.md) 19 | - [FAQ](/zh-cn/faq/index.md) 20 | - [pipcook 框架](/zh-cn/faq/pipcook-framework.md) 21 | - [插件](/zh-cn/faq/plugins.md) 22 | - [术语表](/zh-cn/GLOSSORY.md) 23 | -------------------------------------------------------------------------------- /docs/zh-cn/contributing/guide-to-contributor.md: -------------------------------------------------------------------------------- 1 | # Contributor Guide 2 | 3 | Pipcook is a community-driven open source project. We do our best to make every bug fix, every new feature, and every decision about how this project evolves visible and transparent to everyone in this community. 4 | 5 | Therefore, we try to keep everything from the source code to our documentation friendly to contributors, and we have laid out several paths to make it easier to participate in Pipcook. If you want to get involved, follow the one that fits you. 6 | 7 | - If you are going to browse the source code only, go to [GitHub](https://github.com/alibaba/pipcook). 8 | - If you are a rookie with no experience in contributing to any open source project, we have organized [good first issue][] tasks for you; all of them are relatively simple and easy to start with. 9 | - If you want to learn machine learning by contributing to this project, you can try our [good first model][] to help us with some model implementation and migration tasks (rest assured, you only need to call into the Python ecosystem through [Boa][]). 10 | - Otherwise, discussions on any of our issues are open to everyone, and you are welcome to contribute your ideas. 11 | 12 | ## Submit a patch 13 | 14 | Next, let's take a look at how to submit patches to Pipcook.
15 | 16 | ### Requirements 17 | 18 | - macOS / Linux / Windows 19 | - Node.js >= 12 20 | 21 | ### Download source 22 | 23 | Clone the repository from GitHub: 24 | 25 | ```bash 26 | $ git clone git@github.com:alibaba/pipcook.git 27 | ``` 28 | 29 | ### Build from source 30 | 31 | And install the requirements and build: 32 | 33 | ```bash 34 | $ npm install 35 | $ npm run build 36 | ``` 37 | 38 | We provide a way to use [tuna mirror](https://mirrors.tuna.tsinghua.edu.cn/) for downloading Python and packages: 39 | 40 | ```sh 41 | $ BOA_TUNA=1 npm install 42 | ``` 43 | 44 | Or you could specify your custom miniconda mirror and Python index page: 45 | 46 | ```sh 47 | $ export BOA_CONDA_MIRROR=https://mirrors.tuna.tsinghua.edu.cn/anaconda/miniconda # this is for miniconda 48 | $ export BOA_CONDA_INDEX=https://pypi.tuna.tsinghua.edu.cn/simple # this is for pip 49 | $ npm install 50 | ``` 51 | 52 | ### Test 53 | 54 | Run all the tests with the following: 55 | 56 | ```bash 57 | $ npm test 58 | ``` 59 | 60 | And run tests for a single specific package: 61 | 62 | ```bash 63 | $ ./node_modules/.bin/lerna run test --scope <package-name> 64 | ``` 65 | 66 | ### Pipeline 67 | 68 | ```bash 69 | $ sh tools/run_pipeline.sh <pipeline_name> 70 | ``` 71 | 72 | The `pipeline_name` is the name of the pipeline file under "test/pipelines", such as: 73 | 74 | - "text-bayes-classification" 75 | - "mnist-image-classification" 76 | - "databinding-image-classification" 77 | 78 | ### Push and create a pull request 79 | 80 | After the local tests pass, you can push the code and create a pull request: 81 | 82 | ```sh 83 | $ git push git@github.com:<username>/pipcook.git 84 | ``` 85 | 86 | ## Internal documentation 87 | 88 | ### Plugin Specification 89 | 90 | You can refer to [the Plugin Specification](../spec/plugin.md). 91 | 92 | We have defined a set of interfaces for each plugin. Each type of plugin must be implemented strictly according to the interfaces. The detailed information is as follows: 93 | 94 | - [Data Collect](../spec/plugin/0-data-collect.md) 95 | - [Data Access](../spec/plugin/1-data-access.md) 96 | - [Data Process](../spec/plugin/2-data-process.md) 97 | - [Model Load](../spec/plugin/3-model-define.md) 98 | - [Model Train](../spec/plugin/4-model-train.md) 99 | - [Model Evaluate](../spec/plugin/5-model-evaluate.md) 100 | 101 | ### Dataset Specification 102 | 103 | For data reading and processing involved in the development, please refer to our [Dataset Specification](../spec/dataset.md).
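To recap the patch workflow above, a typical local verification pass before opening a pull request looks like this (the pipeline name is one of the files under `test/pipelines` listed earlier):

```sh
$ npm install                  # install dependencies and bootstrap packages
$ npm run build                # compile all packages
$ npm test                     # lint and run unit tests across packages
$ sh tools/run_pipeline.sh text-bayes-classification   # smoke-test one pipeline
```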
104 | 105 | [good first issue]: https://github.com/alibaba/pipcook/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22 106 | [good first model]: https://github.com/alibaba/pipcook/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+model%22 107 | [Boa]: https://github.com/alibaba/pipcook/tree/master/packages/boa 108 | -------------------------------------------------------------------------------- /docs/zh-cn/faq/index.md: -------------------------------------------------------------------------------- 1 | # 常见问题 2 | 3 | ## Pipcook 核心功能相关 4 | 5 | - [我应该在哪里放置我的 pipeline 的配置文件?](./pipcook-framework.md#q1) 6 | 7 | - [为什么 Pipcook 安装非常缓慢?](./pipcook-framework.md#q2) 8 | 9 | - [我可以在 Electron 里面使用 Pipcook 吗?](./pipcook-framework.md#q3) 10 | 11 | - [Pipcook 支持 Windows 系统吗?](./pipcook-framework.md#q4) 12 | 13 | ## Pipcook 插件相关(v1.x) 14 | 15 | - [在 yolov5 的链路中我怎样指定用哪张显卡进行训练?](./plugins.md#q1) 16 | 17 | - [在 yolov5 的训练中我怎样指定仅使用 cpu 训练?](./plugins.md#q2) 18 | -------------------------------------------------------------------------------- /docs/zh-cn/faq/pipcook-framework.md: -------------------------------------------------------------------------------- 1 | # 常见问题 2 | 3 | ## 我应该在哪里放置我的 pipeline 的配置文件? 4 | 5 | Pipcook 会在后台启动服务,用户可以通过 cli 工具或者可视化工具访问服务。因此,我们对您的工作目录没有特别的要求,你可以在任意地方放置你的配置文件。只需要使用 `pipcook run <url>` 并且指定正确的配置文件路径就可以进行训练了,url 参数支持本地路径或者远程 url。 6 | 7 | ## 为什么 Pipcook 安装非常缓慢? 8 | 9 | 目前 Pipcook 的安装依赖于 npm 源和 pip 源。有可能这些默认源的链接非常缓慢。你可以指定 `pipcook init -c <registry>` 去改变你的 npm 源,同时,你也可以使用 `pipcook init --tuna` 指定 pip 清华源。 10 | 11 | ## 我可以在 Electron 里面使用 Pipcook 吗? 12 | 13 | 理论上只要 Node.js >= 12.17 或者 >= 14.0.0 并且相应的 N-API 可用,你就可以使用 Pipcook。同时,Pipcook 也将会支持产出 WASM 模型,所以你可以非常容易地集成到你的系统中去。 14 | 15 | ## Pipcook 支持 Windows 系统吗? 16 | 17 | 目前不支持,未来将会支持。 18 | -------------------------------------------------------------------------------- /docs/zh-cn/faq/plugins.md: -------------------------------------------------------------------------------- 1 | # 常见问题(v1.x) 2 | 3 | ### 在 yolov5 的链路中我怎样指定用哪张显卡进行训练? 4 | 5 | 你可以设置 `$CUDA_VISIBLE_DEVICES` 这个环境变量,它的值就是你的显卡编号,默认我们会使用 GPU:0 进行训练。注意环境变量需要设置在 daemon 进程运行的机器上,并且在 daemon 启动之前设置。 6 | 7 | ### 在 yolov5 的训练中我怎样指定仅使用 cpu 训练? 8 | 9 | 如果你的环境支持 GPU 训练,我们默认会使用 GPU。如果你想禁掉此功能,可以设置环境变量 `export CUDA_VISIBLE_DEVICES=""`。 注意环境变量需要设置在 daemon 进程运行的机器上,并且在 daemon 启动之前设置。 10 | -------------------------------------------------------------------------------- /docs/zh-cn/manual/intro-to-framework.md: -------------------------------------------------------------------------------- 1 | # Pipcook 框架 2 | 3 | Pipcook 使用的脚本不会打包一些比较重的依赖,比如 `@tensorflow/tfjs`,那么我们如何在脚本中使用它们呢?
4 | 事实上,Pipcook 会把这些依赖打包在所谓的`框架`中。框架是一组和平台、Node.js 版本相关的包,比如以下 pipeline: 5 | ```json 6 | { 7 | "specVersion": "2.0", 8 | "type": "ObjectDetection", 9 | "datasource": "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/object-detection-yolo/build/datasource.js?url=https://pc-github.oss-us-west-1.aliyuncs.com/dataset/object-detection-yolo-min.zip", 10 | "dataflow": [ 11 | "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/object-detection-yolo/build/dataflow.js?size=416&size=416" 12 | ], 13 | "model": "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/object-detection-yolo/build/model.js", 14 | "artifact": [{ 15 | "processor": "pipcook-artifact-zip@0.0.2", 16 | "target": "./object-detection-yolo-model.zip" 17 | }], 18 | "options": { 19 | "framework": "tfjs@3.8", 20 | "train": { 21 | "epochs": 10, 22 | "gpu": true 23 | } 24 | } 25 | } 26 | ``` 27 | 28 | 这条 pipeline 使用了框架 `tfjs@3.8`,包含 `@tensorflow/tfjs-node@3.8`、`@tensorflow/tfjs-node-gpu@3.8`,需要注意的是,脚本不需要引用这两个包,而是引用 `@tensorflow/tfjs`,在框架加载时,会根据 `train.gpu` 选项确定是否需要将 `@tensorflow/tfjs-node-gpu@3.8` 设置为 backend,由于 macOS 不支持 CUDA,因此框架也会抹平平台差异:`train.gpu` 在 macOS 系统上不会生效。这种能力是由框架中的初始化脚本实现的,这就涉及到框架的组织结构:每个框架中包含了一个框架描述文件,框架初始化脚本和若干依赖文件夹。以 `tfjs@3.8` 包为例,目录结构如下: 29 | 30 | ```sh 31 | ├── framework.json 32 | ├── index.js 33 | └── node_modules 34 | ``` 35 | 36 | 其中 `framework.json` 是框架的描述文件,`index.js` 是框架的初始化脚本,`node_modules` 内包含的则是框架所要提供的依赖文件夹。 37 | `framework.json` 如下: 38 | 39 | ```json 40 | { 41 | "name": "tfjs", 42 | "version": "3.8", 43 | "packages": [ 44 | { 45 | "name": "@tensorflow/tfjs-node", 46 | "version": "3.8.0", 47 | "type": "js" 48 | }, 49 | { 50 | "name": "@tensorflow/tfjs-node-gpu", 51 | "version": "3.8.0", 52 | "type": "js" 53 | } 54 | ] 55 | } 56 | ``` 57 | 58 | 初始化脚本导出一个初始化函数,将在每次 pipeline 运行到框架初始化阶段时被调用,传入 pipeline 的 `options` 字段,示例如下: 59 | 60 | ```js 61 | const os = require('os'); 62 | 63 | module.exports = { 64 | initialize(opts) { 65 | if ( 66 | opts && opts.train 67 | && ( 68 | opts.train.gpu === 'true' 69 | || opts.train.gpu === true 70 | ) 71 | ) { 72 | if (os.platform() !== 'darwin') { 73 | require('@tensorflow/tfjs-node-gpu'); 74 | console.log('gpu enabled'); 75 | } else { 76 | require('@tensorflow/tfjs-node'); 77 | console.warn('platform darwin does not support gpu'); 78 | } 79 | } else { 80 | require('@tensorflow/tfjs-node'); 81 | console.log('gpu disabled'); 82 | } 83 | } 84 | } 85 | ``` 86 | 87 | 另外,`tfjs` 的 backend 在不同平台都有不同的二进制库,所以 Pipcook 会根据环境的不同选择下载不同的包,比如在 macOS、Node.js v12.22 上,实际下载的框架文件为 `https://pipcook-cloud.oss-cn-hangzhou.aliyuncs.com/framework/node12-py37/tfjs%403.8-darwin-x64-v8.zip`,而在 Linux、Node.js v14.0 上,则会下载 `https://pipcook-cloud.oss-cn-hangzhou.aliyuncs.com/framework/node14-py37/tfjs%403.8-linux-x64-v6.zip`。当然,如果脚本依赖一些自定义的框架,也可以直接把完整的 url 填入 `framework` 选项来直接使用,或者通过自建一个框架镜像,然后通过 `train`、`predict`、`serve` 命令的 `-m` 参数指定框架镜像地址。 88 | 一个完整的框架镜像目录结构如下: 89 | 90 | ```sh 91 | ├── node14-py37/{framework-name}@{version}-{os}-{arch}-{napi-version}.zip 92 | └── node12-py37/{framework-name}@{version}-{os}-{arch}-{napi-version}.zip 93 | ``` 94 | 95 | 链接中的 `py37` 指的是 `Boa` 支持的 Python 版本,目前只支持 v3.7。 96 | -------------------------------------------------------------------------------- /docs/zh-cn/manual/intro-to-pipeline.md: -------------------------------------------------------------------------------- 1 | # Pipeline 2 | 3 | 在 Pipcook 中,我们使用 Pipeline 来表示一个模型的训练工作流,那么这个 Pipeline 到底是什么样的呢?在 Pipeline 中,开发者能够使用 JSON 来描述从样本收集、模型定义、模型训练和模型评估这些阶段。 4 | 5 | ```js 6 |
{ 7 | "specVersion": "2.0", 8 | "type": "ImageClassification", 9 | "datasource": "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/image-classification-mobilenet/build/datasource.js?url=http://ai-sample.oss-cn-hangzhou.aliyuncs.com/image_classification/datasets/imageclass-test.zip", 10 | "dataflow": [ 11 | "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/image-classification-mobilenet/build/dataflow.js?size=224&size=224" 12 | ], 13 | "model": "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/image-classification-mobilenet/build/model.js", 14 | "artifact": [{ 15 | "processor": "pipcook-artifact-zip@0.0.2", 16 | "target": "/tmp/mobilenet-model.zip" 17 | }], 18 | "options": { 19 | "framework": "tfjs@3.8", 20 | "train": { 21 | "epochs": 20, 22 | "validationRequired": true 23 | } 24 | } 25 | } 26 | ``` 27 | 28 | 如上面 JSON 所示,一个 Pipeline 由 `datasource`、`dataflow` 和 `model` 这三类 Script,以及构建插件 `artifact`、Pipeline 选项 `options` 组成。 29 | 每个 Script 通过 URI query 传递参数,model script 的参数也可以通过 `options.train` 定义。 30 | `artifact` 定义了一组构建插件,每个构建插件会在训练结束后被依次调用,从而可以对输出的模型进行转换、打包、部署等。 31 | `options` 包含框架定义和训练参数的定义。 32 | 接着,Pipcook 就会根据这个 JSON 文件中定义的 URI 和参数,来准备环境,运行 Script,最后输出和处理模型。 33 | 34 | > Pipeline 中的脚本支持 `http`、`https` 和 `file` 协议。 35 | 36 | > 如果想获取更多 Script 相关的知识,可以阅读[如何编写 Pipcook Script](./intro-to-script.md)。 37 | 38 | 下一步,我们在定义好一个 Pipeline 文件后,就能通过 Pipcook 来运行它了。 39 | 40 | ## 准备工作 41 | 42 | 通过[命令行工具安装指南](./pipcook-tools.md#环境设置)来做运行 Pipeline 前的准备。 43 | 44 | ## 运行 45 | 46 | 将上面的 Pipeline 保存在磁盘上,然后执行: 47 | 48 | ```sh 49 | $ pipcook run /path/to/your/pipeline-config.json 50 | ``` 51 | 52 | 或者将其托管在静态资源服务器上: 53 | 54 | ```sh 55 | $ pipcook run https://host/path/to/your/pipeline-config.json 56 | ``` 57 | 58 | 执行完成后,训练好的模型会生成在当前[工作目录](https://linux.die.net/man/3/cwd)下,以当前时间戳命名的文件夹中,同时模型文件会被构建插件 `pipcook-artifact-zip` 压缩成 zip 文件并保存在 tmp 目录下。 59 | 60 | ``` 61 | ├── pipeline-config.json 62 | ├── cache 63 | ├── data 64 | ├── framework 65 | ├── model 66 | └── scripts 67 | ``` 68 | 69 | model 目录下保存了模型文件,在后续的版本迭代中,会增加模型使用的能力。 70 | -------------------------------------------------------------------------------- /docs/zh-cn/manual/intro-to-script.md: -------------------------------------------------------------------------------- 1 | # Pipcook 脚本 2 | 3 | 在 Pipcook 中,每一个 Pipeline 表示一个特定的机器学习任务,那么我们如何定义一个工作流呢?Pipcook 使用脚本来定义和配置 Pipeline 中不同的阶段。Pipcook 脚本是一个暴露了特定方法的 js 脚本文件,包含 3 种不同的类型,分别为 datasource、dataflow 和 model,具体的定义看[这里](../spec/script.md)。比如一个文本分类的任务,就可以用下面的脚本来组成: 4 | 5 | - `datasource` 通过 datasource 脚本来下载样本数据,提供数据访问接口。 6 | - `dataflow` 将下载的数据集格式转换为后面模型能够接受的格式(在此例中不需要)。 7 | - `model` 定义文本分类的模型,[朴素贝叶斯分类器](https://en.wikipedia.org/wiki/Naive_Bayes_classifier),通过样本数据接口获取样本进行模型训练,并评估准确度。 8 | 9 | > 上述 Pipeline 的源码定义在[这里](https://github.com/alibaba/pipcook/blob/main/example/pipelines/text-classification-bayes.json)。 10 | 11 | 通过上面的例子,对于一个文本分类器的任务,我们遵循机器学习工作流,它按照不同类型的子任务顺序执行,而每个子任务就对应一个用户定义的脚本,同时用户也可以以较低成本,快速地调整整个任务的 Pipeline。 12 | 13 | > 可用的官方脚本在[这里](https://github.com/imgcook/pipcook-script)。 14 | -------------------------------------------------------------------------------- /docs/zh-cn/manual/pipcook-tools.md: -------------------------------------------------------------------------------- 1 | # Pipcook Tools 2 | 3 | Pipcook Tools 是 Pipcook 提供给开发者使用的命令行工具,它帮助开发者运行和管理 Pipeline。 4 | 5 | ## 安装 6 | 7 | ```sh 8 | $ npm install @pipcook/cli -g 9 | ``` 10 | 11 | 查看[安装指南](../INSTALL.md)获取完整的安装引导。 12 | 13 | ## 使用指南 14 | 15 | 运行一个 Pipeline,只需要执行如下命令: 16 | 17 |
```sh 18 | $ pipcook run protocol://location/to/your/pipeline-config.json 19 | ``` 20 | 21 | 支持的 pipeline 文件协议包括:`http:`、`https:`、`file:`,默认为 `file:` 协议。 22 | 更多运行参数可以通过以下命令获取: 23 | 24 | ```sh 25 | $ pipcook run --help 26 | ``` 27 | 28 | > 关于如何编写 pipeline,可以看[这里](./intro-to-pipeline.md)。 29 | 30 | ## 缓存管理 31 | 32 | 通过 `pipcook run` 命令运行 Pipeline 时,如果 Script 或 framework 为非 `file` 协议,则会将其缓存到 `~/.pipcook` 目录下,以便加速下次运行。 33 | 如果想手动删除这些缓存,可以通过以下命令: 34 | ```sh 35 | $ pipcook clean 36 | ``` 37 | -------------------------------------------------------------------------------- /docs/zh-cn/spec/dataset.md: -------------------------------------------------------------------------------- 1 | # Dataset Specification 2 | 3 | Datasets are an important part of machine learning: subsequent models are built on top of them, so datasets need to be managed. The following is the standard format of the dataset that Pipcook should save after the data is collected through the `datasource` script. 4 | 5 | For different dataset formats, the `datasource` script is used to smooth over the differences. 6 | 7 | #### Image 8 | 9 | PascalVOC Dataset format, the detailed directory is as follows: 10 | 11 | ``` 12 | 📂dataset 13 | ┣ 📂annotations 14 | ┃ ┣ 📂train 15 | ┃ ┃ ┣ 📜... 16 | ┃ ┃ ┗ 📜${image_name}.xml 17 | ┃ ┣ 📂test 18 | ┃ ┗ 📂validation 19 | ┗ 📂images 20 | ┣ 📜... 21 | ┗ 📜${image_name}.jpg 22 | ``` 23 | 24 | Or, represented in XML (standard PascalVOC annotation layout): 25 | 26 | ```xml 27 | <annotation> 28 | <folder>folder path</folder> 29 | <filename>image name</filename> 30 | <size> 31 | <width>width</width> 32 | <height>height</height> 33 | </size> 34 | <object> 35 | <name>category name</name> 36 | <bndbox> 37 | <xmin>left</xmin> 38 | <ymin>top</ymin> 39 | <xmax>right</xmax> 40 | <ymax>bottom</ymax> 41 | </bndbox> 42 | </object> 43 | </annotation> 44 | 45 | ``` 46 | 47 | #### Text 48 | 49 | The text category should be a CSV file. The first column is the text content, and the second column is the category name. The delimiter is ',' without a header.
50 | 51 | ```csv 52 | prod1, type1 53 | prod2, type2 54 | prod3, type2 55 | prod4, type1 56 | ``` 57 | -------------------------------------------------------------------------------- /docs/zh-cn/spec/script.md: -------------------------------------------------------------------------------- 1 | # 脚本规范 2 | 3 | [Pipcook][] 使用脚本来完成特定的机器学习任务,它使得框架足够简单、稳定和高效。 4 | 5 | 同时,通过定义一套脚本规范,我们允许任何人开发脚本来拓展 [Pipcook][]。理论上,我们可以通过脚本来完成任何机器学习任务。 6 | 7 | 8 | ## 分类 9 | 10 | 下面是所有在 Pipcook 中支持的脚本分类。 11 | 12 | - datasource: [`DatasourceEntry: (options: Record<string, string>, context: ScriptContext) => Promise<DatasetPool>`](https://alibaba.github.io/pipcook/typedoc/script/index.html#datasourceentry) 从数据源中下载数据,提供数据访问接口。 13 | - dataflow: [`DataflowEntry: (api: DatasetPool, options: Record<string, string>, context: ScriptContext) => Promise<DatasetPool>`](https://alibaba.github.io/pipcook/typedoc/script/index.html#dataflowentry) 从 datasource 获取数据,处理并通过返回数据访问接口让下一个 dataflow 脚本或 model 脚本获取处理后的数据。 14 | - model: [`{ train: ModelEntry, predict: PredictEntry }`](https://alibaba.github.io/pipcook/typedoc/script/interfaces/extmodelentry.html) 从 dataflow 或 datasource 脚本中获取样本数据,并进行训练,验证,产出模型,或者通过 predict 对输入的数据进行预测。 15 | 16 | ## 开发 17 | 18 | 查看[贡献者文档](../contributing/contribute-a-script.md)来学习如何开发一个新的脚本。 19 | 20 | [Pipcook]: https://github.com/alibaba/pipcook 21 | [Pipcook Script]: ../GLOSSORY.md#pipcook-script 22 | [PyPI]: https://pypi.org 23 | -------------------------------------------------------------------------------- /docs/zh-cn/tutorials/machine-learning-overview.md: -------------------------------------------------------------------------------- 1 | # 开始机器学习 2 | 3 | 从这篇文章,我们将介绍什么是机器学习,以及如何使用 [Pipcook][] 来完成机器学习任务。 4 | 5 | ## 如何定义一个机器学习问题 6 | 7 | 一般来说,一个学习问题就是输入 N 个样本数据,然后输出与输入相对应的结果,下面的例子将展示如何教会一个程序学习 Node.js 书籍和售价的关系: 8 | 9 | ```ts 10 | const BookPriceModel: Record<string, number> = {}; 11 | const learnBookPrice = (book: string, price: number) => BookPriceModel[book] = price; 12 | const predictBookPrice = (book: string) => BookPriceModel[book]; 13 | 14 | // prediction without learning. 15 | predictBookPrice('Node.js in Action'); // undefined, because the program doesn't know anything yet 16 | 17 | // learn "Node.js in Action" and "Dive into Node.js". 18 | learnBookPrice('Node.js in Action', 99.0); 19 | learnBookPrice('Dive into Node.js', 199.0); 20 | 21 | // prediction after learning. 22 | predictBookPrice('Node.js in Action'); // 99.0 23 | predictBookPrice('Dive into Node.js'); // 199.0 24 | ``` 25 | 26 | **机器学习**问题也是类似的,只不过可以通过机器学习算法让机器能更"智能"地学习,能够对于一些未知数据作出真正的预测结果,比如可以帮助作者决定写一本什么样的书能够卖得更贵: 27 | 28 | ```js 29 | predictBookPrice('Pipcook in Action'); // 89.0 30 | predictBookPrice('Dive into Pipcook'); // 199.0 31 | ``` 32 | 33 | 机器学习并非万能灵药,因此接下来看看它到底能解决哪些问题,下面我们按照数据类型分为不同的任务类型: 34 | 35 | | Sample Type | Problem Category | Description | 36 | |------------------|--------------------------|--------------------------------| 37 | | Image | 图片分类 | 对于给定类型的图片进行分类 | 38 | | | 图片生成 | 生成图片 | 39 | | | 目标检测 | 识别出给定的对象,并返回目标的位置和类型 | 40 | | | 图片分割 | 与图片检测类似,但是返回的是目标轮廓的像素级显示 | 41 | | | 图片聚类 | 返回自动分类后的结果 | 42 | | Text | 文本分类 | 对于给定类型的文本进行分类 | 43 | | | 命名实体识别 | 从一句话中识别出命名实体 | 44 | | | 关系提取 | 抽取句子与句子间的关系 | 45 | | | 指代消解 | 将一句话中的代词转换为实际代表的个体 | 46 | | | 写作纠错 | 辅助写作的纠错功能 | 47 | | | 翻译 | 从一种语言翻译到另一种语言 | 48 | | | 问答 | 根据问题生成对应的回答 | 49 | | | 文本摘要 | 从一段长文本生成摘要文本 | 50 | | | 文本创作 | 生成一些如诗歌、散文、词等艺术作品 | 51 | | | 文本聚类 | 返回自动分类后的结果 | 52 | 53 | 那么我们如何在日常生活中使用上面的任务呢?我们可以来看看一个机器学习项目都会有哪些阶段: 54 | 55 | 1. 收集样本,并将它们处理成一种格式,用于给后面定义的模型学习数据中的特征。 56 | 2. 选择一个用于训练的机器学习模型,一般来说会根据任务类型和场景进行选择。 57 | 3. 
在开始训练之前,需要将上面的样本集分为训练集和测试集。 58 | 4. 训练阶段,将训练集输入到模型中,此时模型开始从训练集中学习特征。 59 | 5. 训练结束后,再将测试集输入到训练好的模型,来评估模型效果。 60 | 61 | > **训练集和测试集** 62 | > 63 | > 机器学习是关于学习数据集的某些特征,然后针对另一个数据集进行测试。机器学习中的一种常见做法是通过将数据集分成两部分来评估算法。我们称其中一组为训练集,在该集上我们学习数据中的特征;我们称另一组为测试集,在测试集上我们对学习到的特征进行测试。 64 | 65 | ## 加载数据集 66 | 67 | [MNIST][](Modified National Institute of Standards and Technology database)是一个大型手写数字识别数据集: 68 | 69 |
70 | (图:MNIST 手写数字样本示例) 71 |
72 | 73 | 接下来,我们使用手写数字识别作为例子,来介绍如何使用 [Pipcook][] 完成一个图片分类任务。 74 | 75 | 我们使用 Pipeline 来完整地描述机器学习任务,不同的脚本表示这个 Pipeline 中不同的阶段,然后再通过 Pipeline 将不同的阶段连接起来形成一个完整的机器学习工作流。 76 | 77 | 在 [Pipcook][] 中,构建一个分类任务的模型就是配置 Pipeline 的脚本,我们从使用 [datasource 脚本](https://github.com/imgcook/pipcook-script/blob/master/scripts/image-classification-mobilenet/src/datasource.ts) 加载 [MNIST][] 数据集开始创建 Pipeline: 78 | 79 | ```js 80 | { 81 | "datasource": "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/image-classification-mobilenet/build/datasource.js?url=http://ai-sample.oss-cn-hangzhou.aliyuncs.com/image_classification/datasets/mnist.zip" 82 | } 83 | ``` 84 | 85 | 这个脚本会下载 [MNIST][] 数据集并提供访问接口。 86 | 87 | ## 学习 88 | 89 | 在这个数字识别数据集的例子中,我们的目的是预测一张图片所代表的数字,那么我们给出的样本共有 10 个分类(0-9),每张图片属于其中一类,这也就是说,我们要让模型做到的是预测一张未知图片的类型,即从 0 到 9 的分类。 90 | 我们使用 [image classification dataflow](https://github.com/imgcook/pipcook-script/blob/master/scripts/image-classification-mobilenet/src/dataflow.ts) 脚本来调整每张图片的尺寸为 224x224,用一个数组 `[224, 224]` 表示: 91 | ```js 92 | { 93 | "dataflow": "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/image-classification-mobilenet/build/dataflow.js?size=224&size=224" 94 | } 95 | ``` 96 | 97 | 然后定义[模型脚本](https://github.com/imgcook/pipcook-script/blob/master/scripts/image-classification-mobilenet/src/model.ts)和参数: 98 | ```js 99 | { 100 | "model": "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/image-classification-mobilenet/build/model.js", 101 | "options": { 102 | "framework": "tfjs@3.8", 103 | "train": { 104 | "epochs": 20, 105 | "validationRequired": true 106 | } 107 | } 108 | } 109 | ``` 110 | 111 | 这个脚本会使用 [mobilenet][] 来做图片分类任务,训练和评估基于 tfjs 的模型。 112 | 113 | 目前为止,Pipeline 就定义完成了,接下来就可以开始模型训练了。 114 | 115 | ```sh 116 | $ pipcook run pipeline.json 117 | ``` 118 | 119 | ## 预测 120 | 121 | 训练完成后,我们就能在输出目录中找到 model 目录,里面包含了训练好的模型。 122 | 123 | ``` 124 | 📂 model 125 | ┣ 📜 model.json 126 | ┗ 📜 weights.bin 127 | ``` 128 | 129 | [Pipcook]: https://github.com/alibaba/pipcook 130 | [MNIST]: https://en.wikipedia.org/wiki/MNIST_database 131 | [Introduction to Pipeline]: ../manual/intro-to-pipeline.md 132 | [mobilenet]: https://github.com/imgcook/pipcook-script/blob/master/scripts/image-classification-mobilenet 133 | -------------------------------------------------------------------------------- /example/pipelines/README-CN.md: -------------------------------------------------------------------------------- 1 | # Pipelines 2 | 3 | [English](./README.md) 4 | 5 | ## 贝叶斯-文本分类 6 | 7 | ### 数据集 8 | 9 | 文本分类 Pipeline 的数据集组织格式如下: 10 | 11 | ```sh 12 | .
13 | ├── test 14 | │   └── textDataBinding.csv 15 | └── train 16 |     └── textDataBinding.csv 17 | ``` 18 | 19 | `train` 文件夹内是训练数据,`test` 文件夹内是测试数据,存储为 csv 格式。csv 文件内有两列数据,分别为 input 和 output,input 为样本数据,output 为样本标签,如: 20 | 21 | | input | output | 22 | | ------------------------------------------------------------ | --------- | 23 | | 原创春秋新款宽松黑色牛仔裤男贴布哈伦裤日系潮流胖男大码长裤子 | itemTitle | 24 | | 茗缘翡翠 | shopName | 25 | | 挂画精美 种类丰富 | itemDesc | 26 | 27 | 这 3 个样本表示了 3 类不同的文本,它们的标签分别是 `itemTitle`、`shopName`、`itemDesc`。需要注意的是,数据集中的数据需要尽可能丰富,且分布相对均匀,也就是说每个类别的样本数量应该差不多,差异过大将影响模型的准确度。 28 | 29 | 数据源可以为本地文件夹路径: 30 | 31 | ```json 32 | { 33 | "datasource": "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/text-classification-bayes/build/datasource.js?url=file:///path/to/dataset-directory" 34 | } 35 | ``` 36 | 37 | `/path/to/dataset-directory` 内包含 `test` 和 `train` 文件夹。 38 | 39 | 或者可以将 `test` 和 `train` 目录压缩成 zip 文件,存储在 OSS 上,修改数据源为 zip 文件 url: 40 | 41 | ```json 42 | { 43 | "datasource": "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/text-classification-bayes/build/datasource.js?url=http://oss-host/my-dataset.zip" 44 | } 45 | ``` 46 | 47 | ### 模型参数 48 | 49 | 贝叶斯模型支持中文和英文两种模式,可以通过 `mode` 参数指定 `cn` 或者 `en`,默认为 `cn`。 50 | 51 | ```json 52 | { 53 | "model": "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/text-classification-bayes/build/model.js?mode=en" 54 | } 55 | ``` 56 | 57 | 由于贝叶斯模型使用了一些 `tfjs-backend-cpu` 上的算子,其他 backend 目前还未支持,所以在 `options` 字段中,我们定义了模型训练的 backend 为 `@tensorflow/tfjs-backend-cpu`。 58 | 59 | ## ResNet/MobileNet-图片分类 60 | 61 | ### 数据集 62 | 63 | 图片分类 Pipeline 的数据集组织格式如下: 64 | 65 | ```sh 66 | . 67 | ├── test 68 | │   ├── class-1 69 | │   └── class-2 70 | ├── train 71 | │   ├── class-1 72 | │   └── class-2 73 | └── validation 74 |     ├── class-1 75 |     └── class-2 76 | ``` 77 | 78 | `train` 文件夹内是训练数据,`test` 文件夹内是测试数据,`validation` 文件夹内是验证数据,目录中为各类别的图片文件夹,文件夹名称即图片的类别。 79 | 80 | 数据源可以是本地文件夹路径: 81 | 82 | ```json 83 | { 84 | "datasource": "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/image-classification/build/datasource.js?url=file:///path/to/dataset-directory" 85 | } 86 | ``` 87 | 88 | `/path/to/dataset-directory` 内包含 `test` 和 `train` 文件夹。 89 | 90 | 也可以把样本目录压缩成 zip 文件,存储在 OSS 上,修改数据源为 zip 文件 url: 91 | 92 | ```json 93 | { 94 | "datasource": "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/image-classification/build/datasource.js?url=http://oss-host/dataset.zip" 95 | } 96 | ``` 97 | 98 | ### 数据处理 99 | 100 | 对于图片分类任务来说,输入模型的所有样本图片维度(长宽)必须是一致的,而我们预定义的 MobileNet 和 ResNet 模型都要求输入 224 * 224 的图片,因此在模型训练开始前,我们会通过 `dataflow` 脚本对图片进行 resize 操作: 101 | ```json 102 | { 103 | "dataflow": [ 104 | "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/image-classification/build/dataflow.js?size=224&size=224" 105 | ] 106 | } 107 | ``` 108 | 109 | ### 模型参数 110 | 111 | 图片分类 pipeline 支持 MobileNet 和 ResNet 两种模型,`modelUrl` 参数指定 `mobilenet` 或者 `resnet`,默认为 `mobilenet`。 112 | 113 | ```json 114 | { 115 | "model": "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/image-classification/build/model.js?modelUrl=resnet" 116 | } 117 | ``` 118 | 119 | 另外,`options` 字段可以配置是否启用 GPU,训练的 epochs: 120 | 121 | ```json 122 | { 123 | "options": { 124 | "framework": "tfjs@3.8", 125 | "gpu": false, 126 | "train": { 127 | "epochs": 10 128 | } 129 | } 130 | } 131 | ``` 132 | 133 | GPU 默认为启用。epochs 越大,训练时长越久。 134 | 135 | ## YOLO-目标检测 136 | 137 | ### 数据集 138 | 139 | 目标检测 Pipeline 支持 [PascalVoc](../../docs/zh-cn/spec/dataset.md)
和 [Coco](https://cocodataset.org/#format-data) 两种数据集格式,通过定义 `format` 参数为 `pascalvoc` 或 `coco` 来指定当前数据集格式: 140 | 141 | ```json 142 | { 143 | "datasource": "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/object-detection-yolo/build/datasource.js?format=pascalvoc&url=https://host/dataset.zip" 144 | } 145 | ``` 146 | 147 | 同样的,如果在本地训练,可以将数据源改为本地文件夹路径: 148 | 149 | ```json 150 | { 151 | "datasource": "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/object-detection-yolo/build/datasource.js?format=pascalvoc&url=file:///path/to/dataset-directory" 152 | } 153 | ``` 154 | 155 | ### 数据处理 156 | 157 | 和图片分类 pipeline 一样,YOLO 要求输入模型的所有样本图片维度(长宽)必须是一致的,为 416 * 416,因此在模型训练开始前,我们会通过 `dataflow` 脚本对图片进行 resize 操作: 158 | ```json 159 | { 160 | "dataflow": [ 161 | "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/object-detection-yolo/build/dataflow.js?size=416&size=416" 162 | ] 163 | } 164 | ``` 165 | 166 | ### 模型参数 167 | 168 | `options` 字段可以配置目标检测 pipeline 是否启用 GPU,训练的 epochs,每次喂入模型的样本数量 batchSize 以及 early-stopping 的 patience 值: 169 | 170 | ```json 171 | { 172 | "options": { 173 | "framework": "tfjs@3.8", 174 | "gpu": false, 175 | "train": { 176 | "epochs": 100, 177 | "batchSize": 16, 178 | "patience": 10 179 | } 180 | } 181 | } 182 | ``` 183 | 184 | GPU 默认为启用。`patience` 表示 loss 在 patience 个 epoch 没有下降后停止训练。比如 `patience` 为 3 的情况下,如果连续出现 3 个 epoch loss 都没有下降,就会触发 early-stopping,训练会提前终止。 185 | -------------------------------------------------------------------------------- /example/pipelines/image-classification-mobilenet.json: -------------------------------------------------------------------------------- 1 | { 2 | "specVersion": "2.0", 3 | "type": "ImageClassification", 4 | "datasource": "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/image-classification/build/datasource.js?url=https://pipcook-cloud.oss-cn-hangzhou.aliyuncs.com/dataset/mnist.zip", 5 | "dataflow": [ 6 | "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/image-classification/build/dataflow.js?size=224&size=224" 7 | ], 8 | "model": "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/image-classification/build/model.js", 9 | "options": { 10 | "framework": "tfjs@3.8", 11 | "train": { 12 | "epochs": 10 13 | } 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /example/pipelines/image-classification-resnet.json: -------------------------------------------------------------------------------- 1 | { 2 | "specVersion": "2.0", 3 | "type": "ImageClassification", 4 | "datasource": "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/image-classification/build/datasource.js?url=https://pipcook-cloud.oss-cn-hangzhou.aliyuncs.com/dataset/mnist.zip", 5 | "dataflow": [ 6 | "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/image-classification/build/dataflow.js?size=224&size=224" 7 | ], 8 | "model": "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/image-classification/build/model.js", 9 | "options": { 10 | "framework": "tfjs@3.8", 11 | "train": { 12 | "epochs": 10, 13 | "modelUrl": "resnet" 14 | } 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /example/pipelines/object-detection-yolo.json: -------------------------------------------------------------------------------- 1 | { 2 | "specVersion": "2.0", 3 | "type": "ObjectDetection", 4 | "datasource": 
"https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/object-detection-yolo/build/datasource.js?format=pascalvoc&url=https://pipcook-cloud.oss-cn-hangzhou.aliyuncs.com/dataset/mask.zip", 5 | "dataflow": [ 6 | "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/object-detection-yolo/build/dataflow.js?size=416&size=416" 7 | ], 8 | "model": "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/object-detection-yolo/build/model.js", 9 | "options": { 10 | "framework": "tfjs@3.8", 11 | "train": { 12 | "epochs": 100, 13 | "batchSize": 16, 14 | "patience": 10 15 | } 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /example/pipelines/text-classification-bayes.json: -------------------------------------------------------------------------------- 1 | { 2 | "specVersion": "2.0", 3 | "type": "TextClassification", 4 | "datasource": "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/text-classification-bayes/build/datasource.js?url=https://pipcook-cloud.oss-cn-hangzhou.aliyuncs.com/dataset/text-classification.zip", 5 | "model": "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/text-classification-bayes/build/model.js", 6 | "options": { 7 | "framework": "tfjs@3.8-nlp", 8 | "backend": "@tensorflow/tfjs-backend-cpu" 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /lerna.json: -------------------------------------------------------------------------------- 1 | { 2 | "packages": [ 3 | "packages/core", 4 | "packages/costa", 5 | "packages/cli" 6 | ], 7 | "version": "independent", 8 | "loglevel": "verbose", 9 | "npmClient": "npm", 10 | "command": { 11 | "publish": { 12 | "npmClient": "npm", 13 | "registry": "https://registry.npmjs.org" 14 | }, 15 | "publishConfig": { 16 | "access": "public" 17 | }, 18 | "bootstrap": { 19 | "nohoist": [ 20 | "node-addon-api", 21 | "ice-scripts", 22 | "stylelint" 23 | ] 24 | } 25 | }, 26 | "hoist": true 27 | } 28 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "pipcook", 3 | "private": true, 4 | "workspaces": [ 5 | "packages/*" 6 | ], 7 | "devDependencies": { 8 | "@lerna/bootstrap": "^4.0.0", 9 | "@typescript-eslint/eslint-plugin": "^4.1.1", 10 | "@typescript-eslint/parser": "^4.1.1", 11 | "docsify-cli": "^4.4.3", 12 | "eslint": "^7.15.0", 13 | "http-server": "^0.12.3", 14 | "lerna": "^4.0.0", 15 | "nyc": "^15.1.0", 16 | "ts-node": "^9.1.1", 17 | "typedoc": "^0.17.8", 18 | "typescript": "^4.3.5" 19 | }, 20 | "scripts": { 21 | "build": "lerna run compile", 22 | "postinstall": "lerna clean --yes && lerna bootstrap", 23 | "docsify": "npm run typedoc && http-server -c-1 ./docs", 24 | "lint": "eslint . 
--ext .ts --ext .js", 25 | "pretest": "npm run lint", 26 | "test": "lerna run test", 27 | "test:pipeline": "sh ./tools/run_pipeline.sh", 28 | "typedoc": "sh tools/mkdoc.sh", 29 | "clean": "lerna run clean --parallel --concurrency 10", 30 | "release": "lerna publish from-package --yes --no-verify-access", 31 | "beta-release-tag": "lerna version prerelease --no-push --force-publish=* --yes", 32 | "beta-release": "lerna publish from-package --no-verify-access --dist-tag beta -y", 33 | "cov": "./tools/coverage.sh", 34 | "cov:report": "nyc report -r=lcov", 35 | "build:docker-cpu": "docker build -t pipcook:latest-cpu -f docker/Dockerfile.cpu ./docker", 36 | "build:docker": "docker build -t pipcook:latest -f docker/Dockerfile ./docker" 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /packages/cli/.npmignore: -------------------------------------------------------------------------------- 1 | .* 2 | coverage 3 | *.tgz 4 | src 5 | tsconfig.* 6 | -------------------------------------------------------------------------------- /packages/cli/.nycrc: -------------------------------------------------------------------------------- 1 | { 2 | "include": [ 3 | "src/**/*.ts" 4 | ], 5 | "all": true, 6 | "instrument": true 7 | } 8 | -------------------------------------------------------------------------------- /packages/cli/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@pipcook/cli", 3 | "version": "2.1.5", 4 | "description": "pipcook cli", 5 | "main": "dist/bin/pipcook.js", 6 | "scripts": { 7 | "preinstall": "(mkdir -p dist/bin && touch dist/bin/pipcook.js) || ((mkdir dist\\bin || echo 'exists') && echo '' > dist\\bin\\pipcook.js)", 8 | "test": "ava -v", 9 | "build": "npm run clean && npm run compile", 10 | "clean": "((rm -rf dist tsconfig.tsbuildinfo) || (rmdir /Q /S dist tsconfig.tsbuildinfo)) || echo 'nothing to clean'", 11 | "compile": "tsc -b tsconfig.json && (chmod +x ./dist/bin/pipcook.js || echo 'windows not support chmod for bin')", 12 | "cov": "nyc -r text-summary npm run test", 13 | "cov:report": "nyc report -r lcov" 14 | }, 15 | "bin": { 16 | "pipcook": "dist/bin/pipcook.js" 17 | }, 18 | "keywords": [], 19 | "author": "", 20 | "license": "Apache-2.0", 21 | "dependencies": { 22 | "@pipcook/core": "^2.1.4", 23 | "@pipcook/costa": "^2.1.4", 24 | "bent": "^7.3.12", 25 | "chalk": "^3.0.0", 26 | "cli-progress": "^3.9.0", 27 | "commander": "^4.0.1", 28 | "dateformat": "^4.5.1", 29 | "debug": "^4.3.1", 30 | "express": "^4.17.1", 31 | "extract-zip": "^2.0.1", 32 | "fs-extra": "^9.1.0", 33 | "jimp": "^0.16.1", 34 | "multer": "^1.4.3", 35 | "nanoid": "^3.1.22", 36 | "ora": "^5.4.1", 37 | "pretty-bytes": "^5.6.0", 38 | "query-string": "^6.14.1", 39 | "semver": "^6.3.0" 40 | }, 41 | "devDependencies": { 42 | "@types/bent": "^7.3.2", 43 | "@types/cli-progress": "^3.9.1", 44 | "@types/dateformat": "^3.0.1", 45 | "@types/express": "^4.17.13", 46 | "@types/extract-zip": "^1.6.2", 47 | "@types/fs-extra": "^9.0.9", 48 | "@types/multer": "^1.4.7", 49 | "@types/node": "^14.6.0", 50 | "@types/semver": "^7.3.4", 51 | "@types/sinon": "^9.0.11", 52 | "ava": "^3.13.0", 53 | "import-fresh": "^3.3.0", 54 | "nyc": "^15.1.0", 55 | "sinon": "^10.0.0", 56 | "ts-node": "^9.1.1", 57 | "typescript": "^4.3.5" 58 | }, 59 | "publishConfig": { 60 | "access": "public" 61 | }, 62 | "ava": { 63 | "extensions": [ 64 | "ts" 65 | ], 66 | "require": [ 67 | "ts-node/register" 68 | ], 69 | "timeout": "2m" 70 | } 71 | } 72 | 
-------------------------------------------------------------------------------- /packages/cli/serve-resource/image/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Pipcook Server 6 | 8 | 76 | 77 | 78 | 79 |
80 |
81 | 82 | 83 |
84 |
85 |

No files currently selected for predict

86 |
87 |
88 | 89 |
90 |
91 |

Predict result...

92 |
93 |
94 | 185 | 186 | 187 | 188 | -------------------------------------------------------------------------------- /packages/cli/serve-resource/text/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Pipcook Server 6 | 8 | 77 | 78 | 79 | 80 |
81 |
82 |

Input text to predict:

83 |
84 |
85 | 86 |
87 |
88 | 89 |
90 |
91 |

Predict result...

92 |
93 |
94 | 113 | 114 | 115 | 116 | -------------------------------------------------------------------------------- /packages/cli/src/constants/index.test.ts: -------------------------------------------------------------------------------- 1 | import test from 'ava'; 2 | import * as path from 'path'; 3 | import { homedir } from 'os'; 4 | import * as constants from './'; 5 | 6 | test('constants of cli', (t) => { 7 | t.is(constants.PIPCOOK_FRAMEWORK_MIRROR_BASE, 'https://pipcook-cloud.oss-cn-hangzhou.aliyuncs.com/framework/'); 8 | t.is(constants.FrameworkDescFilename, 'framework.json'); 9 | t.is(constants.PIPCOOK_PLUGIN_ARTIFACT_PATH, path.join(constants.PIPCOOK_HOME_PATH, 'artifact')); 10 | t.is(constants.PIPCOOK_HOME_PATH, path.join(homedir(), '.pipcook')); 11 | t.is(constants.PIPCOOK_TMPDIR, path.join(constants.PIPCOOK_HOME_PATH, '.tmp')); 12 | t.is(constants.PIPCOOK_FRAMEWORK_PATH, path.join(constants.PIPCOOK_HOME_PATH, 'framework')); 13 | t.is(constants.PIPCOOK_SCRIPT_PATH, path.join(constants.PIPCOOK_HOME_PATH, 'script')); 14 | }); 15 | -------------------------------------------------------------------------------- /packages/cli/src/constants/index.ts: -------------------------------------------------------------------------------- 1 | import { homedir } from 'os'; 2 | import { join } from 'path'; 3 | 4 | /** 5 | * Pipcook home directory. 6 | */ 7 | export const PIPCOOK_HOME_PATH = join(homedir(), '.pipcook'); 8 | 9 | /** 10 | * Pipcook temp directory 11 | */ 12 | export const PIPCOOK_TMPDIR = join(PIPCOOK_HOME_PATH, '.tmp'); 13 | 14 | /** 15 | * pipcook framework cache 16 | */ 17 | export const PIPCOOK_FRAMEWORK_PATH = join(PIPCOOK_HOME_PATH, 'framework'); 18 | 19 | /** 20 | * pipcook script cache 21 | */ 22 | export const PIPCOOK_SCRIPT_PATH = join(PIPCOOK_HOME_PATH, 'script'); 23 | 24 | /** 25 | * pipcook artifact plugin path 26 | */ 27 | export const PIPCOOK_PLUGIN_ARTIFACT_PATH = join(PIPCOOK_HOME_PATH, 'artifact'); 28 | 29 | /** 30 | * pipcook framework mirror url 31 | */ 32 | export const PIPCOOK_FRAMEWORK_MIRROR_BASE = 'https://pipcook-cloud.oss-cn-hangzhou.aliyuncs.com/framework/'; 33 | 34 | /** 35 | * framework description file name, which is located in the root directory of the framework package 36 | */ 37 | export const FrameworkDescFilename = 'framework.json'; 38 | 39 | /** 40 | * framework description filename, which is located in the root directory of the framework package 41 | */ 42 | export const JSDescFilename = 'package.json'; 43 | 44 | /** 45 | * Node module directory name in the framework directory. 46 | */ 47 | export const JSModuleDirName = 'node_modules'; 48 | 49 | /** 50 | * Pipeline file name in the model directory. 51 | */ 52 | export const PipelineFileInModelDir = 'pipeline.json'; 53 | 54 | /** 55 | * Model directory name in the workspace directory. 56 | */ 57 | export const WorkspaceModelDir = 'model'; 58 | 59 | /** 60 | * Script directory name in the workspace directory. 61 | */ 62 | export const WorkspaceScriptDir = 'scripts'; 63 | 64 | /** 65 | * Framework directory name in the workspace directory. 66 | */ 67 | export const WorkspaceFrameworkDir = 'framework'; 68 | 69 | /** 70 | * Data directory name in the workspace directory. 
71 | */ 72 | export const WorkspaceDataDir = 'data'; 73 | 74 | /** Cache directory name in the workspace directory. */ 75 | export const WorkspaceCacheDir = 'cache'; 76 | -------------------------------------------------------------------------------- /packages/cli/src/standalone-impl.test.ts: -------------------------------------------------------------------------------- 1 | import test from 'ava'; 2 | import * as sinon from 'sinon'; 3 | import * as fs from 'fs-extra'; 4 | import { createStandaloneRT } from './standalone-impl'; 5 | 6 | test.serial.afterEach(() => sinon.restore()); 7 | 8 | test('create standalone runtime', (t) => { 9 | const mockDataSourceApi: any = {}; 10 | const rt = createStandaloneRT(mockDataSourceApi, '/tmp'); 11 | t.is((rt as any).dataset, mockDataSourceApi, 'datasource should equal'); 12 | t.is((rt as any).modelDir, '/tmp', 'modelDir should equal'); 13 | }); 14 | 15 | test.serial('runtime interface', async (t) => { 16 | const mockDataSourceApi: any = {}; 17 | const rt = createStandaloneRT(mockDataSourceApi, '/tmp'); 18 | const stubLog = sinon.stub(console, 'log'); 19 | await rt.notifyProgress({ value: 10, extendData: {} }); 20 | t.true(stubLog.calledOnce, 'console.log should be called once'); 21 | t.is(await rt.readModel(), '/tmp', 'readModel should be correct'); 22 | }); 23 | 24 | test.serial('runtime save model with path', async (t) => { 25 | const mockDataSourceApi: any = {}; 26 | const stubCopy = sinon.stub(fs, 'copy').resolves(); 27 | const rt = createStandaloneRT(mockDataSourceApi, '/tmp'); 28 | await rt.saveModel('/tmp/file.json'); 29 | t.false(stubCopy.called, 'copy should not be called'); 30 | await rt.saveModel('/data/file.json'); 31 | t.true(stubCopy.calledOnce, 'copy should be called once'); 32 | t.deepEqual(stubCopy.args[0], [ '/data/file.json', '/tmp' ] as any, 'should copy to the correct path'); 33 | }); 34 | -------------------------------------------------------------------------------- /packages/cli/src/standalone-impl.ts: -------------------------------------------------------------------------------- 1 | import { ProgressInfo } from '@pipcook/core'; 2 | import { pipelineAsync } from './utils'; 3 | import { DefaultDataSet, DefaultRuntime } from '@pipcook/costa'; 4 | import * as fs from 'fs-extra'; 5 | import * as path from 'path'; 6 | 7 | export class StandaloneImpl implements DefaultRuntime { 8 | constructor( 9 | public dataset: DefaultDataSet, 10 | private modelDir: string 11 | ) {} 12 | 13 | async notifyProgress(progress: ProgressInfo): Promise<void> { 14 | console.log(`progress: ${progress.value}%`); 15 | } 16 | 17 | async saveModel(localPathOrStream: string | NodeJS.ReadableStream, filename = 'model'): Promise<void> { 18 | if (typeof localPathOrStream === 'string') { 19 | if (path.parse(localPathOrStream).dir === this.modelDir || this.modelDir === path.resolve(localPathOrStream)) { 20 | return; 21 | } 22 | return fs.copy(localPathOrStream, this.modelDir); 23 | } else { 24 | const modelStream = fs.createWriteStream(path.join(this.modelDir, filename)); 25 | return pipelineAsync(localPathOrStream, modelStream); 26 | } 27 | } 28 | 29 | async readModel(): Promise<string> { 30 | return this.modelDir; 31 | } 32 | } 33 | 34 | export const createStandaloneRT = ( 35 | datasource: DefaultDataSet, 36 | modelDir: string 37 | ): DefaultRuntime => { 38 | return new StandaloneImpl(datasource, modelDir); 39 | }; 40 |
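The standalone runtime above is intentionally thin. A minimal sketch of how a caller might drive it, assuming a dataset pool built elsewhere (the directory paths are placeholders, not part of the package):

```ts
import { createStandaloneRT } from './standalone-impl';
import { DefaultDataSet } from '@pipcook/costa';

async function runWithRuntime(dataset: DefaultDataSet): Promise<void> {
  const rt = createStandaloneRT(dataset, '/tmp/workspace/model');
  await rt.notifyProgress({ value: 0, extendData: {} });
  // A model script would train here, then persist its output;
  // saveModel copies a file that lives outside the model directory into it.
  await rt.saveModel('/tmp/train-output/model.json');
  console.log('model stored under', await rt.readModel());
}
```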
-------------------------------------------------------------------------------- /packages/cli/src/utils/cache.test.ts: -------------------------------------------------------------------------------- 1 | import test from 'ava'; 2 | import * as sinon from 'sinon'; 3 | import * as fs from 'fs-extra'; 4 | import { fetchWithCache } from './cache'; 5 | import * as utils from '.'; 6 | 7 | test.serial.afterEach(() => sinon.restore()); 8 | 9 | test.serial('fetch with cache', async (t) => { 10 | const cacheDir = '.cache'; 11 | const url = 'url'; 12 | const target = 'target'; 13 | 14 | const stubDownloadAndExtractTo = sinon.stub(utils, 'downloadAndExtractTo').resolves(); 15 | const stubRemove = sinon.stub(fs, 'remove').resolves(); 16 | const stubPathExists = sinon.stub(fs, 'pathExists').resolves(true); 17 | const stubCopy = sinon.stub(fs, 'copy').resolves(); 18 | 19 | await fetchWithCache(cacheDir, url, target, true, true); 20 | 21 | t.false(stubDownloadAndExtractTo.called, 'downloadAndExtractTo function should not be called.'); 22 | t.true(stubRemove.calledOnce, 'fs.remove function should be called once.'); 23 | t.true(stubPathExists.calledOnce, 'fs.pathExists function should be called once.'); 24 | t.true(stubCopy.called, 'fs.copy function should be called.'); 25 | }); 26 | 27 | test.serial('fetch with cache and link', async (t) => { 28 | const cacheDir = '.cache'; 29 | const url = 'url'; 30 | const target = 'target'; 31 | 32 | const stubDownloadAndExtractTo = sinon.stub(utils, 'downloadAndExtractTo').resolves(); 33 | const stubRemove = sinon.stub(fs, 'remove').resolves(); 34 | const stubPathExists = sinon.stub(fs, 'pathExists').resolves(true); 35 | const stubSymlink = sinon.stub(fs, 'symlink').resolves(); 36 | 37 | await fetchWithCache(cacheDir, url, target, true, false); 38 | 39 | t.false(stubDownloadAndExtractTo.called, 'downloadAndExtractTo function should not be called.'); 40 | t.true(stubRemove.calledOnce, 'fs.remove function should be called once.'); 41 | t.true(stubPathExists.calledOnce, 'fs.pathExists function should be called once.'); 42 | t.true(stubSymlink.called, 'fs.symlink function should be called.'); 43 | }); 44 | 45 | test.serial('fetch with missed cache', async (t) => { 46 | const cacheDir = '.cache'; 47 | const url = 'url'; 48 | const target = 'target'; 49 | 50 | const stubDownloadAndExtractTo = sinon.stub(utils, 'downloadAndExtractTo').resolves(); 51 | const stubRemove = sinon.stub(fs, 'remove').resolves(); 52 | const stubMove = sinon.stub(fs, 'move').resolves(); 53 | const stubPathExists = sinon.stub(fs, 'pathExists').resolves(false); 54 | const stubCopy = sinon.stub(fs, 'copy').resolves(); 55 | 56 | await fetchWithCache(cacheDir, url, target, true, true); 57 | 58 | t.true(stubDownloadAndExtractTo.calledOnce, 'downloadAndExtractTo function should be called once.'); 59 | t.true(stubRemove.calledThrice, 'fs.remove function should be called three times.'); 60 | t.true(stubMove.calledOnce, 'fs.move function should be called once.'); 61 | t.true(stubPathExists.calledOnce, 'fs.pathExists function should be called once.'); 62 | t.true(stubCopy.calledOnce, 'fs.copy function should be called once.'); 63 | }); 64 | 65 | test.serial('fetch with disabled cache', async (t) => { 66 | const cacheDir = '.cache'; 67 | const url = 'url'; 68 | const target = 'target'; 69 | 70 | const stubDownloadAndExtractTo = sinon.stub(utils, 'downloadAndExtractTo').resolves(); 71 | const stubRemove = sinon.stub(fs, 'remove').resolves(); 72 | const stubMove = sinon.stub(fs, 'move').resolves(); 73 | const stubPathExists = sinon.stub(fs, 'pathExists').resolves(true); 74 | const stubCopy = sinon.stub(fs, 'copy').resolves(); 75 | 76 | await fetchWithCache(cacheDir, url,
target, false, true); 77 | 78 | t.true(stubDownloadAndExtractTo.calledOnce, 'downloadAndExtractTo function should be called once.'); 79 | t.true(stubRemove.calledThrice, 'fs.remove function should be called three times.'); 80 | t.true(stubMove.calledOnce, 'fs.move function should be called once.'); 81 | t.false(stubPathExists.called, 'fs.pathExists function should not be called.'); 82 | t.true(stubCopy.calledOnce, 'fs.copy function should be called once.'); 83 | }); 84 | 85 | test.serial('fetch with disabled cache with link', async (t) => { 86 | const cacheDir = '.cache'; 87 | const url = 'url'; 88 | const target = 'target'; 89 | 90 | const stubDownloadAndExtractTo = sinon.stub(utils, 'downloadAndExtractTo').resolves(); 91 | const stubRemove = sinon.stub(fs, 'remove').resolves(); 92 | const stubMove = sinon.stub(fs, 'move').resolves(); 93 | const stubPathExists = sinon.stub(fs, 'pathExists').resolves(true); 94 | const stubSymlink = sinon.stub(fs, 'symlink').resolves(); 95 | 96 | await fetchWithCache(cacheDir, url, target, false, false); 97 | 98 | t.true(stubDownloadAndExtractTo.calledOnce, 'downloadAndExtractTo function should be called once.'); 99 | t.true(stubRemove.calledThrice, 'fs.remove function should be called three times.'); 100 | t.true(stubMove.calledOnce, 'fs.move function should be called once.'); 101 | t.false(stubPathExists.called, 'fs.pathExists function should not be called.'); 102 | t.true(stubSymlink.calledOnce, 'fs.symlink function should be called once.'); 103 | }); 104 | -------------------------------------------------------------------------------- /packages/cli/src/utils/cache.ts: -------------------------------------------------------------------------------- 1 | import * as crypto from 'crypto'; 2 | import * as fs from 'fs-extra'; 3 | import * as path from 'path'; 4 | import Debug from 'debug'; 5 | import { downloadAndExtractTo } from '.'; 6 | const debug = Debug('cache'); 7 | 8 | /** 9 | * if the file or directory exists in cache, link or copy it to target, otherwise fetch and cache it 10 | * @param cacheDir cache directory 11 | * @param url url to fetch 12 | * @param target target path 13 | * @param enableCache is cache enabled 14 | * @param isCopy copy the cached file to the target instead of symlinking it 15 | */ 16 | export const fetchWithCache = async (cacheDir: string, url: string, target: string, enableCache: boolean, isCopy = false): Promise<void> => { 17 | const md5 = crypto.createHash('md5').update(url).digest('hex'); 18 | const cachePath = path.join(cacheDir, md5); 19 | const cacheTmpPath = path.join(cacheDir, 'tmp', md5); 20 | debug('search cache from', cachePath); 21 | await fs.remove(target); 22 | if (enableCache) { 23 | if (await fs.pathExists(cachePath)) { 24 | return isCopy ? fs.copy(cachePath, target) : fs.symlink(cachePath, target); 25 | } 26 | debug('cache missed'); 27 | } 28 | await fs.remove(cachePath); 29 | await fs.remove(cacheTmpPath); 30 | debug('download from url', url); 31 | await downloadAndExtractTo(url, cacheTmpPath); 32 | debug('move tmp file to cache'); 33 | await fs.move(cacheTmpPath, cachePath); 34 | debug(`copy/link ${cachePath} to ${target}`); 35 | return isCopy ? fs.copy(cachePath, target) : fs.symlink(cachePath, target); 36 | }; 37 |
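A usage sketch for `fetchWithCache`, with a placeholder URL and directories; the default (non-copy) mode symlinks the cache entry into place:

```ts
import * as constants from '../constants';
import { fetchWithCache } from './cache';

async function fetchFramework(): Promise<void> {
  // The first call downloads and extracts; later calls with the same URL
  // reuse the md5-keyed entry under the cache root and only re-link the target.
  await fetchWithCache(
    constants.PIPCOOK_FRAMEWORK_PATH,      // cache root
    'https://example.com/framework.zip',   // placeholder URL
    '/tmp/workspace/framework',            // target path
    true                                   // enableCache
  );
}
```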
-------------------------------------------------------------------------------- /packages/cli/src/utils/framework.test.ts: -------------------------------------------------------------------------------- 1 | import test from 'ava'; 2 | import * as sinon from 'sinon'; 3 | import * as fs from 'fs-extra'; 4 | import * as cache from './cache'; 5 | import { prepareFramework } from './framework'; 6 | import { PipcookFramework, PipelineMeta } from '@pipcook/costa'; 7 | import * as utils from './'; 8 | 9 | test.serial.afterEach(() => sinon.restore()); 10 | 11 | test.serial('prepare with invalid options', async (t) => { 12 | const pipelineMeta: PipelineMeta = { 13 | specVersion: 'test', 14 | datasource: 'test', 15 | dataflow: [ 'test' ], 16 | model: 'test', 17 | artifact: [], 18 | options: {} 19 | }; 20 | const frameworkDir = 'test'; 21 | 22 | const stubFetchWithCache = sinon.stub(cache, 'fetchWithCache').resolves(); 23 | const stubReadJson = sinon.stub(fs, 'readJson').resolves(); 24 | 25 | const ret = await prepareFramework(pipelineMeta, frameworkDir, ''); 26 | 27 | t.false(stubFetchWithCache.called); 28 | t.false(stubReadJson.called); 29 | t.is(ret, undefined); 30 | }); 31 | 32 | test.serial('prepare with file protocol and zip extname', async (t) => { 33 | const pipelineMeta: PipelineMeta = { 34 | specVersion: 'test', 35 | datasource: 'test', 36 | dataflow: [ 'test' ], 37 | model: 'test', 38 | artifact: [], 39 | options: { 40 | framework: 'file:///data/a.zip' 41 | } 42 | }; 43 | const frameworkDir = 'test'; 44 | 45 | const stubUnzipData = sinon.stub(utils, 'unZipData').resolves(); 46 | const stubReadJson = sinon.stub(fs, 'readJson').resolves({ mock: 'value' }); 47 | 48 | const ret = await prepareFramework(pipelineMeta, frameworkDir, ''); 49 | 50 | t.true(stubReadJson.calledOnce, 'readJson should be called once'); 51 | t.true(stubUnzipData.calledOnce, 'unzip should be called once'); 52 | t.deepEqual(ret, { mock: 'value', path: frameworkDir } as any); 53 | }); 54 | 55 | test.serial('prepare with file protocol and no-zip extname', async (t) => { 56 | const pipelineMeta: PipelineMeta = { 57 | specVersion: 'test', 58 | datasource: 'test', 59 | dataflow: [ 'test' ], 60 | model: 'test', 61 | artifact: [], 62 | options: { 63 | framework: 'file:///data/a' 64 | } 65 | }; 66 | const frameworkDir = 'test'; 67 | 68 | const stubCopy = sinon.stub(fs, 'copy').resolves(); 69 | const stubReadJson = sinon.stub(fs, 'readJson').resolves({ mock: 'value' }); 70 | 71 | const ret = await prepareFramework(pipelineMeta, frameworkDir, 'http://a.b.c/'); 72 | 73 | t.true(stubReadJson.calledOnce, 'readJson should be called once'); 74 | t.true(stubCopy.calledOnce, 'copy should be called once'); 75 | t.deepEqual(ret, { mock: 'value', path: frameworkDir } as any); 76 | }); 77 | 78 | test.serial('prepare with valid options', async (t) => { 79 | const pipelineMeta: PipelineMeta = { 80 | specVersion: 'test', 81 | datasource: 'test', 82 | dataflow: [ 'test' ], 83 | model: 'test', 84 | artifact: [], 85 | options: { 86 | framework: 'test' 87 | } 88 | }; 89 | 90 | const framework: PipcookFramework = { 91 | path: 'test', 92 | name: 'test', 93 | desc: 'test', 94 | version: 'test', 95 | arch: 'test', 96 | platform: 'test', 97 | pythonVersion: 'test', 98 | nodeVersion: 'test', 99 | napiVersion: 7, 100 | pythonPackagePath: 'test', 101 | jsPackagePath: 'test' 102 | }; 103 | 104 | const frameworkDir = 'test'; 105 | 106 | const stubFetchWithCache = sinon.stub(cache,
'fetchWithCache').resolves(); 107 | const stubReadJson = sinon.stub(fs, 'readJson').resolves(framework); 108 | 109 | const ret = await prepareFramework(pipelineMeta, frameworkDir, ''); 110 | 111 | const expectedRet = { 112 | ...framework, 113 | path: frameworkDir 114 | }; 115 | 116 | t.true(stubFetchWithCache.calledOnce); 117 | t.true(stubReadJson.calledOnce); 118 | t.deepEqual(ret, expectedRet); 119 | }); 120 | -------------------------------------------------------------------------------- /packages/cli/src/utils/framework.ts: -------------------------------------------------------------------------------- 1 | import { fetchWithCache } from './cache'; 2 | import * as fs from 'fs-extra'; 3 | import * as path from 'path'; 4 | import * as url from 'url'; 5 | import { PipelineMeta, PipcookFramework } from '@pipcook/costa'; 6 | import * as constants from '../constants'; 7 | import { mirrorUrl, DownloadProtocol, unZipData } from './'; 8 | 9 | export const prepareFramework = async ( 10 | pipelineMeta: PipelineMeta, 11 | frameworkDir: string, 12 | mirror: string, 13 | enableCache = true 14 | ): Promise<PipcookFramework | undefined> => { 15 | if (pipelineMeta.options.framework) { 16 | const urlObj = url.parse(pipelineMeta.options.framework); 17 | if (urlObj.protocol === DownloadProtocol.FILE) { 18 | if (path.extname(urlObj.path) === '.zip') { 19 | await unZipData(urlObj.path, frameworkDir); 20 | } else { 21 | await fs.copy(urlObj.path, frameworkDir); 22 | } 23 | } else { 24 | let realUrl = ''; 25 | if (urlObj.protocol === DownloadProtocol.HTTP || urlObj.protocol === DownloadProtocol.HTTPS) { 26 | realUrl = pipelineMeta.options.framework; 27 | } else { 28 | realUrl = mirrorUrl(mirror, pipelineMeta.options.framework); 29 | } 30 | await fetchWithCache( 31 | constants.PIPCOOK_FRAMEWORK_PATH, 32 | realUrl, 33 | frameworkDir, 34 | enableCache 35 | ); 36 | } 37 | const framework = await fs.readJson(path.join(frameworkDir, constants.FrameworkDescFilename)); 38 | // todo: validate framework 39 | return { 40 | ...framework, 41 | path: frameworkDir 42 | }; 43 | } 44 | }; 45 |
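The `framework` option accepted by `prepareFramework` can take several shapes; this sketch enumerates the branches above (all values are placeholders):

```ts
import { PipelineMeta } from '@pipcook/costa';
import { prepareFramework } from './framework';

async function resolveFramework(meta: PipelineMeta): Promise<void> {
  // 'file:///data/tfjs.zip' -> unzipped into the framework directory
  // 'file:///data/tfjs'     -> copied into the framework directory
  // 'https://host/tfjs.zip' -> downloaded through the cache as-is
  // anything else           -> resolved against the mirror base URL
  const framework = await prepareFramework(meta, '/tmp/workspace/framework', '');
  console.log('framework resolved at', framework?.path);
}
```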
-------------------------------------------------------------------------------- /packages/cli/src/utils/plugin.ts: -------------------------------------------------------------------------------- 1 | import * as fs from 'fs-extra'; 2 | import * as path from 'path'; 3 | import { ArtifactExports } from '@pipcook/core'; 4 | import { PipelineMeta } from '@pipcook/costa'; 5 | import * as constants from '../constants'; 6 | import { execAsync } from './'; 7 | 8 | export interface ArtifactMeta { 9 | artifactExports: ArtifactExports; 10 | options: Record<string, any>; 11 | } 12 | 13 | export interface PluginVersion { 14 | name: string, 15 | version: string 16 | } 17 | 18 | /** 19 | * extract the version from the name expression, falling back to 'latest' if no version is found 20 | * @param name package name with semver 21 | */ 22 | export const extractVersion = (name: string): PluginVersion => { 23 | let n = name.length; 24 | while (n-- > 0) { 25 | if (name[n] === '/') { 26 | break; 27 | } else if (name[n] === '@') { 28 | return { 29 | name: name.substr(0, n), 30 | version: name.substr(n + 1) 31 | }; 32 | } 33 | } 34 | return { name, version: 'latest' }; 35 | }; 36 | 37 | /** 38 | * install plugin 39 | * @param name package name: pipcook-ali-oss-uploader or pipcook-ali-oss-uploader@0.0.1 40 | * @param pluginHomeDir plugin home directory 41 | */ 42 | export const install = async (name: string, pluginHomeDir: string, npmClient: string, registry?: string): Promise<string> => { 43 | if (!await fs.pathExists(pluginHomeDir)) { 44 | await fs.mkdirp(pluginHomeDir); 45 | } 46 | const pluginVersion = extractVersion(name); 47 | const alias = `${pluginVersion.name}-${pluginVersion.version}`; 48 | const requirePath = path.join(pluginHomeDir, 'node_modules', alias); 49 | // always update plugin if version is 'beta', 'alpha' or 'latest' 50 | if ([ 'beta', 'alpha', 'latest' ].includes(pluginVersion.version) || !(await fs.pathExists(requirePath))) { 51 | await execAsync( 52 | `${npmClient} install ${alias}@npm:${name} -P --save${ registry ? ' --registry=' + registry : '' }`, 53 | { cwd: pluginHomeDir } 54 | ); 55 | } 56 | return requirePath; 57 | }; 58 | 59 | export const prepareArtifactPlugin = async (pipelineMeta: PipelineMeta, npmClient: string, registry?: string): Promise<Array<ArtifactMeta>> => { 60 | if ( 61 | !pipelineMeta.artifact || 62 | (Array.isArray(pipelineMeta.artifact) && pipelineMeta.artifact.length === 0) 63 | ) { 64 | return []; 65 | } 66 | const allPlugins: Array<ArtifactMeta> = []; 67 | for (const plugin of pipelineMeta.artifact) { 68 | const requirePath = await install(plugin.processor, constants.PIPCOOK_PLUGIN_ARTIFACT_PATH, npmClient, registry); 69 | let pluginExport: ArtifactExports = await import(requirePath); 70 | if ( 71 | typeof pluginExport.initialize !== 'function' 72 | || typeof pluginExport.build !== 'function' 73 | ) { 74 | if ( 75 | (pluginExport as any).default 76 | && typeof (pluginExport as any).default.initialize === 'function' 77 | && typeof (pluginExport as any).default.build === 'function' 78 | ) { 79 | pluginExport = (pluginExport as any).default; 80 | } else { 81 | throw new TypeError(`${plugin.processor} is not a valid artifact plugin`); 82 | } 83 | } 84 | await pluginExport.initialize(plugin); 85 | allPlugins.push({ 86 | artifactExports: pluginExport, 87 | options: plugin 88 | }); 89 | } 90 | return allPlugins; 91 | }; 92 |
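`extractVersion` scans the name from the end so that scoped package names keep their leading `@`; two illustrative results:

```ts
import { extractVersion } from './plugin';

// Explicit version on a scoped package:
extractVersion('@pipcook/ali-oss-uploader@0.0.1');
// -> { name: '@pipcook/ali-oss-uploader', version: '0.0.1' }

// No version suffix falls back to 'latest':
extractVersion('pipcook-ali-oss-uploader');
// -> { name: 'pipcook-ali-oss-uploader', version: 'latest' }
```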
-------------------------------------------------------------------------------- /packages/cli/src/utils/post-predict.test.ts: -------------------------------------------------------------------------------- 1 | import test from 'ava'; 2 | import * as sinon from 'sinon'; 3 | import * as fs from 'fs-extra'; 4 | import { PipelineType } from '@pipcook/costa'; 5 | import { processData } from './post-predict'; 6 | import * as Jimp from 'jimp'; 7 | 8 | test.serial.afterEach(() => sinon.restore()); 9 | 10 | test.serial('should process object detection', async (t) => { 11 | const result = [ 12 | [ 13 | { 14 | id: 1, 15 | category: 'mock-category', 16 | score: 0.5, 17 | box: [ 0, 0, 15, 15 ] 18 | } 19 | ] 20 | ]; 21 | const stubWrite = sinon.stub(Jimp.prototype, 'write').resolves(); 22 | await t.notThrowsAsync(processData(result, { 23 | type: PipelineType.ObjectDetection, 24 | inputs: [ __dirname + '/../../../../docs/images/logo.png' ] 25 | })); 26 | t.true(stubWrite.calledOnce, 'write should be called once'); 27 | }); 28 | 29 | test.serial('should process object detection with buffer', async (t) => { 30 | const result = [ 31 | [ 32 | { 33 | id: 1, 34 | category: 'mock-category', 35 | score: 0.5, 36 | box: [ 0, 0, 15, 15 ] 37 | } 38 | ] 39 | ]; 40 | const stubWrite = sinon.stub(Jimp.prototype, 'write').resolves(); 41 | const buffer = await fs.readFile(__dirname + '/../../../../docs/images/logo.png'); 42 | await t.notThrowsAsync(processData(result, { 43 | type: PipelineType.ObjectDetection, 44 | inputs: [ buffer ] 45 | })); 46 | t.true(stubWrite.calledOnce, 'write should be called once'); 47 | }); 48 | 49 | test.serial('should process object detection with buffer but args count not matched', async (t) => { 50 | const result = [ 51 | [ 52 | { 53 | id: 1, 54 | category: 'mock-category', 55 | score: 0.5, 56 | box: [ -10, -10, 15, 15 ] 57 | } 58 | ] 59 | ]; 60 | const buffer = await fs.readFile(__dirname + '/../../../../docs/images/logo.png'); 61 | await t.throwsAsync(processData(result, { 62 | type: PipelineType.ObjectDetection, 63 | inputs: [ buffer, buffer ] 64 | }), { message: 'Size of predict result is not equal to inputs.' }); 65 | }); 66 | 67 | test.serial('should process text classification', async (t) => { 68 | const result = [ 69 | [ 70 | { 71 | id: 1, 72 | category: 'mock-category', 73 | score: 0.5 74 | } 75 | ] 76 | ]; 77 | await t.notThrowsAsync(processData(result, { 78 | type: PipelineType.TextClassification, 79 | inputs: [ 'mock input text' ] 80 | })); 81 | }); 82 | -------------------------------------------------------------------------------- /packages/cli/src/utils/post-predict.ts: -------------------------------------------------------------------------------- 1 | import { PredictResult, DatasetPool } from '@pipcook/core'; 2 | import { PipelineType } from '@pipcook/costa'; 3 | import * as Jimp from 'jimp'; 4 | import { PredictInput } from './predict-dataset'; 5 | import * as path from 'path'; 6 | import { logger } from './'; 7 | 8 | export interface Options { 9 | type: PipelineType; 10 | inputs: Array<PredictInput>; 11 | [k: string]: any 12 | } 13 | 14 | export async function processData(predictResult: PredictResult, opts: Options): Promise<void> { 15 | logger.success(`Origin result: ${JSON.stringify(predictResult)}`); 16 | switch (opts.type) { 17 | case PipelineType.ObjectDetection: 18 | await processObjectDetection(predictResult, opts); 19 | break; 20 | default: 21 | return; 22 | } 23 | } 24 | 25 | async function processObjectDetection(predictResult: DatasetPool.Types.ObjectDetection.PredictResult, opts: Options): Promise<void> { 26 | if (predictResult.length !== opts.inputs.length) { 27 | throw new TypeError('Size of predict result is not equal to inputs.'); 28 | } 29 | for (let i = 0; i < opts.inputs.length; i++) { 30 | let img: Jimp; 31 | if (typeof opts.inputs[i] === 'string') { 32 | img = await Jimp.read(opts.inputs[i] as string); 33 | } else { 34 | img = await Jimp.read(opts.inputs[i] as Buffer); 35 | } 36 | const font = await Jimp.loadFont(Jimp.FONT_SANS_16_BLACK); 37 | predictResult[i].forEach((res: DatasetPool.Types.ObjectDetection.PredictObject) => { 38 | const x = Math.round(res.box[0] < 0 ? 0 : res.box[0]); 39 | const y = Math.round(res.box[1] < 0 ? 0 : res.box[1]); 40 | const w = Math.round(res.box[0] < 0 ? res.box[2] - Math.abs(res.box[0]) : res.box[2]); 41 | const h = Math.round(res.box[1] < 0 ?
res.box[3] - Math.abs(res.box[1]) : res.box[3]); 42 | // draw class name and score 43 | img.print(font, x, y, `${res.category}:${res.score.toFixed(2)}`); 44 | // draw box 45 | for (let drawX = x; drawX <= x + w; ++drawX) { 46 | img.setPixelColor(0xFF, drawX, y); 47 | img.setPixelColor(0xFF, drawX, y + h); 48 | } 49 | for (let drawY = y; drawY <= y + h; ++drawY) { 50 | img.setPixelColor(0xFF, x, drawY); 51 | img.setPixelColor(0xFF, x + w, drawY); 52 | } 53 | }); 54 | await img.write(`${path.join(process.cwd(), `predict-result-${i}.png`)}`); 55 | } 56 | logger.success('Object detection result has been saved to:'); 57 | for (let i = 0; i < opts.inputs.length; ++i) { 58 | logger.info(`predict-result-${i}.png`); 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /packages/cli/src/utils/predict-databset.test.ts: -------------------------------------------------------------------------------- 1 | import test from 'ava'; 2 | import * as sinon from 'sinon'; 3 | import { PipelineType } from '@pipcook/costa'; 4 | import { makePredictDataset } from './predict-dataset'; 5 | 6 | test.serial.afterEach(() => sinon.restore()); 7 | 8 | test('unsupported type', async (t) => { 9 | t.is(makePredictDataset([], -1 as any), null); 10 | }); 11 | 12 | test('make object detection dataset', async (t) => { 13 | const dataset = makePredictDataset([ 's' ], PipelineType.ObjectDetection); 14 | t.deepEqual(await dataset.predicted.next(), { data: { uri: 's' }, label: undefined }); 15 | }); 16 | 17 | test('make object detection dataset from buffer', async (t) => { 18 | const buffer = Buffer.from([ 1 ]); 19 | const dataset = makePredictDataset([ buffer ], PipelineType.ObjectDetection); 20 | t.deepEqual(await dataset.predicted.next(), { data: { buffer: buffer.buffer }, label: undefined }); 21 | }); 22 | 23 | test('make image classification dataset', async (t) => { 24 | const dataset = makePredictDataset([ 's' ], PipelineType.ImageClassification); 25 | t.deepEqual(await dataset.predicted.next(), { data: { uri: 's' }, label: undefined }); 26 | }); 27 | 28 | test('make image classification dataset from buffer', async (t) => { 29 | const buffer = Buffer.from([ 1 ]); 30 | const dataset = makePredictDataset([ buffer ], PipelineType.ImageClassification); 31 | t.deepEqual(await dataset.predicted.next(), { data: { buffer: buffer.buffer }, label: undefined }); 32 | }); 33 | 34 | test('make text classification dataset from string', async (t) => { 35 | const dataset = makePredictDataset([ 'text1' ], PipelineType.TextClassification); 36 | t.deepEqual(await dataset.predicted.next(), { data: 'text1', label: undefined }); 37 | }); 38 | 39 | test('make text classification dataset from buffer', async (t) => { 40 | const buffer = Buffer.from([ 1 ]); 41 | t.throws(() => makePredictDataset([ buffer ], PipelineType.TextClassification), { message: 'Should input text for text classification.' 
}); 42 | }); 43 | -------------------------------------------------------------------------------- /packages/cli/src/utils/predict-dataset.ts: -------------------------------------------------------------------------------- 1 | import { DatasetPool, DataCook } from '@pipcook/core'; 2 | import { PipelineType } from '@pipcook/costa'; 3 | /** 4 | * Data type for predict tasks 5 | */ 6 | export type PredictInput = string | Buffer; 7 | 8 | export function makePredictDataset(inputs: Array<PredictInput>, pipelineType: PipelineType): DatasetPool.Types.DatasetPool { 9 | let samples; 10 | if (pipelineType === PipelineType.ObjectDetection) { 11 | samples = inputs.map((input) => { 12 | if (typeof input === 'string') { 13 | return { 14 | data: { 15 | uri: input 16 | }, 17 | label: undefined 18 | } as DataCook.Dataset.Types.ObjectDetection.Sample; 19 | } else { 20 | return { 21 | data: { 22 | buffer: input.buffer 23 | }, 24 | label: undefined 25 | } as DataCook.Dataset.Types.ObjectDetection.Sample; 26 | } 27 | }); 28 | 29 | const datasetData: DatasetPool.Types.DatasetData = { 30 | predictedData: samples 31 | }; 32 | return DatasetPool.ArrayDatasetPoolImpl.from(datasetData, { type: DataCook.Dataset.Types.DatasetType.Image }); 33 | } else if (pipelineType === PipelineType.ImageClassification) { 34 | samples = inputs.map((input) => { 35 | if (typeof input === 'string') { 36 | return { 37 | data: { 38 | uri: input 39 | }, 40 | label: undefined 41 | } as DataCook.Dataset.Types.ImageClassification.Sample; 42 | } else { 43 | return { 44 | data: { 45 | buffer: input.buffer 46 | }, 47 | label: undefined 48 | } as DataCook.Dataset.Types.ImageClassification.Sample; 49 | } 50 | }); 51 | 52 | const datasetData: DatasetPool.Types.DatasetData = { 53 | predictedData: samples 54 | }; 55 | return DatasetPool.ArrayDatasetPoolImpl.from(datasetData, { type: DataCook.Dataset.Types.DatasetType.Image }); 56 | } else if (pipelineType === PipelineType.TextClassification) { 57 | samples = inputs.map((input) => { 58 | if (typeof input === 'string') { 59 | return { 60 | data: input, 61 | label: undefined 62 | } as DataCook.Dataset.Types.TextClassification.Sample; 63 | } else { 64 | throw new TypeError('Should input text for text classification.'); 65 | } 66 | }); 67 | 68 | const datasetData: DatasetPool.Types.DatasetData = { 69 | predictedData: samples 70 | }; 71 | return DatasetPool.ArrayDatasetPoolImpl.from(datasetData, { type: DataCook.Dataset.Types.DatasetType.Table }); 72 | } else { 73 | return null; 74 | } 75 | } 76 |
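A small sketch of `makePredictDataset`; the image path is a placeholder, and the sample shape mirrors the expectations in the test file above:

```ts
import { PipelineType } from '@pipcook/costa';
import { makePredictDataset } from './predict-dataset';

async function buildPredictInput(): Promise<void> {
  const pool = makePredictDataset([ './cat.jpg' ], PipelineType.ImageClassification);
  const sample = await pool?.predicted?.next();
  // -> { data: { uri: './cat.jpg' }, label: undefined }
  console.log(sample);
}
```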
-------------------------------------------------------------------------------- /packages/cli/src/utils/script.ts: -------------------------------------------------------------------------------- 1 | import { 2 | PipelineMeta, 3 | ScriptConfig, 4 | ScriptType, 5 | PipcookScript 6 | } from '@pipcook/costa'; 7 | import * as constants from '../constants'; 8 | import * as fs from 'fs-extra'; 9 | import * as path from 'path'; 10 | import { parse } from 'url'; 11 | import { fetchWithCache } from './cache'; 12 | import * as queryString from 'query-string'; 13 | import { DownloadProtocol } from './'; 14 | 15 | export const downloadScript = async (scriptDir: string, scriptOrder: number, url: string, type: ScriptType, enableCache: boolean, devMode: boolean): Promise<PipcookScript> => { 16 | const urlObj = parse(url); 17 | const baseName = path.parse(urlObj.pathname).base; 18 | let localPath = path.join(scriptDir, `${scriptOrder}-${baseName}`); 19 | const query = queryString.parse(urlObj.query); 20 | // if the url uses the file protocol, import it directly in development mode or copy it in normal mode. 21 | if (urlObj.protocol === DownloadProtocol.FILE || urlObj.protocol === null) { 22 | if (path.isAbsolute(urlObj.pathname)) { 23 | if (devMode) { 24 | localPath = urlObj.pathname; 25 | } else { 26 | await fs.copy(urlObj.pathname, localPath); 27 | } 28 | } else { 29 | if (devMode) { 30 | localPath = path.join(process.cwd(), urlObj.pathname); 31 | } else { 32 | await fs.copy(path.join(process.cwd(), urlObj.pathname), localPath); 33 | } 34 | } 35 | } else { 36 | if (urlObj.protocol === DownloadProtocol.HTTP || urlObj.protocol === DownloadProtocol.HTTPS) { 37 | // maybe should copy the script with COW 38 | await fetchWithCache(constants.PIPCOOK_SCRIPT_PATH, url, localPath, enableCache, true); 39 | } else { 40 | throw new TypeError(`unsupported protocol ${urlObj.protocol}`); 41 | } 42 | } 43 | return { 44 | name: baseName, 45 | path: localPath, 46 | type, 47 | query 48 | }; 49 | }; 50 | 51 | export const prepareScript = async (pipelineMeta: PipelineMeta, scriptDir: string, enableCache = true, devMode = false): Promise<ScriptConfig> => { 52 | const scripts: ScriptConfig = { 53 | datasource: null, 54 | dataflow: null, 55 | model: null 56 | }; 57 | let scriptOrder = 0; 58 | scripts.datasource 59 | = await downloadScript(scriptDir, scriptOrder, pipelineMeta.datasource, ScriptType.DataSource, enableCache, devMode); 60 | scriptOrder++; 61 | if (Array.isArray(pipelineMeta.dataflow) && pipelineMeta.dataflow.length > 0) { 62 | scripts.dataflow = []; 63 | for (const dataflowUri of pipelineMeta.dataflow) { 64 | scripts.dataflow.push(await downloadScript(scriptDir, scriptOrder, dataflowUri, ScriptType.Dataflow, enableCache, devMode)); 65 | scriptOrder++; 66 | } 67 | } 68 | scripts.model = await downloadScript(scriptDir, scriptOrder, pipelineMeta.model, ScriptType.Model, enableCache, devMode); 69 | return scripts; 70 | }; 71 | 72 | export const linkCoreToScript = async (scriptModulePath: string): Promise<void> => { 73 | const coreTargetPath = path.join(scriptModulePath, '@pipcook/core'); 74 | await fs.remove(path.join(coreTargetPath)); 75 | const coreScriptPath = require.resolve('@pipcook/core'); 76 | const coreDir = path.join('/core/'); 77 | const coreSourcePath = coreScriptPath.substr(0, coreScriptPath.lastIndexOf(coreDir) + coreDir.length - 1); 78 | await fs.mkdirp(path.join(scriptModulePath, '@pipcook')); 79 | await fs.symlink(coreSourcePath, coreTargetPath); 80 | }; 81 | -------------------------------------------------------------------------------- /packages/cli/src/utils/serve-predict.test.ts: -------------------------------------------------------------------------------- 1 | import test from 'ava'; 2 | import * as express from 'express'; 3 | import * as sinon from 'sinon'; 4 | import * as path from 'path'; 5 | import { stop, predictText, predictImage, serveText, serveImage, serve } from './serve-predict'; 6 | import { PipelineType } from '@pipcook/costa'; 7 | 8 | test.serial.afterEach(() => sinon.restore()); 9 | 10 | test.serial('serveText', async (t) => { 11 | const mockStatic: any = {}; 12 | const stubStatic = sinon.stub(express, 'static').returns(mockStatic); 13 | const app = express(); 14 | const stubUse = sinon.stub(app, 'use').returns(app); 15 | const stubGet = sinon.stub(app, 'get').returns(app); 16 | const mockCb = sinon.stub(); 17 | serveText(app, mockCb); 18 | t.true(stubStatic.calledOnce); 19 | t.true(stubGet.calledOnce); 20 | t.true(stubUse.calledOnce); 21 | t.is(stubStatic.args[0][0], path.join(__dirname, '../../serve-resource/text')); 22 | }); 23
| 24 | test.serial('serveImage', async (t) => { 25 | const mockStatic: any = {}; 26 | const stubStatic = sinon.stub(express, 'static').returns(mockStatic); 27 | const app = express(); 28 | const stubUse = sinon.stub(app, 'use').returns(app); 29 | const stubGet = sinon.stub(app, 'get').returns(app); 30 | const mockCb = sinon.stub(); 31 | serveImage(app, mockCb); 32 | t.true(stubStatic.calledOnce); 33 | t.true(stubGet.calledOnce); 34 | t.true(stubUse.calledOnce); 35 | t.is(stubStatic.args[0][0], path.join(__dirname, '../../serve-resource/image')); 36 | }); 37 | 38 | test.serial('predict image', async (t) => { 39 | const mockPredictResult = { mock: 'value' }; 40 | const mockCb = sinon.stub().resolves(mockPredictResult); 41 | const req: any = { 42 | files: [ { buffer: Buffer.from([ 1, 2, 3 ]) }, { buffer: Buffer.from([ 2, 3, 4 ]) } ] 43 | }; 44 | const resp: any = { 45 | json: sinon.stub() 46 | }; 47 | await predictImage(mockCb, req, resp); 48 | t.true(mockCb.calledOnce); 49 | t.true(resp.json.calledOnce); 50 | t.deepEqual(resp.json.args[0][0], { success: true, data: mockPredictResult }); 51 | }); 52 | 53 | test.serial('predict image but no data', async (t) => { 54 | const message = 'no file available'; 55 | const mockCb = sinon.stub(); 56 | const req: any = { 57 | files: undefined 58 | }; 59 | const resp: any = { 60 | json: sinon.stub() 61 | }; 62 | await predictImage(mockCb, req, resp); 63 | t.false(mockCb.called); 64 | t.true(resp.json.calledOnce); 65 | t.deepEqual(resp.json.args[0][0], { success: false, message }); 66 | }); 67 | 68 | test.serial('predict text', async (t) => { 69 | const mockPredictResult = { mock: 'value' }; 70 | const mockCb = sinon.stub().resolves(mockPredictResult); 71 | const req: any = { 72 | query: { input: [ 'input1', 'input2' ] } 73 | }; 74 | const resp: any = { 75 | json: sinon.stub() 76 | }; 77 | await predictText(mockCb, req, resp); 78 | t.true(mockCb.calledOnce); 79 | t.deepEqual(mockCb.args[0][0], req.query.input); 80 | t.true(resp.json.calledOnce); 81 | t.deepEqual(resp.json.args[0][0], { success: true, data: mockPredictResult }); 82 | }); 83 | 84 | test.serial('predict text but no input', async (t) => { 85 | const message = 'no input available'; 86 | const mockCb = sinon.stub(); 87 | const req: any = { 88 | query: {} 89 | }; 90 | const resp: any = { 91 | json: sinon.stub() 92 | }; 93 | await predictText(mockCb, req, resp); 94 | t.false(mockCb.called); 95 | t.true(resp.json.calledOnce); 96 | t.deepEqual(resp.json.args[0][0], { success: false, message }); 97 | }); 98 | 99 | test('serve predict but pipeline type is not valid', async (t) => { 100 | const mockCb = sinon.stub(); 101 | await t.throwsAsync( 102 | serve(1234, undefined, mockCb), 103 | { message: 'Pipeline type is not supported: undefined' } 104 | ); 105 | t.false(mockCb.called); 106 | }); 107 | 108 | test('start and stop', async (t) => { 109 | const mockCb = sinon.stub(); 110 | await t.notThrowsAsync(serve(1234, PipelineType.TextClassification, mockCb)); 111 | await t.notThrowsAsync(stop()); 112 | }); 113 | -------------------------------------------------------------------------------- /packages/cli/src/utils/serve-predict.ts: -------------------------------------------------------------------------------- 1 | import * as express from 'express'; 2 | import { Express, Request, Response } from 'express'; 3 | import * as multer from 'multer'; 4 | import * as path from 'path'; 5 | import * as http from 'http'; 6 | import { PipelineType } from '@pipcook/costa'; 7 | 8 | let server: http.Server; 9 | const 
ServeMap = { 10 | [PipelineType.TextClassification]: serveText, 11 | [PipelineType.ImageClassification]: serveImage, 12 | [PipelineType.ObjectDetection]: serveImage 13 | }; 14 | 15 | export type PredictCallBack 16 | = (input: Buffer[] | string[]) => Promise<Record<string, any>[]>; 17 | 18 | /** 19 | * Serve model. 20 | * @param port listen port. 21 | * @param pipelineType pipeline type. 22 | * @param predictCallback callback for predict. 23 | */ 24 | export async function serve( 25 | port: number, 26 | pipelineType: PipelineType, 27 | predictCallback: PredictCallBack 28 | ): Promise<void> { 29 | if (!ServeMap[pipelineType]) { 30 | throw new TypeError(`Pipeline type is not supported: ${pipelineType}`); 31 | } 32 | 33 | const app = express(); 34 | ServeMap[pipelineType](app, predictCallback); 35 | return new Promise((resolve) => { 36 | server = app.listen(port, () => { 37 | resolve(); 38 | }); 39 | }); 40 | } 41 | 42 | export async function stop(): Promise<void> { 43 | if (server) { 44 | return new Promise((resolve, reject) => { 45 | server.close((err?: Error) => { 46 | server = undefined; 47 | if (err) { 48 | reject(err); 49 | } else { 50 | resolve(); 51 | } 52 | }); 53 | }); 54 | } 55 | } 56 | 57 | export async function predictText( 58 | predictCallback: PredictCallBack, 59 | req: Request, 60 | res: Response 61 | ): Promise<void> { 62 | if (req.query && req.query['input']) { 63 | let inputs: string[]; 64 | if (Array.isArray(req.query['input'])) { 65 | inputs = req.query['input'] as string[]; 66 | } else if (typeof req.query['input'] === 'string') { 67 | inputs = [ req.query['input'] ]; 68 | } 69 | const result = await predictCallback(inputs); 70 | res.json({ success: true, data: result }); 71 | } else { 72 | res.json({ success: false, message: 'no input available' }); 73 | } 74 | } 75 | 76 | export function serveText( 77 | app: Express, 78 | predictCallback: PredictCallBack 79 | ): void { 80 | app.use(express.static(path.join(__dirname, '../../serve-resource/text'))) 81 | .get('/predict', predictText.bind(this, predictCallback)); 82 | } 83 | 84 | export async function predictImage( 85 | predictCallback: PredictCallBack, 86 | req: Request, 87 | res: Response 88 | ): Promise<void> { 89 | let buf: Buffer[]; 90 | if (Array.isArray(req.files)) { 91 | buf = (req.files as Express.Multer.File[]).map((file) => file.buffer); 92 | } 93 | 94 | if (buf) { 95 | const result = await predictCallback(buf); 96 | res.json({ success: true, data: result }); 97 | } else { 98 | res.json({ success: false, message: 'no file available' }); 99 | } 100 | } 101 | 102 | export function serveImage( 103 | app: Express, 104 | predictCallback: PredictCallBack 105 | ): void { 106 | const upload = multer({ storage: multer.memoryStorage() }); 107 | app.use(express.static(path.join(__dirname, '../../serve-resource/image'))) 108 | .post('/predict', upload.array('image'), predictImage.bind(this, predictCallback)); 109 | } 110 |
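A hedged sketch of serving a model over HTTP; the port number and the echo predictor are stand-ins for a real pipeline's predict function:

```ts
import { PipelineType } from '@pipcook/costa';
import { serve, stop } from './serve-predict';

async function demo(): Promise<void> {
  // Stand-in predictor: one fake classification per input.
  await serve(7682, PipelineType.TextClassification, async (inputs) =>
    inputs.map(() => ({ category: 'mock', score: 1 })));
  // GET http://localhost:7682/predict?input=hello now responds with
  // { "success": true, "data": [ { "category": "mock", "score": 1 } ] }
  await stop();
}
```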
-------------------------------------------------------------------------------- /packages/cli/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "../../tsconfig.json", 3 | "compilerOptions": { 4 | "resolveJsonModule": true, 5 | "composite": false, 6 | "declaration": true, 7 | "outDir": "./dist", 8 | "rootDir": "./src" 9 | }, 10 | "exclude": [ 11 | "**/*.test.ts", 12 | "dist" 13 | ] 14 | } 15 | -------------------------------------------------------------------------------- /packages/core/.gitignore: -------------------------------------------------------------------------------- 1 | /node_modules -------------------------------------------------------------------------------- /packages/core/.npmignore: -------------------------------------------------------------------------------- 1 | .* 2 | tsconfig.* 3 | src 4 | coverage 5 | *.tgz 6 | -------------------------------------------------------------------------------- /packages/core/package-lock.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@pipcook/core", 3 | "version": "2.1.4", 4 | "lockfileVersion": 1, 5 | "requires": true, 6 | "dependencies": { 7 | "typescript": { 8 | "version": "4.3.5", 9 | "resolved": "https://registry.npmjs.org/typescript/-/typescript-4.3.5.tgz", 10 | "integrity": "sha512-DqQgihaQ9cUrskJo9kIyW/+g0Vxsk8cDtZ52a3NGh0YNTfpUSArXSohyUGnvbPazEPLu398C0UxmKSOrPumUzA==", 11 | "dev": true 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /packages/core/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@pipcook/core", 3 | "version": "2.1.5", 4 | "main": "dist/index", 5 | "types": "dist/index", 6 | "files": [ 7 | "dist" 8 | ], 9 | "scripts": { 10 | "test": "ava -v", 11 | "build": "npm run clean && npm run compile", 12 | "clean": "((rm -rf dist tsconfig.tsbuildinfo) || (rmdir /Q /S dist tsconfig.tsbuildinfo)) || echo 'nothing to clean'", 13 | "compile": "tsc -b tsconfig.json", 14 | "cov": "nyc -r text-summary npm run test", 15 | "cov:report": "nyc report -r lcov" 16 | }, 17 | "author": "", 18 | "license": "Apache-2.0", 19 | "dependencies": { 20 | "@pipcook/datacook": "0.1.1", 21 | "papaparse": "^5.3.1" 22 | }, 23 | "devDependencies": { 24 | "@types/node": "^14.6.0", 25 | "@types/papaparse": "^5.2.6", 26 | "@types/sinon": "^9.0.11", 27 | "ava": "^3.13.0", 28 | "nyc": "^15.1.0", 29 | "sinon": "^10.0.0", 30 | "typescript": "^4.3.5" 31 | }, 32 | "publishConfig": { 33 | "access": "public" 34 | }, 35 | "keywords": [], 36 | "description": "", 37 | "ava": { 38 | "extensions": [ 39 | "ts" 40 | ], 41 | "require": [ 42 | "ts-node/register" 43 | ] 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /packages/core/src/artifact.ts: -------------------------------------------------------------------------------- 1 | 2 | /** 3 | * The type for artifact plugin export. As a pipcook artifact plugin, we need to export 4 | * two functions named `initialize` and `build`. 5 | */ 6 | export interface ArtifactExports { 7 | /** 8 | * `initialize` is called before the pipeline starts; 9 | * the plugin can do initialization here, something like environment checking, 10 | * logging in to the server, etc. The options are defined in the pipeline metadata, like: 11 | * ```json 12 | * { 13 | * "artifacts": [{ 14 | * "processor": "server-uploader", 15 | * "options": { 16 | * "targetUrl": "http://os.alibaba.com/pipcook/model/" 17 | * } 18 | * }] 19 | * } 20 | * ``` 21 | * @param options the options for the plugin 22 | */ 23 | initialize(options: Record<string, any>): Promise<void>; 24 | 25 | /** 26 | * After the model is trained successfully, the function `build` will 27 | * be called with the model directory and options.
28 | * @param modelDir the directory containing the trained model 29 | * @param options the options for the plugin 30 | */ 31 | build(modelDir: string, options: Record<string, any>): Promise<void>; 32 | } 33 |
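A minimal sketch of a plugin satisfying `ArtifactExports`; the `targetDir` option and the copy behavior are hypothetical, not a published plugin:

```ts
import * as fs from 'fs-extra';
import type { ArtifactExports } from '@pipcook/core';

// Hypothetical artifact plugin: copy the trained model to options.targetDir.
const artifact: ArtifactExports = {
  async initialize(options: Record<string, any>): Promise<void> {
    if (typeof options.targetDir !== 'string') {
      throw new TypeError('the targetDir option is required');
    }
  },
  async build(modelDir: string, options: Record<string, any>): Promise<void> {
    await fs.copy(modelDir, options.targetDir);
  }
};

export = artifact;
```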
-------------------------------------------------------------------------------- /packages/core/src/dataset-pool/format/coco.ts: -------------------------------------------------------------------------------- 1 | import * as DataCook from '@pipcook/datacook'; 2 | import { Types as DatasetTypes, ArrayDatasetPoolImpl } from '..'; 3 | 4 | export type Options = { 5 | trainAnnotationObj?: DataCook.Dataset.Types.Coco.Meta; 6 | testAnnotationObj?: DataCook.Dataset.Types.Coco.Meta; 7 | validAnnotationObj?: DataCook.Dataset.Types.Coco.Meta; 8 | predictedAnnotationObj?: DataCook.Dataset.Types.Coco.Meta; 9 | }; 10 | 11 | export const makeDatasetPoolFromCocoFormat = async ( 12 | options: Options 13 | ): Promise< 14 | DatasetTypes.DatasetPool< 15 | DataCook.Dataset.Types.Sample, 16 | DatasetTypes.Coco.DatasetMeta 17 | > 18 | > => { 19 | const train = options.trainAnnotationObj ? DataCook.Dataset.makeDatasetFromCoco(options.trainAnnotationObj) : undefined; 20 | const test = options.testAnnotationObj ? DataCook.Dataset.makeDatasetFromCoco(options.testAnnotationObj) : undefined; 21 | const valid = options.validAnnotationObj ? DataCook.Dataset.makeDatasetFromCoco(options.validAnnotationObj) : undefined; 22 | const predicted = options.predictedAnnotationObj ? DataCook.Dataset.makeDatasetFromCoco(options.predictedAnnotationObj) : undefined; 23 | 24 | const categories = options.trainAnnotationObj ? DataCook.Dataset.extractCategoriesFromCoco(options.trainAnnotationObj) : undefined; 25 | 26 | const datasetMeta: DatasetTypes.Coco.DatasetMeta = { 27 | type: DataCook.Dataset.Types.DatasetType.Image, 28 | size: { 29 | train: (await train?.nextBatch(-1))?.length || 0, 30 | test: (await test?.nextBatch(-1))?.length || 0, 31 | valid: (await valid?.nextBatch(-1))?.length || 0, 32 | predicted: (await predicted?.nextBatch(-1))?.length || 0 33 | }, 34 | categories, 35 | info: options.trainAnnotationObj?.info, 36 | licenses: options.trainAnnotationObj?.licenses 37 | }; 38 | await Promise.all([ 39 | train?.seek(0), 40 | test?.seek(0), 41 | valid?.seek(0), 42 | predicted?.seek(0) 43 | ]); 44 | return ArrayDatasetPoolImpl.from({ 45 | train, 46 | test, 47 | valid, 48 | predicted 49 | }, datasetMeta); 50 | }; 51 | -------------------------------------------------------------------------------- /packages/core/src/dataset-pool/format/csv.test.ts: -------------------------------------------------------------------------------- 1 | import test from 'ava'; 2 | import * as DataCook from '@pipcook/datacook'; 3 | import { Types } from '../'; 4 | import { makeDatasetPoolFromCsv } from './csv'; 5 | import Sample = DataCook.Dataset.Types.Sample; 6 | 7 | const csvDataWithHead = 'A,B,C\n1,2,3\n4,5,6\n7,8,9'; 8 | const csvDataWithoutHead = '1,2,3\n4,5,6\n7,8,9'; 9 | 10 | const sample1: Sample = { 11 | data: { 12 | A: '1', 13 | B: '2' 14 | }, 15 | label: { 16 | C: '3' 17 | } 18 | }; 19 | 20 | const sample2: Sample = { 21 | data: { 22 | A: '4', 23 | B: '5' 24 | }, 25 | label: { 26 | C: '6' 27 | } 28 | }; 29 | 30 | const sample3: Sample = { 31 | data: { 32 | A: '7', 33 | B: '8' 34 | }, 35 | label: { 36 | C: '9' 37 | } 38 | }; 39 | 40 | const sampleNoHead1: Sample = { 41 | data: { 42 | '0': '1', 43 | '1': '2' 44 | }, 45 | label: { 46 | '2': '3' 47 | } 48 | }; 49 | 50 | const sampleNoHead2: Sample = { 51 | data: { 52 | '0': '4', 53 | '1': '5' 54 | }, 55 | label: { 56 | '2': '6' 57 | } 58 | }; 59 | 60 | const sampleNoHead3: Sample = { 61 | data: { 62 | '0': '7', 63 | '1': '8' 64 | }, 65 | label: { 66 | '2': '9' 67 | } 68 | }; 69 | 70 | test('should make a dataset from csv', async (t) => { 71 | const dataset = makeDatasetPoolFromCsv({ 72 | trainData: csvDataWithHead, 73 | testData: csvDataWithHead, 74 | validData: undefined, 75 | hasHeader: true, 76 | labels: [ 'C' ] 77 | }); 78 | 79 | const metadata: Types.Csv.DatasetMeta = { 80 | type: DataCook.Dataset.Types.DatasetType.Table, 81 | size: { train: 3, test: 3, valid: 0, predicted: 0 } 82 | }; 83 | 84 | t.deepEqual(await dataset.getDatasetMeta(), metadata); 85 | t.deepEqual(await dataset.train?.next(), sample1); 86 | t.deepEqual(await dataset.test?.next(), sample1); 87 | t.deepEqual(await dataset.train?.nextBatch(2), [ sample2, sample3 ]); 88 | t.deepEqual(await dataset.train?.nextBatch(1), []); 89 | t.deepEqual(await dataset.test?.nextBatch(1), [ sample2 ]); 90 | }); 91 | test('should make a dataset from csv with valid', async (t) => { 92 | const dataset = makeDatasetPoolFromCsv({ 93 | trainData: csvDataWithHead, 94 | testData: csvDataWithHead, 95 | validData: csvDataWithHead, 96 | hasHeader: true, 97 | labels: [ 'C' ] 98 | }); 99 | 100 | const metadata: Types.Csv.DatasetMeta = { 101 | type: DataCook.Dataset.Types.DatasetType.Table, 102 | size: { train: 3, test: 3, valid: 3, predicted: 0 } 103 | }; 104 | 105 | t.deepEqual(await dataset.getDatasetMeta(), metadata); 106 | t.deepEqual(await dataset.train?.next(), sample1); 107 | t.deepEqual(await dataset.test?.next(), sample1); 108 | t.deepEqual(await dataset.valid?.next(), sample1); 109 | t.deepEqual(await dataset.train?.nextBatch(2), [ sample2, sample3 ]); 110 | t.deepEqual(await dataset.test?.nextBatch(1), [ sample2 ]); 111 | t.deepEqual(await dataset.valid?.nextBatch(1), [ sample2 ]); 112 | }); 113 | 114 | test('should make a dataset from csv without head', async (t) => { 115 | const dataset = makeDatasetPoolFromCsv({ 116 | trainData: csvDataWithoutHead, 117 | testData: csvDataWithoutHead, 118 | validData: csvDataWithoutHead, 119 | hasHeader: false, 120 | labels: [ '2' ] 121 | }); 122 | 123 | const metadata: Types.Csv.DatasetMeta = { 124 | type: DataCook.Dataset.Types.DatasetType.Table, 125 | size: { train: 3, test: 3, valid: 3, predicted: 0 } 126 | }; 127 | 128 | t.deepEqual(await dataset.getDatasetMeta(), metadata); 129 | t.deepEqual(await dataset.train?.next(), sampleNoHead1); 130 | t.deepEqual(await dataset.test?.next(), sampleNoHead1); 131 | t.deepEqual(await dataset.valid?.next(), sampleNoHead1); 132 | t.deepEqual(await dataset.train?.nextBatch(2), [ sampleNoHead2, sampleNoHead3 ]); 133 | t.deepEqual(await dataset.test?.nextBatch(1), [ sampleNoHead2 ]); 134 | t.deepEqual(await dataset.valid?.nextBatch(1), [ sampleNoHead2 ]); 135 | }); 136 | -------------------------------------------------------------------------------- /packages/core/src/dataset-pool/format/csv.ts: -------------------------------------------------------------------------------- 1 | import * as DataCook from '@pipcook/datacook'; 2 | import * as Papaparse from 'papaparse'; 3 | import { ArrayDatasetPoolImpl, Types } from '..'; 4 | import Csv = DataCook.Dataset.Types.Csv; 5 | 6 | export interface Options { 7 | trainData?: string; 8 | testData?: string; 9 | validData?: string; 10 | predictedData?: string; 11 | hasHeader: boolean; 12 | delimiter?: string; 13 | labels?: string[]; 14 | } 15 | 16 | function toSamples( 17 | parsedData: Papaparse.ParseResult<Record<string, string>>, 18 | labelFields?: Array<string> 19 | ): Array<Csv.Sample> { 20 | return parsedData.data.map((data) => {
21 | const label: Record<string, string> = {}; 22 | const newData = { ...data }; 23 | labelFields?.forEach((field) => { 24 | label[field] = newData[field]; 25 | delete newData[field]; 26 | }); 27 | return { 28 | data: newData, 29 | label 30 | }; 31 | }); 32 | } 33 | 34 | export const makeDatasetPoolFromCsv = (options: Options): Types.DatasetPool<Csv.Sample, Types.Csv.DatasetMeta> => { 35 | const config = { 36 | header: options.hasHeader, delimiter: options.delimiter 37 | }; 38 | const parsedTrainData = options.trainData ? Papaparse.parse<Record<string, string>>(options.trainData, config) : undefined; 39 | const parsedTestData = options.testData ? Papaparse.parse<Record<string, string>>(options.testData, config) : undefined; 40 | const parsedValidData = options.validData ? Papaparse.parse<Record<string, string>>(options.validData, config) : undefined; 41 | const parsedPredictedData = options.predictedData ? Papaparse.parse<Record<string, string>>(options.predictedData, config) : undefined; 42 | const data = { 43 | trainData: parsedTrainData ? toSamples(parsedTrainData, options.labels) : undefined, 44 | testData: parsedTestData ? toSamples(parsedTestData, options.labels) : undefined, 45 | validData: parsedValidData ? toSamples(parsedValidData, options.labels) : undefined, 46 | predictedData: parsedPredictedData ? toSamples(parsedPredictedData, options.labels) : undefined 47 | }; 48 | const meta: Types.Csv.DatasetMeta = { 49 | type: DataCook.Dataset.Types.DatasetType.Table, 50 | size: { 51 | train: data.trainData?.length || 0, 52 | test: data.testData?.length || 0, 53 | valid: data.validData?.length || 0, 54 | predicted: data.predictedData?.length || 0 55 | } 56 | }; 57 | return ArrayDatasetPoolImpl.from(data, meta); 58 | }; 59 |
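Usage of `makeDatasetPoolFromCsv` with one inline row, mirroring the expectations in the test file above:

```ts
import { makeDatasetPoolFromCsv } from './csv';

async function readCsvPool(): Promise<void> {
  const pool = makeDatasetPoolFromCsv({
    trainData: 'A,B,C\n1,2,3',
    hasHeader: true,
    labels: [ 'C' ]
  });
  const sample = await pool.train?.next();
  // -> { data: { A: '1', B: '2' }, label: { C: '3' } }
  console.log(sample);
}
```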
-------------------------------------------------------------------------------- /packages/core/src/dataset-pool/format/index.ts: -------------------------------------------------------------------------------- 1 | export * as PascalVoc from './pascal-voc'; 2 | export * as Coco from './coco'; 3 | export * as Csv from './csv'; 4 | -------------------------------------------------------------------------------- /packages/core/src/dataset-pool/format/pascal-voc.ts: -------------------------------------------------------------------------------- 1 | import * as DataCook from '@pipcook/datacook'; 2 | import { ArrayDatasetPoolImpl, Types } from '../'; 3 | import DatasetType = DataCook.Dataset.Types.DatasetType; 4 | import PascalVoc = DataCook.Dataset.Types.PascalVoc; 5 | 6 | export interface Options { 7 | trainAnnotationList?: Array<PascalVoc.Annotation>; 8 | testAnnotationList?: Array<PascalVoc.Annotation>; 9 | validAnnotationList?: Array<PascalVoc.Annotation>; 10 | predictedAnnotationList?: Array<PascalVoc.Annotation>; 11 | } 12 | 13 | export const makeDatasetPoolFromPascalVoc = async (options: Options): Promise<Types.DatasetPool<PascalVoc.Sample, Types.PascalVoc.DatasetMeta>> => { 14 | const train = options.trainAnnotationList ? DataCook.Dataset.makeDatasetFromPascalVoc(options.trainAnnotationList) : undefined; 15 | const test = options.testAnnotationList ? DataCook.Dataset.makeDatasetFromPascalVoc(options.testAnnotationList) : undefined; 16 | const valid = options.validAnnotationList ? DataCook.Dataset.makeDatasetFromPascalVoc(options.validAnnotationList) : undefined; 17 | const predicted = options.predictedAnnotationList ? DataCook.Dataset.makeDatasetFromPascalVoc(options.predictedAnnotationList) : undefined; 18 | const categories: Array<string> = options.trainAnnotationList ? DataCook.Dataset.extractCategoriesFromPascalVoc(options.trainAnnotationList) : []; 19 | 20 | const datasetMeta: Types.PascalVoc.DatasetMeta = { 21 | type: DatasetType.Image, 22 | size: { 23 | train: (await train?.nextBatch(-1))?.length || 0, 24 | test: (await test?.nextBatch(-1))?.length || 0, 25 | valid: (await valid?.nextBatch(-1))?.length || 0, 26 | predicted: (await predicted?.nextBatch(-1))?.length || 0 27 | }, 28 | categories 29 | }; 30 | await Promise.all([ 31 | train?.seek(0), 32 | test?.seek(0), 33 | valid?.seek(0), 34 | predicted?.seek(0) 35 | ]); 36 | return ArrayDatasetPoolImpl.from({ 37 | train, 38 | test, 39 | valid, 40 | predicted 41 | }, datasetMeta); 42 | }; 43 | -------------------------------------------------------------------------------- /packages/core/src/dataset-pool/index.ts: -------------------------------------------------------------------------------- 1 | import * as DataCook from '@pipcook/datacook'; 2 | import * as Types from './types'; 3 | 4 | import Dataset = DataCook.Dataset.Types.Dataset; 5 | import Sample = DataCook.Dataset.Types.Sample; 6 | import ArrayDatasetImpl = DataCook.Dataset.ArrayDatasetImpl; 7 | 8 | export * from './pipeline-type'; 9 | export * from './format'; 10 | export * as Types from './types'; 11 | 12 | function isDatasetGroup<T extends Sample>(arg: Types.DatasetGroup<T> | Types.DatasetData<T>): arg is Types.DatasetGroup<T> { 13 | return (arg as any).train || (arg as any).test || (arg as any).predicted || (arg as any).valid; 14 | } 15 | function isTransformOption< 16 | T extends Sample, 17 | D extends Types.DatasetMeta, 18 | TARGET_SAMPLE extends Sample, 19 | TARGET_META extends Types.DatasetMeta 20 | >(arg: Types.TransformOption<T, D, TARGET_SAMPLE, TARGET_META> | ((sample: T) => Promise<TARGET_SAMPLE>)): arg is Types.TransformOption<T, D, TARGET_SAMPLE, TARGET_META> { 21 | return typeof arg !== 'function'; 22 | } 23 | 24 | export class ArrayDatasetPoolImpl<T extends Sample, D extends Types.DatasetMeta> implements Types.DatasetPool<T, D> { 25 | public meta?: D; 26 | 27 | public train?: Dataset<T>; 28 | public test?: Dataset<T>; 29 | public valid?: Dataset<T>; 30 | public predicted?: Dataset<T>; 31 | 32 | static from<SAMPLE extends Sample, META extends Types.DatasetMeta>(datasetGrp: Types.DatasetGroup<SAMPLE>, datasetMeta?: META): ArrayDatasetPoolImpl<SAMPLE, META>; 33 | static from<SAMPLE extends Sample, META extends Types.DatasetMeta>(datasetData: Types.DatasetData<SAMPLE>, datasetMeta?: META): ArrayDatasetPoolImpl<SAMPLE, META>; 34 | static from<SAMPLE extends Sample, META extends Types.DatasetMeta>(datasetDataOrGrp: Types.DatasetGroup<SAMPLE> | Types.DatasetData<SAMPLE>, datasetMeta?: META): ArrayDatasetPoolImpl<SAMPLE, META> { 35 | const obj = new ArrayDatasetPoolImpl<SAMPLE, META>(); 36 | obj.meta = datasetMeta; 37 | if (isDatasetGroup(datasetDataOrGrp)) { 38 | const datasetGrp = datasetDataOrGrp; 39 | obj.train = datasetGrp.train; 40 | obj.test = datasetGrp.test; 41 | obj.valid = datasetGrp.valid; 42 | obj.predicted = datasetGrp.predicted; 43 | } else { 44 | const datasetData = datasetDataOrGrp; 45 | obj.train = datasetData.trainData ? new ArrayDatasetImpl(datasetData.trainData) : undefined; 46 | obj.test = datasetData.testData ? new ArrayDatasetImpl(datasetData.testData) : undefined; 47 | obj.valid = datasetData.validData ? new ArrayDatasetImpl(datasetData.validData) : undefined; 48 | obj.predicted = datasetData.predictedData ?
24 | export class ArrayDatasetPoolImpl<T extends Sample, D extends Types.DatasetMeta> implements Types.DatasetPool<T, D> { 25 | public meta?: D; 26 | 27 | public train?: Dataset<T>; 28 | public test?: Dataset<T>; 29 | public valid?: Dataset<T>; 30 | public predicted?: Dataset<T>; 31 | 32 | static from<SAMPLE extends Sample, META extends Types.DatasetMeta>(datasetGrp: Types.DatasetGroup<SAMPLE>, datasetMeta?: META): ArrayDatasetPoolImpl<SAMPLE, META>; 33 | static from<SAMPLE extends Sample, META extends Types.DatasetMeta>(datasetData: Types.DatasetData<SAMPLE>, datasetMeta?: META): ArrayDatasetPoolImpl<SAMPLE, META>; 34 | static from<SAMPLE extends Sample, META extends Types.DatasetMeta>(datasetDataOrGrp: Types.DatasetGroup<SAMPLE> | Types.DatasetData<SAMPLE>, datasetMeta?: META): ArrayDatasetPoolImpl<SAMPLE, META> { 35 | const obj = new ArrayDatasetPoolImpl<SAMPLE, META>(); 36 | obj.meta = datasetMeta; 37 | if (isDatasetGroup(datasetDataOrGrp)) { 38 | const datasetGrp = datasetDataOrGrp; 39 | obj.train = datasetGrp.train; 40 | obj.test = datasetGrp.test; 41 | obj.valid = datasetGrp.valid; 42 | obj.predicted = datasetGrp.predicted; 43 | } else { 44 | const datasetData = datasetDataOrGrp; 45 | obj.train = datasetData.trainData ? new ArrayDatasetImpl(datasetData.trainData) : undefined; 46 | obj.test = datasetData.testData ? new ArrayDatasetImpl(datasetData.testData) : undefined; 47 | obj.valid = datasetData.validData ? new ArrayDatasetImpl(datasetData.validData) : undefined; 48 | obj.predicted = datasetData.predictedData ? new ArrayDatasetImpl(datasetData.predictedData) : undefined; 49 | } 50 | return obj; 51 | } 52 | 53 | async getDatasetMeta(): Promise<D | undefined> { 54 | return this.meta; 55 | } 56 | 57 | shuffle(): void { 58 | this.train?.shuffle(); 59 | this.test?.shuffle(); 60 | this.valid?.shuffle(); 61 | this.predicted?.shuffle(); 62 | } 63 | 64 | transform< 65 | TARGET_SAMPLE extends Sample 66 | > (transformFun: (sample: T) => Promise<TARGET_SAMPLE>): Types.DatasetPool<TARGET_SAMPLE, D>; 67 | transform< 68 | TARGET_SAMPLE extends Sample, 69 | TARGET_META extends Types.DatasetMeta = D 70 | > (opts: Types.TransformOption<T, D, TARGET_SAMPLE, TARGET_META>): Types.DatasetPool<TARGET_SAMPLE, TARGET_META>; 71 | transform< 72 | TARGET_SAMPLE extends Sample, 73 | TARGET_META extends Types.DatasetMeta = D 74 | > (optsOrFun: Types.TransformOption<T, D, TARGET_SAMPLE, TARGET_META> | ((sample: T) => Promise<TARGET_SAMPLE>)): Types.DatasetPool<TARGET_SAMPLE, TARGET_META> { 75 | if (isTransformOption(optsOrFun)) { 76 | const { metadata, transform } = optsOrFun; 77 | const newDatasetPool = ArrayDatasetPoolImpl.from({ 78 | train: this.train ? DataCook.Dataset.makeTransform(this.train, transform) : undefined, 79 | test: this.test ? DataCook.Dataset.makeTransform(this.test, transform) : undefined, 80 | valid: this.valid ? DataCook.Dataset.makeTransform(this.valid, transform) : undefined, 81 | predicted: this.predicted ? DataCook.Dataset.makeTransform(this.predicted, transform) : undefined 82 | }); 83 | const metaPromise = this.getDatasetMeta(); 84 | newDatasetPool.getDatasetMeta = async () => { 85 | return metadata(await metaPromise); 86 | }; 87 | return newDatasetPool; 88 | } else { 89 | return ArrayDatasetPoolImpl.from({ 90 | train: this.train ? DataCook.Dataset.makeTransform(this.train, optsOrFun) : undefined, 91 | test: this.test ? DataCook.Dataset.makeTransform(this.test, optsOrFun) : undefined, 92 | valid: this.valid ? DataCook.Dataset.makeTransform(this.valid, optsOrFun) : undefined, 93 | predicted: this.predicted ? 
DataCook.Dataset.makeTransform(this.predicted, optsOrFun) : undefined 94 | }, this.meta); 95 | } 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /packages/core/src/dataset-pool/pipeline-type/image-classification.test.ts: -------------------------------------------------------------------------------- 1 | import test from 'ava'; 2 | import * as DataCook from '@pipcook/datacook'; 3 | import { makeImageClassificationDatasetFromList } from './image-classification'; 4 | 5 | test('make dataset pool from train and test list', async (t) => { 6 | const buffer1 = new ArrayBuffer(1); 7 | const buffer2 = new ArrayBuffer(2); 8 | const opts = { 9 | train: [ 10 | { 11 | category: 'a', 12 | uri: 'uri mock' 13 | }, 14 | { 15 | category: 'b', 16 | buffer: buffer1 17 | } 18 | ], 19 | test: [ 20 | { 21 | category: 'b', 22 | uri: 'test uri mock' 23 | }, 24 | { 25 | category: 'a', 26 | buffer: buffer2 27 | } 28 | ], 29 | valid: undefined, 30 | predicted: undefined 31 | }; 32 | const dataset = makeImageClassificationDatasetFromList(opts); 33 | t.deepEqual(await dataset.getDatasetMeta(), { 34 | type: DataCook.Dataset.Types.DatasetType.Image, 35 | size: { train: 2, test: 2, valid: 0, predicted: 0 }, 36 | categories: [ 'a', 'b' ] 37 | }); 38 | t.truthy(dataset.train); 39 | t.truthy(dataset.test); 40 | t.falsy(dataset.valid); 41 | t.falsy(dataset.predicted); 42 | const sample1 = await dataset.train?.next(); 43 | const sample2 = await dataset.train?.next(); 44 | const sample3 = await dataset.train?.next(); 45 | t.deepEqual(sample1, { 46 | data: { uri: 'uri mock', buffer: undefined }, 47 | label: 'a' 48 | }); 49 | t.deepEqual(sample2, { 50 | data: { buffer: buffer1, uri: undefined }, 51 | label: 'b' 52 | }); 53 | t.is(sample3, undefined); 54 | }); 55 | 56 | test('make dataset pool from valid and predict list', async (t) => { 57 | const buffer1 = new ArrayBuffer(1); 58 | const buffer2 = new ArrayBuffer(2); 59 | const opts = { 60 | valid: [ 61 | { 62 | category: 'a', 63 | uri: 'uri mock' 64 | }, 65 | { 66 | category: 'b', 67 | buffer: buffer1 68 | } 69 | ], 70 | predicted: [ 71 | { 72 | category: 'b', 73 | uri: 'test uri mock' 74 | }, 75 | { 76 | category: 'a', 77 | buffer: buffer2 78 | } 79 | ], 80 | train: undefined, 81 | test: undefined 82 | }; 83 | const dataset = makeImageClassificationDatasetFromList(opts); 84 | t.deepEqual(await dataset.getDatasetMeta(), { 85 | type: DataCook.Dataset.Types.DatasetType.Image, 86 | size: { train: 0, test: 0, valid: 2, predicted: 2 }, 87 | categories: undefined 88 | }); 89 | t.falsy(dataset.train); 90 | t.falsy(dataset.test); 91 | t.truthy(dataset.valid); 92 | t.truthy(dataset.predicted); 93 | const sample1 = await dataset.valid?.next(); 94 | const sample2 = await dataset.valid?.next(); 95 | const sample3 = await dataset.valid?.next(); 96 | t.deepEqual(sample1, { 97 | data: { uri: 'uri mock', buffer: undefined }, 98 | label: 'a' 99 | }); 100 | t.deepEqual(sample2, { 101 | data: { buffer: buffer1, uri: undefined }, 102 | label: 'b' 103 | }); 104 | t.is(sample3, undefined); 105 | }); 106 | -------------------------------------------------------------------------------- /packages/core/src/dataset-pool/pipeline-type/image-classification.ts: -------------------------------------------------------------------------------- 1 | import * as DataCook from '@pipcook/datacook'; 2 | import { Types, ArrayDatasetPoolImpl } from '../'; 3 | 4 | import ImageClassification = DataCook.Dataset.Types.ImageClassification; 5 | import Sample = 
ImageClassification.Sample; 6 | import DatasetMeta = Types.ImageClassification.DatasetMeta; 7 | 8 | export const makeImageClassificationDatasetFromList = (opts: Types.ImageClassification.Options): Types.ImageClassification.DatasetPool => { 9 | const categories: Set<string> = new Set(); 10 | if (opts.train) { 11 | for (const data of opts.train) { 12 | categories.add(data.category); 13 | } 14 | } 15 | const meta: Types.ImageClassification.DatasetMeta = { 16 | type: DataCook.Dataset.Types.DatasetType.Image, 17 | size: { 18 | train: opts.train ? opts.train.length : 0, 19 | test: opts.test ? opts.test.length : 0, 20 | valid: opts.valid ? opts.valid.length : 0, 21 | predicted: opts.predicted ? opts.predicted.length : 0 22 | }, 23 | categories: categories.size > 0 ? Array.from(categories) : undefined 24 | }; 25 | return ArrayDatasetPoolImpl.from({ 26 | train: opts.train ? DataCook.Dataset.makeImageClassificationDatasetFromList(opts.train) : undefined, 27 | test: opts.test ? DataCook.Dataset.makeImageClassificationDatasetFromList(opts.test) : undefined, 28 | valid: opts.valid ? DataCook.Dataset.makeImageClassificationDatasetFromList(opts.valid) : undefined, 29 | predicted: opts.predicted ? DataCook.Dataset.makeImageClassificationDatasetFromList(opts.predicted) : undefined 30 | }, meta); 31 | }; 32 | 33 | 34 | export const makeImageClassificationDataset = ( 35 | datasetData: Types.DatasetData<Sample>, 36 | meta: DatasetMeta 37 | ): Types.ImageClassification.DatasetPool => { 38 | return ArrayDatasetPoolImpl.from(datasetData, meta); 39 | }; 40 | -------------------------------------------------------------------------------- /packages/core/src/dataset-pool/pipeline-type/index.ts: -------------------------------------------------------------------------------- 1 | export * from './object-detection'; 2 | export * from './image-classification'; 3 | export * from './text-classification'; -------------------------------------------------------------------------------- /packages/core/src/dataset-pool/pipeline-type/object-detection.ts: -------------------------------------------------------------------------------- 1 | import * as DataCook from '@pipcook/datacook'; 2 | import { Coco as CocoDataset, PascalVoc as PascalVocDataset } from '../format'; 3 | import { ArrayDatasetPoolImpl, Types } from '../'; 4 | 5 | import Sample = DataCook.Dataset.Types.Sample; 6 | import Coco = DataCook.Dataset.Types.Coco; 7 | import PascalVoc = DataCook.Dataset.Types.PascalVoc; 8 | import ObjectDetection = DataCook.Dataset.Types.ObjectDetection; 9 | 10 | export const makeObjectDetectionDatasetFromCoco = async (options: CocoDataset.Options): Promise<Types.ObjectDetection.DatasetPool> => { 11 | const dataset = await CocoDataset.makeDatasetPoolFromCocoFormat(options); 12 | const categoryFinder: Record<number, Coco.Category> = {}; 13 | const categorySet = new Set<string>(); 14 | (await dataset.getDatasetMeta())?.categories?.forEach((item) => { 15 | categoryFinder[item.id] = item; 16 | categorySet.add(item.name); 17 | }); 18 | const categories = Array.from(categorySet); 19 | return dataset.transform({ 20 | transform: async (sample: Sample): Promise<ObjectDetection.Sample> => { 21 | const newLabels = sample.label.map((label) => { 22 | return { 23 | name: categoryFinder[label.category_id].name, 24 | bbox: label.bbox 25 | }; 26 | }); 27 | return { 28 | data: { uri: sample.data.url || sample.data.coco_url || sample.data.flickr_url }, 29 | label: newLabels 30 | }; 31 | }, 32 | metadata: async (meta: Types.Coco.DatasetMeta): Promise<Types.ObjectDetection.DatasetMeta> => { 33 | return { 34 | type: meta.type, 35 | size: meta.size, 36 | categories 37 | }; 38 | } 39 | }); 40 | }; 41 | 
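/** * Convert a Pascal VOC pool into generic object-detection samples; each bounding box is re-encoded from corner coordinates (xmin/ymin/xmax/ymax) into [x, y, width, height]. Illustrative call (a sketch): * * const pool = await makeObjectDetectionDatasetFromPascalVoc({ trainAnnotationList }); * await pool.train?.next(); // { data: { uri }, label: [ { name, bbox: [x, y, w, h] } ] } */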
42 | export const makeObjectDetectionDatasetFromPascalVoc = async (options: PascalVocDataset.Options): Promise<Types.ObjectDetection.DatasetPool> => { 43 | return (await PascalVocDataset.makeDatasetPoolFromPascalVoc(options)).transform( 44 | async (sample: PascalVoc.Sample): Promise<ObjectDetection.Sample> => { 45 | const newLabels: ObjectDetection.Label = sample.label.map((label) => { 46 | return { 47 | name: label.name, 48 | bbox: [ 49 | label.bndbox.xmin, 50 | label.bndbox.ymin, 51 | label.bndbox.xmax - label.bndbox.xmin, 52 | label.bndbox.ymax - label.bndbox.ymin 53 | ] 54 | }; 55 | }); 56 | return { 57 | data: { uri: sample.data.annotation.path }, 58 | label: newLabels 59 | }; 60 | } 61 | ); 62 | }; 63 | 64 | export const makeObjectDetectionDataset = ( 65 | datasetData: Types.DatasetData<ObjectDetection.Sample>, 66 | meta: Types.ObjectDetection.DatasetMeta 67 | ): Types.ObjectDetection.DatasetPool => { 68 | return ArrayDatasetPoolImpl.from(datasetData, meta); 69 | }; 70 | -------------------------------------------------------------------------------- /packages/core/src/dataset-pool/pipeline-type/text-classification.ts: -------------------------------------------------------------------------------- 1 | import * as DataCook from '@pipcook/datacook'; 2 | import { Types, ArrayDatasetPoolImpl } from '..'; 3 | 4 | import TextClassification = DataCook.Dataset.Types.TextClassification; 5 | import Sample = TextClassification.Sample; 6 | import DatasetMeta = Types.TextClassification.DatasetMeta; 7 | 8 | export const makeTextClassificationDatasetFromList = (opts: Types.TextClassification.Options): Types.TextClassification.DatasetPool => { 9 | const categories: Set<string> = new Set(); 10 | if (opts.train) { 11 | for (const data of opts.train) { 12 | categories.add(data.category); 13 | } 14 | } 15 | const meta: Types.TextClassification.DatasetMeta = { 16 | type: DataCook.Dataset.Types.DatasetType.Table, 17 | size: { 18 | train: opts.train ? opts.train.length : 0, 19 | test: opts.test ? opts.test.length : 0, 20 | valid: opts.valid ? opts.valid.length : 0, 21 | predicted: opts.predicted ? opts.predicted.length : 0 22 | }, 23 | categories: categories.size > 0 ? Array.from(categories) : undefined 24 | }; 25 | return ArrayDatasetPoolImpl.from({ 26 | train: opts.train ? DataCook.Dataset.makeTextClassificationDatasetFromList(opts.train) : undefined, 27 | test: opts.test ? DataCook.Dataset.makeTextClassificationDatasetFromList(opts.test) : undefined, 28 | valid: opts.valid ? DataCook.Dataset.makeTextClassificationDatasetFromList(opts.valid) : undefined, 29 | predicted: opts.predicted ? DataCook.Dataset.makeTextClassificationDatasetFromList(opts.predicted) : undefined 30 | }, meta); 31 | }; 32 | 33 | 
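/* * Illustrative usage (a sketch; `text` is assumed to be the payload field of DataCook's TextList items): * * const pool = makeTextClassificationDatasetFromList({ train: [ { text: 'pipcook is great', category: 'positive' } ] }); * (await pool.getDatasetMeta())?.categories; // [ 'positive' ] */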
34 | export const makeTextClassificationDataset = ( 35 | datasetData: Types.DatasetData<Sample>, 36 | meta: DatasetMeta 37 | ): Types.TextClassification.DatasetPool => { 38 | return ArrayDatasetPoolImpl.from(datasetData, meta); 39 | }; 40 | -------------------------------------------------------------------------------- /packages/core/src/dataset-pool/types/format/coco.ts: -------------------------------------------------------------------------------- 1 | import * as DataCook from '@pipcook/datacook'; 2 | import { DatasetSize, DatasetMeta as BaseDatasetMeta } from '../'; 3 | 4 | import DatasetType = DataCook.Dataset.Types.DatasetType; 5 | import Category = DataCook.Dataset.Types.Coco.Category; 6 | import Info = DataCook.Dataset.Types.Coco.Info; 7 | import License = DataCook.Dataset.Types.Coco.License; 8 | 9 | export interface DatasetMeta extends BaseDatasetMeta { 10 | type: DatasetType.Image; 11 | size: DatasetSize; 12 | categories?: Array<Category>; 13 | info?: Info; 14 | licenses?: Array<License>; 15 | } 16 | -------------------------------------------------------------------------------- /packages/core/src/dataset-pool/types/format/csv.ts: -------------------------------------------------------------------------------- 1 | import * as DataCook from '@pipcook/datacook'; 2 | import { DatasetMeta as BaseDatasetMeta } from '../'; 3 | 4 | import DatasetType = DataCook.Dataset.Types.DatasetType; 5 | 6 | export interface DatasetMeta extends BaseDatasetMeta { 7 | type: DatasetType.Table; 8 | } 9 | -------------------------------------------------------------------------------- /packages/core/src/dataset-pool/types/format/index.ts: -------------------------------------------------------------------------------- 1 | export * as Coco from './coco'; 2 | export * as Csv from './csv'; 3 | export * as PascalVoc from './pascal-voc'; 4 | -------------------------------------------------------------------------------- /packages/core/src/dataset-pool/types/format/pascal-voc.ts: -------------------------------------------------------------------------------- 1 | import * as DataCook from '@pipcook/datacook'; 2 | import { ClassificationDatasetMeta } from '../'; 3 | 4 | import DatasetType = DataCook.Dataset.Types.DatasetType; 5 | 6 | export interface DatasetMeta extends ClassificationDatasetMeta { 7 | type: DatasetType.Image; 8 | } 9 | -------------------------------------------------------------------------------- /packages/core/src/dataset-pool/types/index.ts: -------------------------------------------------------------------------------- 1 | import * as DataCook from '@pipcook/datacook'; 2 | 3 | import Dataset = DataCook.Dataset.Types.Dataset; 4 | import Sample = DataCook.Dataset.Types.Sample; 5 | import DatasetType = DataCook.Dataset.Types.DatasetType; 6 | import ImageDimension = DataCook.Dataset.Types.ImageDimension; 7 | import TableSchema = DataCook.Dataset.Types.TableSchema; 8 | 9 | export * from './format'; 10 | export * from './pipeline-type'; 11 | 12 | /** 13 | * size of data source 14 | */ 15 | export interface DatasetSize { 16 | train: number; 17 | test: number; 18 | valid: number; 19 | predicted: number; 20 | } 21 | 22 | /** 23 | * data source api 24 | */ 25 | export interface DatasetPool<T extends Sample, D extends DatasetMeta> { 26 | getDatasetMeta: () => Promise<D | undefined>; 27 | test?: Dataset<T>; 28 | train?: Dataset<T>; 29 | valid?: Dataset<T>; 30 | predicted?: Dataset<T>; 31 | shuffle: (seed?: string) => void; 32 | transform<TARGET_SAMPLE extends Sample>(transformFun: (sample: T) => Promise<TARGET_SAMPLE>): DatasetPool<TARGET_SAMPLE, D>; 33 | transform<TARGET_SAMPLE extends Sample, TARGET_META extends DatasetMeta>(opts: TransformOption<T, D, TARGET_SAMPLE, TARGET_META>): DatasetPool<TARGET_SAMPLE, TARGET_META>; 34 | } 35 | 
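/* * A pool is re-shaped either with a bare sample mapper or with a TransformOption that also rewrites the metadata. Illustrative calls (`normalize`, `toDetectionSample` and `categories` are placeholders, not part of this module): * * pool.transform(async (s) => ({ ...s, data: normalize(s.data) })); * pool.transform({ transform: toDetectionSample, metadata: async (meta) => ({ ...meta, categories }) }); */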
36 | export interface TransformOption< 37 | IN_SAMPLE extends Sample, 38 | IN_META extends DatasetMeta, 39 | OUT_SAMPLE extends Sample = IN_SAMPLE, 40 | OUT_META extends DatasetMeta = IN_META 41 | > { 42 | transform: (sample: IN_SAMPLE) => Promise<OUT_SAMPLE>; 43 | metadata: (meta?: IN_META) => Promise<OUT_META>; 44 | } 45 | 46 | export interface DatasetData<T extends Sample> { 47 | trainData?: Array<T>; 48 | testData?: Array<T>; 49 | validData?: Array<T>; 50 | predictedData?: Array<T>; 51 | } 52 | 53 | export interface DatasetMeta { 54 | type: DatasetType; 55 | size?: DatasetSize; 56 | } 57 | 58 | export interface ClassificationDatasetMeta extends DatasetMeta { 59 | categories?: Array<string>; 60 | } 61 | 62 | export interface ObjectDetectionDatasetMeta extends DatasetMeta { 63 | categories?: Array<string>; 64 | } 65 | 66 | /** 67 | * image data source metadata 68 | */ 69 | export interface ImageDatasetMeta extends DatasetMeta { 70 | dimension: ImageDimension; 71 | } 72 | 73 | /** 74 | * table data source metadata 75 | */ 76 | export interface TableDatasetMeta extends DatasetMeta { 77 | tableSchema: TableSchema; 78 | dataKeys: Array<string> | null; 79 | } 80 | 81 | export interface DatasetGroup<T extends Sample> { 82 | train?: Dataset<T>; 83 | test?: Dataset<T>; 84 | valid?: Dataset<T>; 85 | predicted?: Dataset<T>; 86 | } 87 | -------------------------------------------------------------------------------- /packages/core/src/dataset-pool/types/pipeline-type/image-classification.ts: -------------------------------------------------------------------------------- 1 | import * as DataCook from '@pipcook/datacook'; 2 | import { DatasetPool as BaseDatasetPool, ClassificationDatasetMeta } from '../'; 3 | 4 | import DatasetType = DataCook.Dataset.Types.DatasetType; 5 | import BaseDataset = DataCook.Dataset.Types.Dataset; 6 | 7 | export interface Options { 8 | train?: DataCook.Dataset.Types.ImageClassification.ImageList; 9 | test?: DataCook.Dataset.Types.ImageClassification.ImageList; 10 | valid?: DataCook.Dataset.Types.ImageClassification.ImageList; 11 | predicted?: DataCook.Dataset.Types.ImageClassification.ImageList; 12 | } 13 | 14 | export type Sample = DataCook.Dataset.Types.ImageClassification.Sample; 15 | 16 | export interface DatasetMeta extends ClassificationDatasetMeta { 17 | type: DatasetType.Image; 18 | } 19 | 20 | export type Dataset = BaseDataset<Sample>; 21 | 22 | export type DatasetPool = BaseDatasetPool<Sample, DatasetMeta>; 23 | 24 | export interface SinglePredictResult { 25 | id: number; 26 | category: string; 27 | score: number; 28 | } 29 | 30 | export type PredictResult = Array<SinglePredictResult>; 31 | 
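/* A predict result value looks like (values illustrative): [ { id: 0, category: 'cat', score: 0.92 } ] */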
-------------------------------------------------------------------------------- /packages/core/src/dataset-pool/types/pipeline-type/index.ts: -------------------------------------------------------------------------------- 1 | export * as ObjectDetection from './object-detection'; 2 | export * as ImageClassification from './image-classification'; 3 | export * as TextClassification from './text-classification'; 4 | -------------------------------------------------------------------------------- /packages/core/src/dataset-pool/types/pipeline-type/object-detection.ts: -------------------------------------------------------------------------------- 1 | import * as DataCook from '@pipcook/datacook'; 2 | import { DatasetPool as BaseDatasetPool, ClassificationDatasetMeta } from '../'; 3 | 4 | import DatasetType = DataCook.Dataset.Types.DatasetType; 5 | import BaseDataset = DataCook.Dataset.Types.Dataset; 6 | 7 | export type Sample = DataCook.Dataset.Types.ObjectDetection.Sample; 8 | 9 | export interface DatasetMeta extends ClassificationDatasetMeta { 10 | type: DatasetType.Image; 11 | } 12 | 13 | export type Dataset = BaseDataset<Sample>; 14 | 15 | export type DatasetPool = BaseDatasetPool<Sample, DatasetMeta>; 16 | 17 | export interface PredictObject { 18 | id: number; 19 | category: string; 20 | score: number; 21 | box: DataCook.Dataset.Types.ObjectDetection.Bbox; 22 | } 23 | 24 | export type SinglePredictResult = Array<PredictObject>; 25 | 26 | export type PredictResult = Array<SinglePredictResult>; 27 | -------------------------------------------------------------------------------- /packages/core/src/dataset-pool/types/pipeline-type/text-classification.ts: -------------------------------------------------------------------------------- 1 | import * as DataCook from '@pipcook/datacook'; 2 | import { DatasetPool as BaseDatasetPool, ClassificationDatasetMeta } from '..'; 3 | 4 | import DatasetType = DataCook.Dataset.Types.DatasetType; 5 | import BaseDataset = DataCook.Dataset.Types.Dataset; 6 | 7 | export interface Options { 8 | train?: DataCook.Dataset.Types.TextClassification.TextList; 9 | test?: DataCook.Dataset.Types.TextClassification.TextList; 10 | valid?: DataCook.Dataset.Types.TextClassification.TextList; 11 | predicted?: DataCook.Dataset.Types.TextClassification.TextList; 12 | } 13 | 14 | export type Sample = DataCook.Dataset.Types.TextClassification.Sample; 15 | 16 | export interface DatasetMeta extends ClassificationDatasetMeta { 17 | type: DatasetType.Table; 18 | } 19 | 20 | export type Dataset = BaseDataset<Sample>; 21 | 22 | export type DatasetPool = BaseDatasetPool<Sample, DatasetMeta>; 23 | 24 | export interface SinglePredictResult { 25 | id: number; 26 | category: string; 27 | score: number; 28 | } 29 | 30 | export type PredictResult = Array<SinglePredictResult>; 31 | -------------------------------------------------------------------------------- /packages/core/src/index.ts: -------------------------------------------------------------------------------- 1 | export * as DataCook from '@pipcook/datacook'; 2 | export * from './runtime'; 3 | export * from './artifact'; 4 | export * as DatasetPool from './dataset-pool'; 5 | -------------------------------------------------------------------------------- /packages/core/src/runtime.ts: -------------------------------------------------------------------------------- 1 | import * as DataCook from '@pipcook/datacook'; 2 | import { Types } from './dataset-pool'; 3 | 4 | /** 5 | * The model script can emit the training progress through the API `Runtime.notifyProgress`. 6 | */ 7 | export interface ProgressInfo { 8 | /** 9 | * The training progress percentage; it should be within [0, 100]. 10 | */ 11 | value: number; 12 | /** 13 | * Custom data. 14 | */ 15 | extendData: Record<string, any>; 16 | } 17 | 18 | /** 19 | * A Runtime is used to run pipelines on a specific platform. The interface `Runtime` 20 | * declares the APIs that a runtime implementation must or should provide. 21 | */ 22 | export interface Runtime<T extends DataCook.Dataset.Types.Sample, M extends Types.DatasetMeta> { 23 | // report progress of pipeline 24 | notifyProgress: (progress: ProgressInfo) => void; 25 | // save the model file 26 | saveModel: (localPathOrStream: string | NodeJS.ReadableStream, filename?: string) => Promise<void>; 27 | // read model file 28 | readModel: () => Promise<string>; 29 | // datasource 30 | dataset: Types.DatasetPool<T, M>; 31 | } 32 | 33 | export type FrameworkModule = any; 34 | 
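/* * Sketch of a model entry written against this runtime API (the training loop is elided; `MySample` and `MyMeta` are placeholders): * * const entry: ModelEntry<MySample, MyMeta> = async (api, options, context) => { * api.notifyProgress({ value: 0, extendData: {} }); * // iterate api.dataset.train and fit a model here, then persist it: * await api.saveModel(context.workspace.modelDir); * }; */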
35 | /** 36 | * There are two kinds of pipeline task: `TaskType.TRAIN` means the pipeline runs for model training, 37 | * and `TaskType.PREDICT` means it runs for prediction. 38 | */ 39 | export enum TaskType { TRAIN = 1, PREDICT = 2 } 40 | 41 | /** 42 | * The context of script running. 43 | */ 44 | export interface ScriptContext { 45 | /** 46 | * The workspace for the pipeline. There are some directories to save temporary files. 47 | */ 48 | workspace: { 49 | /** 50 | * Dataset directory; the dataset files should be saved here. 51 | */ 52 | dataDir: string; 53 | /** 54 | * Cache directory. Every sample passed to the model script will be cached here, 55 | * so the dataflow scripts will not be executed again after the first epoch. 56 | */ 57 | cacheDir: string; 58 | /** 59 | * The model file should be saved here. 60 | */ 61 | modelDir: string; 62 | 63 | /** 64 | * framework directory 65 | */ 66 | frameworkDir: string; 67 | }, 68 | taskType: TaskType; 69 | } 70 | 71 | export type PredictResult = Types.ObjectDetection.PredictResult | Types.TextClassification.PredictResult | Types.ImageClassification.PredictResult | any; 72 | 73 | /** 74 | * type of data source script entry 75 | */ 76 | export type DatasourceEntry<SAMPLE extends DataCook.Dataset.Types.Sample, META extends Types.DatasetMeta> = 77 | (options: Record<string, any>, context: ScriptContext) => Promise<Types.DatasetPool<SAMPLE, META>>; 78 | 79 | /** 80 | * type of data flow script entry 81 | */ 82 | export type DataflowEntry< 83 | IN extends DataCook.Dataset.Types.Sample, 84 | IN_META extends Types.DatasetMeta, 85 | OUT extends DataCook.Dataset.Types.Sample = IN, 86 | OUT_META extends Types.DatasetMeta = IN_META 87 | > = 88 | (api: Types.DatasetPool<IN, IN_META>, options: Record<string, any>, context: ScriptContext) => Promise<Types.DatasetPool<OUT, OUT_META>>; 89 | 90 | /** 91 | * type of model script entry for train 92 | */ 93 | export type ModelEntry<SAMPLE extends DataCook.Dataset.Types.Sample, META extends Types.DatasetMeta> = 94 | (api: Runtime<SAMPLE, META>, options: Record<string, any>, context: ScriptContext) => Promise<void>; 95 | 96 | /** 97 | * type of model script entry for predict 98 | */ 99 | export type PredictEntry<SAMPLE extends DataCook.Dataset.Types.Sample, META extends Types.DatasetMeta> = 100 | (api: Runtime<SAMPLE, META>, options: Record<string, any>, context: ScriptContext) => Promise<PredictResult>; 101 | 102 | /** 103 | * type of model script entry for train and predict 104 | */ 105 | export interface ExtModelEntry<SAMPLE extends DataCook.Dataset.Types.Sample, META extends Types.DatasetMeta> { 106 | train: ModelEntry<SAMPLE, META>; 107 | predict: PredictEntry<SAMPLE, META>; 108 | } 109 | -------------------------------------------------------------------------------- /packages/core/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json.schemastore.org/tsconfig", 3 | "compilerOptions": { 4 | "outDir": "dist", 5 | "rootDir": "src", 6 | 7 | "emitDecoratorMetadata": true, 8 | "experimentalDecorators": true, 9 | "noImplicitAny": true, 10 | "strictNullChecks": true, 11 | "resolveJsonModule": true, 12 | "skipLibCheck": true, 13 | 14 | "incremental": true, 15 | 16 | "lib": [ "es2018", "esnext.asynciterable" ], 17 | "module": "commonjs", 18 | "esModuleInterop": false, 19 | "moduleResolution": "node", 20 | "target": "es2017", 21 | "sourceMap": true, 22 | "declaration": true 23 | }, 24 | "include": ["src"], 25 | "exclude": [ 26 | "**/node_modules/**", 27 | "**/*.test.ts", 28 | "**/dist/**", 29 | "**/*.d.ts" 30 | ] 31 | } 32 | -------------------------------------------------------------------------------- /packages/costa/.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | dist 3 | .debug 4 | .tests 5 | -------------------------------------------------------------------------------- /packages/costa/.npmignore: -------------------------------------------------------------------------------- 1 | .* 2 
| tsconfig.* 3 | src 4 | *.tgz 5 | benchmark 6 | coverage 7 | -------------------------------------------------------------------------------- /packages/costa/.nycrc: -------------------------------------------------------------------------------- 1 | { 2 | "include": [ 3 | "src/**/*.ts" 4 | ], 5 | "all": true, 6 | "instrument": true 7 | } 8 | -------------------------------------------------------------------------------- /packages/costa/benchmark/bootstrap.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | const { join } = require('path'); 4 | const { CostaRuntime } = require('../dist/src/runtime'); 5 | const { PluginRunnable } = require('../dist/src/runnable'); 6 | 7 | const costa = new CostaRuntime({ 8 | installDir: join(__dirname, '../.tests/plugins'), 9 | datasetDir: join(__dirname, '../.tests/datasets'), 10 | componentDir: join(__dirname, '../.tests/components'), 11 | npmRegistryPrefix: 'https://registry.npmjs.com/' 12 | }); 13 | const r = new PluginRunnable(costa); 14 | 15 | (async () => { 16 | await r.bootstrap({}); 17 | r.destroy(); 18 | })(); 19 | -------------------------------------------------------------------------------- /packages/costa/benchmark/makefile: -------------------------------------------------------------------------------- 1 | export DEBUG=costa* 2 | 3 | bootstrap: 4 | time node ./bootstrap.js 5 | install: 6 | time node ./plugin-install.js 7 | -------------------------------------------------------------------------------- /packages/costa/benchmark/plugin-install.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | const { join } = require('path'); 4 | const { CostaRuntime } = require('../dist/src/runtime'); 5 | 6 | const costa = new CostaRuntime({ 7 | installDir: join(__dirname, '../.tests/plugins'), 8 | datasetDir: join(__dirname, '../.tests/datasets'), 9 | componentDir: join(__dirname, '../.tests/components'), 10 | npmRegistryPrefix: 'https://registry.npmjs.com/' 11 | }); 12 | 13 | (async () => { 14 | const pkg = await costa.fetch('@pipcook/plugins-tensorflow-resnet-model-define'); 15 | await costa.install(pkg, process); 16 | })(); 17 | -------------------------------------------------------------------------------- /packages/costa/package-lock.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@pipcook/costa", 3 | "version": "2.1.4", 4 | "lockfileVersion": 1, 5 | "requires": true, 6 | "dependencies": { 7 | "arg": { 8 | "version": "4.1.3", 9 | "resolved": "https://registry.npmjs.org/arg/-/arg-4.1.3.tgz", 10 | "integrity": "sha512-58S9QDqG0Xx27YwPSt9fJxivjYl432YCwfDMfZ+71RAqUrZef7LrKQZ3LHLOwCS4FLNBplP533Zx895SeOCHvA==", 11 | "dev": true 12 | }, 13 | "buffer-from": { 14 | "version": "1.1.1", 15 | "resolved": "https://registry.npmjs.org/buffer-from/-/buffer-from-1.1.1.tgz", 16 | "integrity": "sha512-MQcXEUbCKtEo7bhqEs6560Hyd4XaovZlO/k9V3hjVUF/zwW7KBVdSK4gIt/bzwS9MbR5qob+F5jusZsb0YQK2A==", 17 | "dev": true 18 | }, 19 | "create-require": { 20 | "version": "1.1.1", 21 | "resolved": "https://registry.npmjs.org/create-require/-/create-require-1.1.1.tgz", 22 | "integrity": "sha512-dcKFX3jn0MpIaXjisoRvexIJVEKzaq7z2rZKxf+MSr9TkdmHmsU4m2lcLojrj/FHl8mk5VxMmYA+ftRkP/3oKQ==", 23 | "dev": true 24 | }, 25 | "diff": { 26 | "version": "4.0.2", 27 | "resolved": "https://registry.npmjs.org/diff/-/diff-4.0.2.tgz", 28 | "integrity": 
"sha512-58lmxKSA4BNyLz+HHMUzlOEpg09FV+ev6ZMe3vJihgdxzgcwZ8VoEEPmALCZG9LmqfVoNMMKpttIYTVG6uDY7A==", 29 | "dev": true 30 | }, 31 | "make-error": { 32 | "version": "1.3.6", 33 | "resolved": "https://registry.npmjs.org/make-error/-/make-error-1.3.6.tgz", 34 | "integrity": "sha512-s8UhlNe7vPKomQhC1qFelMokr/Sc3AgNbso3n74mVPA5LTZwkB9NlXf4XPamLxJE8h0gh73rM94xvwRT2CVInw==", 35 | "dev": true 36 | }, 37 | "source-map": { 38 | "version": "0.6.1", 39 | "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", 40 | "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==", 41 | "dev": true 42 | }, 43 | "source-map-support": { 44 | "version": "0.5.19", 45 | "resolved": "https://registry.npmjs.org/source-map-support/-/source-map-support-0.5.19.tgz", 46 | "integrity": "sha512-Wonm7zOCIJzBGQdB+thsPar0kYuCIzYvxZwlBa87yi/Mdjv7Tip2cyVbLj5o0cFPN4EVkuTwb3GDDyUx2DGnGw==", 47 | "dev": true, 48 | "requires": { 49 | "buffer-from": "^1.0.0", 50 | "source-map": "^0.6.0" 51 | } 52 | }, 53 | "ts-node": { 54 | "version": "9.1.1", 55 | "resolved": "https://registry.npmjs.org/ts-node/-/ts-node-9.1.1.tgz", 56 | "integrity": "sha512-hPlt7ZACERQGf03M253ytLY3dHbGNGrAq9qIHWUY9XHYl1z7wYngSr3OQ5xmui8o2AaxsONxIzjafLUiWBo1Fg==", 57 | "dev": true, 58 | "requires": { 59 | "arg": "^4.1.0", 60 | "create-require": "^1.1.0", 61 | "diff": "^4.0.1", 62 | "make-error": "^1.1.1", 63 | "source-map-support": "^0.5.17", 64 | "yn": "3.1.1" 65 | } 66 | }, 67 | "yn": { 68 | "version": "3.1.1", 69 | "resolved": "https://registry.npmjs.org/yn/-/yn-3.1.1.tgz", 70 | "integrity": "sha512-Ux4ygGWsu2c7isFWe8Yu1YluJmqVhxqK2cLXNQA5AcC3QfbGNpM7fu0Y8b/z16pXLnFxZYvWhd3fhBY9DLmC6Q==", 71 | "dev": true 72 | } 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /packages/costa/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@pipcook/costa", 3 | "version": "2.1.5", 4 | "description": "The Pipcook Script Runner", 5 | "main": "dist/index", 6 | "types": "dist/index", 7 | "files": [ 8 | "dist" 9 | ], 10 | "scripts": { 11 | "test": "ava -v", 12 | "cov": "nyc --reporter=text-summary npm run test", 13 | "cov:report": "nyc report -r=lcov", 14 | "build": "npm run clean && npm run compile", 15 | "clean": "((rm -rf dist tsconfig.tsbuildinfo) || (rmdir /Q /S dist tsconfig.tsbuildinfo)) || echo 'nothing to clean'", 16 | "compile": "tsc -b tsconfig.json" 17 | }, 18 | "license": "Apache-2.0", 19 | "dependencies": { 20 | "@pipcook/core": "^2.1.4", 21 | "debug": "^4.3.1" 22 | }, 23 | "devDependencies": { 24 | "@types/debug": "^4.1.5", 25 | "@types/node": "^14.6.0", 26 | "@types/sinon": "^9.0.11", 27 | "ava": "^3.13.0", 28 | "nyc": "^15.1.0", 29 | "sinon": "^10.0.0", 30 | "ts-node": "^9.1.1", 31 | "typescript": "^4.3.5" 32 | }, 33 | "publishConfig": { 34 | "access": "public" 35 | }, 36 | "keywords": [], 37 | "ava": { 38 | "extensions": [ 39 | "ts" 40 | ], 41 | "require": [ 42 | "ts-node/register" 43 | ] 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /packages/costa/src/constans.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * The initialize file name. 
3 | */ 4 | export const FrameworkIndexFile = 'index.js'; 5 | -------------------------------------------------------------------------------- /packages/costa/src/types.ts: -------------------------------------------------------------------------------- 1 | import { ParsedUrlQuery } from 'querystring'; 2 | 3 | /** 4 | * There are three types of Pipcook script: `DataSource`, `Dataflow`, `Model`. 5 | */ 6 | export enum ScriptType { 7 | /** 8 | * The `DataSource` script collects the original data 9 | * and offers an API object for the following flow. 10 | */ 11 | DataSource, 12 | /** 13 | * The `Dataflow` script processes the original data from the `DataSource` script. 14 | */ 15 | Dataflow, 16 | /** 17 | * The `Model` script accesses the data from the `DataSource` and `Dataflow` scripts, 18 | * and optionally trains the model. 19 | */ 20 | Model 21 | } 22 | 23 | /** 24 | * The Pipcook script structure. It describes a script and tells `Costa` how to run it. 25 | */ 26 | export interface PipcookScript { 27 | /** 28 | * The script name. 29 | */ 30 | name: string; 31 | /** 32 | * The script path in the file system. 33 | */ 34 | path: string; 35 | /** 36 | * Script type. 37 | */ 38 | type: ScriptType; 39 | /** 40 | * The script query data. 41 | */ 42 | query: ParsedUrlQuery; 43 | } 44 | 45 | /** 46 | * Type of package in the framework. 47 | */ 48 | export enum PackageType { 49 | /** 50 | * Python package which can be used through `boa.import`. 51 | */ 52 | Python = 'python', 53 | /** 54 | * JS module which can be used through `import`. 55 | */ 56 | JS = 'js' 57 | } 58 | 59 | /** 60 | * The package structure of a Pipcook framework: this struct describes 61 | * a framework bundle, the platform it was built for, and where its 62 | * Python and JS packages live inside the directory, so that scripts 63 | * can import them at runtime. 64 | */ 65 | export interface PipcookFramework { 66 | /** 67 | * The location of the framework. 68 | */ 69 | path: string; 70 | /** 71 | * The framework name. 72 | */ 73 | name: string; 74 | /** 75 | * Description of the framework. 76 | */ 77 | desc: string | null; 78 | /** 79 | * Framework version. 80 | */ 81 | version: string; 82 | /** 83 | * Arch of the current machine, should be one of 'x86', 'x64'. 84 | */ 85 | arch: string | null; 86 | /** 87 | * OS type, it should be one of 'darwin', 'win32', 'linux'. 88 | */ 89 | platform: string | null; 90 | /** 91 | * Node version, it should be a semver string: see https://www.npmjs.com/package/semver. 92 | */ 93 | nodeVersion: string | null; 94 | /** 95 | * The N-API version that the framework depends on. 96 | */ 97 | napiVersion: number | null; 98 | /** 99 | * Python runtime version; the Python packages run on Boa. 100 | */ 101 | pythonVersion: string | null; 102 | /** 103 | * Python site-packages relative path in the directory, 'site-packages' by default. 104 | */ 105 | pythonPackagePath: string | null; 106 | /** 107 | * The node modules relative path in the directory, 'node_modules' by default. 108 | */ 109 | jsPackagePath: string | null; 110 | } 111 | 112 | /** 113 | * The struct which defines the scripts in a pipeline: 114 | * datasource: the script used to fetch the data, 115 | * dataflow: the scripts used to process the data, 116 | * model: the script that defines, trains and evaluates the model. 117 | */ 118 | export interface ScriptConfig { 119 | datasource: PipcookScript | null; 120 | dataflow: Array<PipcookScript> | null; 121 | model: PipcookScript; 122 | } 123 | 124 | /** 125 | * Artifact configuration: `processor` is the name and version of the artifact plugin, 126 | * like `pipcook-ali-oss-uploader@0.0.1`. The other fields are the options which will be 127 | * passed into the plugin. 
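 * For example, the zip artifact used by the pipelines under test/pipelines is configured as `{ "processor": "pipcook-artifact-zip@0.0.2", "target": "./my-model.zip" }` (the target path here is illustrative).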
128 | */ 129 | export interface Artifact { 130 | processor: string; 131 | [k: string]: any; 132 | } 133 | 134 | /** 135 | * Pipeline type 136 | */ 137 | export enum PipelineType { 138 | ObjectDetection = 'ObjectDetection', 139 | TextClassification = 'TextClassification', 140 | ImageClassification = 'ImageClassification' 141 | } 142 | 143 | /** 144 | * pipeline configuration structure 145 | */ 146 | export interface PipelineMeta { 147 | /** 148 | * pipeline version, '2.0' by default 149 | */ 150 | specVersion: string; 151 | /** 152 | * data source script url or sql 153 | */ 154 | datasource: string; 155 | /** 156 | * data process scripts, set to null if not used 157 | */ 158 | dataflow: Array<string> | null; 159 | /** 160 | * model script url 161 | */ 162 | model: string; 163 | /** 164 | * artifact plugins and their options 165 | */ 166 | artifact: Array<Artifact>; 167 | /** 168 | * pipeline options 169 | */ 170 | options: Record<string, any>; 171 | /** 172 | * Pipeline type, which could be one of `PipelineType` or null. 173 | * It should be specified if predict is needed. 174 | */ 175 | type?: PipelineType; 176 | } 177 | -------------------------------------------------------------------------------- /packages/costa/src/utils.ts: -------------------------------------------------------------------------------- 1 | import { FrameworkModule } from '@pipcook/core'; 2 | 3 | /** 4 | * Import a module from a path. 5 | * @param path the path to import from 6 | */ 7 | export const importFrom = (path: string): Promise<FrameworkModule> => { 8 | return import(path); 9 | }; 10 | 
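/* Illustrative call (assumes the framework ships a node_modules directory; `join` and `frameworkDir` are placeholders): const tfjs = await importFrom(join(frameworkDir, 'node_modules', '@tensorflow/tfjs')); */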
"https://pc-github.oss-us-west-1.aliyuncs.com/model/mobilenet_tfjs/web_model/model.json" 19 | } 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /test/pipelines/image-classification-resnet.json: -------------------------------------------------------------------------------- 1 | { 2 | "specVersion": "2.0", 3 | "type": "ImageClassification", 4 | "datasource": "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/image-classification/build/datasource.js?url=https://pc-github.oss-us-west-1.aliyuncs.com/dataset/image-classification-test.zip", 5 | "dataflow": [ 6 | "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/image-classification/build/dataflow.js?size=224&size=224" 7 | ], 8 | "artifact": [{ 9 | "processor": "pipcook-artifact-zip@0.0.2", 10 | "target": "./image-classification-resnet-model.zip" 11 | }], 12 | "model": "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/image-classification/build/model.js", 13 | "options": { 14 | "framework": "tfjs@3.8", 15 | "train": { 16 | "epochs": 1, 17 | "validationRequired": true, 18 | "modelUrl": "https://pc-github.oss-us-west-1.aliyuncs.com/model/resnet50_tfjs/model.json" 19 | } 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /test/pipelines/object-detection-yolo.json: -------------------------------------------------------------------------------- 1 | { 2 | "specVersion": "2.0", 3 | "type": "ObjectDetection", 4 | "datasource": "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/object-detection-yolo/build/datasource.js?url=https://pc-github.oss-us-west-1.aliyuncs.com/dataset/object-detection-yolo-min.zip", 5 | "dataflow": [ 6 | "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/object-detection-yolo/build/dataflow.js?size=416&size=416" 7 | ], 8 | "model": "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/object-detection-yolo/build/model.js", 9 | "artifact": [{ 10 | "processor": "pipcook-artifact-zip@0.0.2", 11 | "target": "./object-detection-yolo-model.zip" 12 | }], 13 | "options": { 14 | "framework": "tfjs@3.8", 15 | "train": { 16 | "epochs": 1, 17 | "batchSize": 1, 18 | "validationRequired": true 19 | } 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /test/pipelines/text-classification-bayes.json: -------------------------------------------------------------------------------- 1 | { 2 | "specVersion": "2.0", 3 | "type": "TextClassification", 4 | "datasource": "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/text-classification-bayes/build/datasource.js?url=http://pc-github.oss-us-west-1.aliyuncs.com/dataset/text-classification.zip", 5 | "model": "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/text-classification-bayes/build/model.js", 6 | "artifact": [{ 7 | "processor": "pipcook-artifact-zip@0.0.2", 8 | "target": "./text-classification-bayes-model.zip" 9 | }], 10 | "options": { 11 | "framework": "tfjs@3.8-nlp", 12 | "backend": "@tensorflow/tfjs-backend-cpu" 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /tools/benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # get flag argument 4 | for i in $@ 5 | do 6 | case $i in 7 | -u=*|--upload=*) 8 | UPLOAD=${i#*=} 9 | shift 10 | ;; 11 | esac 12 | done 13 | 14 | t1=$(date +%s) 15 | npm install 16 | 
14 | t1=$(date +%s) 15 | npm install 16 | t2=$(date +%s) 17 | install_time=$((t2-t1)) 18 | 19 | t1=$(date +%s) 20 | npm run build 21 | t2=$(date +%s) 22 | build_time=$((t2-t1)) 23 | 24 | t1=$(date +%s) 25 | npm run test 26 | t2=$(date +%s) 27 | test_time=$((t2-t1)) 28 | 29 | t1=$(date +%s) 30 | ./packages/cli/dist/bin/pipcook train ./example/pipelines/image-classification-mobilenet.json 31 | t2=$(date +%s) 32 | mobilenet_time=$((t2-t1)) 33 | 34 | time_stamp=$(date +%s) 35 | 36 | if [ -z "${UPLOAD}" ] 37 | then 38 | echo "{\"install_time\":${install_time}, \"build_time\":$build_time, \"test_time\":$test_time, \ 39 | \"mobilenet_time\":$mobilenet_time, \"timestamp\": $time_stamp }" | jq 40 | else 41 | git clone https://github.com/imgcook/pipcook-benchmark.git 42 | cd pipcook-benchmark 43 | 44 | echo $(cat data.json | 45 | jq --arg commit_id $CIRCLE_SHA1 \ 46 | --arg install_time $install_time \ 47 | --arg build_time $build_time \ 48 | --arg test_time $test_time \ 49 | --arg time_stamp $time_stamp \ 50 | --arg mobilenet_time $mobilenet_time \ 51 | '. + [{commitId: $commit_id, install_time: $install_time, build_time: $build_time, test_time: $test_time, mobilenet_time: $mobilenet_time, timestamp: $time_stamp}]') > data.json 52 | 53 | git config user.email ${EMAIL} 54 | git config user.name ${USERNAME} 55 | git add data.json 56 | git commit --allow-empty -am "update data" 57 | git push -q https://${TOKEN}@github.com/imgcook/pipcook-benchmark.git 58 | fi 59 | -------------------------------------------------------------------------------- /tools/coverage.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # clear old data 4 | rm -rf .nyc_output coverage && \ 5 | # generate coverage file 6 | npx lerna run cov && \ 7 | # merge coverage directory into package root 8 | find packages -type d -name .nyc_output -exec cp -r {} ./ \; 9 | -------------------------------------------------------------------------------- /tools/mkdoc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | function mkdoc() { 4 | npx typedoc \ 5 | --name "$3" \ 6 | --inputFiles "packages/$2/src" \ 7 | --out "$1" \ 8 | --theme default \ 9 | --tsconfig "packages/$2/tsconfig.json" \ 10 | --readme none \ 11 | --mode file 12 | } 13 | 14 | mkdoc docs/typedoc/script core "Pipcook Interfaces" 15 | mkdoc docs/typedoc/runtime costa "Costa Interfaces" 16 | -------------------------------------------------------------------------------- /tools/run_pipeline.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | 3 | ./packages/cli/dist/bin/pipcook run "./example/pipelines/$1.json" 4 | 5 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "resolveJsonModule": true, 4 | "composite": true, 5 | "outDir": "./dist", 6 | "rootDir": ".", 7 | "declaration": true, 8 | "noImplicitAny": true, 9 | "incremental": true, 10 | "module": "commonjs", 11 | "target": "es6", 12 | "lib": [ "es6" ], 13 | "skipLibCheck": true, 14 | "sourceMap": true, 15 | "esModuleInterop": false 16 | }, 17 | "exclude": [ 18 | "node_modules", 19 | "dist" 20 | ] 21 | } --------------------------------------------------------------------------------