├── .circleci └── config.yml ├── .editorconfig ├── .eslintignore ├── .eslintrc.js ├── .github └── workflows │ ├── beta.yml │ ├── build.yml │ ├── gh-pages.yml │ ├── markdown.yml │ └── release.yml ├── .gitignore ├── .markdown.config.json ├── CODEOWNERS ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── docker ├── Dockerfile └── Dockerfile.cpu ├── docs ├── .nojekyll ├── GLOSSORY.md ├── INSTALL.md ├── README.md ├── _navbar.md ├── _sidebar.md ├── contributing │ ├── contribute-a-script.md │ ├── guide-to-collaborator.md │ └── guide-to-contributor.md ├── faq │ ├── index.md │ ├── pipcook-framework.md │ └── plugins.md ├── images │ ├── community_qrcode.jpg │ ├── logo.png │ └── plugin-script-map.png ├── index.html ├── manual │ ├── README.md │ ├── intro-to-boa.md │ ├── intro-to-framework.md │ ├── intro-to-pipeline.md │ ├── intro-to-script.md │ ├── pipcook-models.md │ └── pipcook-tools.md ├── meta │ └── PROJECT_GUIDE.md ├── rfcs │ ├── 0000-new-pipeline.md │ └── 0001-framework-migration.md ├── spec │ ├── dataset.md │ └── script.md ├── tutorials │ ├── component-image-classification.md │ ├── machine-learning-overview.md │ └── using-python-functions-in-nodejs.md └── zh-cn │ ├── GLOSSORY.md │ ├── INSTALL.md │ ├── README.md │ ├── _navbar.md │ ├── _sidebar.md │ ├── contributing │ ├── contribute-a-script.md │ ├── guide-to-collaborator.md │ └── guide-to-contributor.md │ ├── faq │ ├── index.md │ ├── pipcook-framework.md │ └── plugins.md │ ├── manual │ ├── intro-to-boa.md │ ├── intro-to-framework.md │ ├── intro-to-pipeline.md │ ├── intro-to-script.md │ └── pipcook-tools.md │ ├── spec │ ├── dataset.md │ └── script.md │ └── tutorials │ ├── component-image-classification.md │ ├── machine-learning-overview.md │ └── using-python-functions-in-nodejs.md ├── example └── pipelines │ ├── README-CN.md │ ├── README.md │ ├── image-classification-mobilenet.json │ ├── image-classification-resnet.json │ ├── object-detection-yolo.json │ └── text-classification-bayes.json ├── lerna.json ├── notebooks ├── pipcook_image_classification.ipynb └── pipcook_object_detection.ipynb ├── package-lock.json ├── package.json ├── packages ├── cli │ ├── .npmignore │ ├── .nycrc │ ├── package-lock.json │ ├── package.json │ ├── serve-resource │ │ ├── image │ │ │ └── index.html │ │ └── text │ │ │ └── index.html │ ├── src │ │ ├── bin │ │ │ ├── pipcook.test.ts │ │ │ └── pipcook.ts │ │ ├── constants │ │ │ ├── index.test.ts │ │ │ └── index.ts │ │ ├── runtime.test.ts │ │ ├── runtime.ts │ │ ├── standalone-impl.test.ts │ │ ├── standalone-impl.ts │ │ └── utils │ │ │ ├── cache.test.ts │ │ │ ├── cache.ts │ │ │ ├── framework.test.ts │ │ │ ├── framework.ts │ │ │ ├── index.test.ts │ │ │ ├── index.ts │ │ │ ├── plugin.test.ts │ │ │ ├── plugin.ts │ │ │ ├── post-predict.test.ts │ │ │ ├── post-predict.ts │ │ │ ├── predict-databset.test.ts │ │ │ ├── predict-dataset.ts │ │ │ ├── script.test.ts │ │ │ ├── script.ts │ │ │ ├── serve-predict.test.ts │ │ │ └── serve-predict.ts │ └── tsconfig.json ├── core │ ├── .gitignore │ ├── .npmignore │ ├── package-lock.json │ ├── package.json │ ├── src │ │ ├── artifact.ts │ │ ├── dataset-pool │ │ │ ├── format │ │ │ │ ├── coco.test.ts │ │ │ │ ├── coco.ts │ │ │ │ ├── csv.test.ts │ │ │ │ ├── csv.ts │ │ │ │ ├── index.ts │ │ │ │ ├── pascal-voc.test.ts │ │ │ │ └── pascal-voc.ts │ │ │ ├── index.test.ts │ │ │ ├── index.ts │ │ │ ├── pipeline-type │ │ │ │ ├── image-classification.test.ts │ │ │ │ ├── image-classification.ts │ │ │ │ ├── index.ts │ │ │ │ ├── object-detection.test.ts │ │ │ │ ├── object-detection.ts │ │ │ │ └── 
text-classification.ts │ │ │ └── types │ │ │ │ ├── format │ │ │ │ ├── coco.ts │ │ │ │ ├── csv.ts │ │ │ │ ├── index.ts │ │ │ │ └── pascal-voc.ts │ │ │ │ ├── index.ts │ │ │ │ └── pipeline-type │ │ │ │ ├── image-classification.ts │ │ │ │ ├── index.ts │ │ │ │ ├── object-detection.ts │ │ │ │ └── text-classification.ts │ │ ├── index.ts │ │ └── runtime.ts │ └── tsconfig.json └── costa │ ├── .gitignore │ ├── .npmignore │ ├── .nycrc │ ├── benchmark │ ├── bootstrap.js │ ├── makefile │ └── plugin-install.js │ ├── package-lock.json │ ├── package.json │ ├── src │ ├── constans.ts │ ├── index.test.ts │ ├── index.ts │ ├── types.ts │ └── utils.ts │ └── tsconfig.json ├── test └── pipelines │ ├── image-classification-mobilenet.json │ ├── image-classification-resnet.json │ ├── object-detection-yolo.json │ └── text-classification-bayes.json ├── tools ├── benchmark.sh ├── coverage.sh ├── mkdoc.sh └── run_pipeline.sh └── tsconfig.json /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2.1 2 | orbs: 3 | node: circleci/node@3.0.0 4 | 5 | jobs: 6 | benchmark: 7 | docker: 8 | - image: cimg/node:14.11.0 9 | steps: 10 | - checkout 11 | - run: bash ./tools/benchmark.sh 12 | 13 | workflows: 14 | node-bench: 15 | when: 16 | condition: 17 | equal: [master, << pipeline.git.branch >>] 18 | jobs: 19 | - benchmark 20 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | 2 | root = true 3 | 4 | [*] 5 | insert_final_newline = true 6 | -------------------------------------------------------------------------------- /.eslintignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | docs 3 | dist 4 | test 5 | pipcook_venv 6 | output 7 | example 8 | -------------------------------------------------------------------------------- /.eslintrc.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | 'env': { 3 | 'node': true, 4 | 'es6': true 5 | }, 6 | 'extends': [ 7 | 'eslint:recommended', 8 | 'plugin:@typescript-eslint/eslint-recommended', 9 | 'plugin:@typescript-eslint/recommended' 10 | ], 11 | 'rules': { 12 | 'quotes': [ 'error', 'single' ], 13 | 'no-trailing-spaces': 'error', 14 | 'prefer-const': 'warn', 15 | 'no-useless-escape': 'off', 16 | '@typescript-eslint/no-var-requires': 'off', 17 | '@typescript-eslint/camelcase': 'off', 18 | '@typescript-eslint/no-explicit-any': 'off', 19 | '@typescript-eslint/explicit-function-return-type': 'off', 20 | '@typescript-eslint/no-use-before-define': [ 21 | 'error', 22 | { 23 | 'functions': false 24 | } 25 | ], 26 | '@typescript-eslint/no-unused-vars': [ 'error' ], 27 | 'comma-spacing': [ 28 | 'error', 29 | { 30 | 'before': false, 'after': true 31 | } 32 | ], 33 | 'indent': [ 'error', 2 ], 34 | 'keyword-spacing': [ 35 | 'error', { 'before': true } 36 | ], 37 | 'array-bracket-spacing': [ 'error', 'always' ], 38 | 'space-infix-ops': 'error', 39 | 'object-curly-spacing': [ 'error', 'always' ], 40 | 'semi': [ 'error', 'always' ], 41 | 'eol-last': [ 'error', 'always' ], 42 | 'comma-dangle': [ 'error', 'never' ], 43 | 'no-multi-spaces': 'error', 44 | 'no-multiple-empty-lines': 'error', 45 | 'no-irregular-whitespace': 'error', 46 | 'arrow-parens': [ 'error', 'always' ], 47 | 'arrow-spacing': [ 'error', { before: true, after: true } ], 48 | 'block-spacing': 'error', 49 | 'brace-style': [ 'error', '1tbs',
{ allowSingleLine: true } ], 50 | 'comma-style': 'error', 51 | 'no-unused-vars': [ 'off' ] // Use @typescript-eslint/no-unused-vars instead, otherwise variables would be checked twice 52 | } 53 | }; 54 | -------------------------------------------------------------------------------- /.github/workflows/beta.yml: -------------------------------------------------------------------------------- 1 | name: Beta Build Packages 2 | on: 3 | schedule: 4 | - cron: '0 0 * * *' 5 | 6 | jobs: 7 | release: 8 | name: Release 9 | runs-on: ubuntu-latest 10 | steps: 11 | - name: Delete branch 12 | uses: dawidd6/action-delete-branch@v3 13 | with: 14 | github_token: ${{ secrets.GITHUB_TOKEN }} 15 | branches: beta 16 | 17 | - name: Create Beta Branch 18 | uses: peterjgrainger/action-create-branch@v1.0.0 19 | env: 20 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 21 | with: 22 | branch: 'beta' 23 | 24 | - name: Checkout beta 25 | uses: actions/checkout@v2 26 | with: 27 | ref: beta 28 | - name: Activity check 29 | run: | 30 | curl -sL https://api.github.com/repos/${{github.repository}}/commits?since=yesterday | jq -r '.[0]' > $HOME/commit.json 31 | date="$(jq -r '.commit.committer.date' $HOME/commit.json)" 32 | echo "Last commit is made @ $date" 33 | 34 | if [ -n "${date}" ] && [ "${date}" != "null" ]; then 35 | echo "UPDATED=true" >> $GITHUB_ENV 36 | fi 37 | shell: bash 38 | 39 | - name: Setup Node.js 40 | uses: actions/setup-node@v1 41 | if: env.UPDATED == 'true' 42 | with: 43 | node-version: 14 44 | 45 | - name: Install Dependencies 46 | if: env.UPDATED == 'true' 47 | run: | 48 | npm install 49 | npm run build 50 | 51 | - name: Publish npm beta packages 52 | if: env.UPDATED == 'true' 53 | run: | 54 | sha_short="$(git rev-parse --short HEAD)" 55 | echo "//registry.npmjs.org/:_authToken=${{ secrets.npm_token }}" > ~/.npmrc 56 | git config --global user.name 'pipcook' 57 | git config --global user.email 'queyue.crk@alibaba-inc.com' 58 | git add .
59 | git commit -m "bump beta version" --allow-empty 60 | npm run beta-release-tag -- --preid "${sha_short}-beta" 61 | npm run beta-release 62 | env: 63 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 64 | NODE_AUTH_TOKEN: ${{secrets.npm_token}} 65 | NPM_AUTH_TOKEN: ${{ secrets.npm_token }} 66 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: build 2 | on: 3 | push: 4 | branches: 5 | - main 6 | paths-ignore: 7 | - 'docker/**' 8 | - 'docs/**' 9 | - 'notebooks/**' 10 | - '.github/markdown.yml' 11 | - '*.md' 12 | pull_request: 13 | types: [ opened, synchronize, reopened, ready_for_review ] 14 | branches: 15 | - main 16 | paths-ignore: 17 | - 'docker/**' 18 | - 'docs/**' 19 | - 'notebooks/**' 20 | - '.github/markdown.yml' 21 | - '*.md' 22 | 23 | jobs: 24 | universal: 25 | name: Build on node ${{ matrix.node_version }} and ${{ matrix.os }} 26 | if: ${{ github.event_name == 'push' || github.event.pull_request.draft == false }} 27 | runs-on: ${{ matrix.os }} 28 | timeout-minutes: 30 29 | strategy: 30 | matrix: 31 | node_version: ['14', '16'] 32 | os: [ubuntu-latest, macOS-latest, windows-latest] 33 | steps: 34 | - name: Cancel previous runs 35 | uses: imgcook/cancel-workflow-action@81524cf38ed0e3a5865a550dde6118d26b7a5ede 36 | with: 37 | access_token: ${{ github.token }} 38 | exception: main 39 | - uses: actions/checkout@v2 40 | - name: Restore node_modules 41 | uses: actions/cache@v2 42 | with: 43 | path: | 44 | node_modules 45 | packages/*/node_modules 46 | key: ${{ runner.os }}-${{ matrix.node_version }}-nodepkg-${{ hashFiles('**/package-lock.json') }} 47 | restore-keys: | 48 | ${{ runner.os }}-nodepkg-${{ matrix.node_version }} 49 | ${{ runner.os }}-nodepkg 50 | - name: Using Node.js ${{ matrix.node_version }} 51 | uses: actions/setup-node@v1 52 | with: 53 | node-version: ${{ matrix.node_version }} 54 | - name: Building Pipcook 55 | run: | 56 | npm install 57 | npm run build 58 | du -h -d 1 59 | - name: Running unit tests 60 | if: ${{ runner.os != 'Linux' || matrix.node_version != 14 }} 61 | run: | 62 | npm run test 63 | - name: Run cov 64 | if: ${{ runner.os == 'Linux' && matrix.node_version == 14 }} 65 | run: | 66 | npm run cov && npm run cov:report 67 | - name: Coveralls 68 | if: ${{ runner.os == 'Linux' && matrix.node_version == 14 }} 69 | uses: coverallsapp/github-action@master 70 | with: 71 | github-token: ${{ secrets.GITHUB_TOKEN }} 72 | - name: Run bayes 73 | run: | 74 | node ./packages/cli/dist/bin/pipcook.js train ./test/pipelines/text-classification-bayes.json -o my-workspace -d -m http://pc-github.oss-us-west-1.aliyuncs.com/framework/ 75 | node ./packages/cli/dist/bin/pipcook.js predict ./my-workspace -s testword -d -m http://pc-github.oss-us-west-1.aliyuncs.com/framework/ 76 | - name: Run mobilenet 77 | run: | 78 | node ./packages/cli/dist/bin/pipcook.js train ./test/pipelines/image-classification-mobilenet.json -o my-workspace -d -m http://pc-github.oss-us-west-1.aliyuncs.com/framework/ 79 | - name: Run resnet 80 | run: | 81 | node ./packages/cli/dist/bin/pipcook.js train ./test/pipelines/image-classification-resnet.json -o my-workspace -d -m http://pc-github.oss-us-west-1.aliyuncs.com/framework/ 82 | - name: Run yolo 83 | run: | 84 | node ./packages/cli/dist/bin/pipcook.js train ./test/pipelines/object-detection-yolo.json -o my-workspace -d -m http://pc-github.oss-us-west-1.aliyuncs.com/framework/ 85 | 
-------------------------------------------------------------------------------- /.github/workflows/gh-pages.yml: -------------------------------------------------------------------------------- 1 | name: gh-pages 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | 8 | jobs: 9 | deployment: 10 | runs-on: macOS-latest 11 | steps: 12 | - uses: actions/checkout@v2 13 | - name: Use Node.js 14 | uses: actions/setup-node@v1 15 | with: 16 | node-version: 14 17 | - name: Build documentation 18 | run: | 19 | npm install 20 | npm run build 21 | npm run typedoc 22 | - name: Deploy to GitHub Pages 23 | uses: peaceiris/actions-gh-pages@v3 24 | with: 25 | github_token: ${{ secrets.GITHUB_SECRET_TOKEN }} 26 | publish_dir: ./docs 27 | publish_branch: gh-pages 28 | -------------------------------------------------------------------------------- /.github/workflows/markdown.yml: -------------------------------------------------------------------------------- 1 | name: check markdown files 2 | on: 3 | push: 4 | branches: 5 | - main 6 | pull_request: 7 | types: [ opened, synchronize, reopened, ready_for_review ] 8 | branches: 9 | - main 10 | 11 | jobs: 12 | markdown-link-check: 13 | if: ${{ github.event_name == 'push' || github.event.pull_request.draft == false }} 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: actions/checkout@v2 17 | - uses: gaurav-nelson/github-action-markdown-link-check@v1 18 | with: 19 | use-quiet-mode: 'no' 20 | use-verbose-mode: 'yes' 21 | config-file: '.markdown.config.json' 22 | folder-path: 'docs' 23 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Publish Packages 2 | on: 3 | push: 4 | tags: 5 | - 'v*' 6 | 7 | jobs: 8 | release: 9 | name: Release 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Checkout Repo 13 | uses: actions/checkout@v2 14 | 15 | - name: Setup Node.js 16 | uses: actions/setup-node@v1 17 | with: 18 | node-version: 14 19 | 20 | - name: Install Dependencies 21 | run: | 22 | npm install 23 | npm run build 24 | 25 | - name: Get release version 26 | id: get_version 27 | run: echo "RELEASE_VERSION=$(echo ${GITHUB_REF:11})" >> $GITHUB_ENV 28 | 29 | - name: Publish check 30 | run: | 31 | ver=$RELEASE_VERSION 32 | check_pkg() { 33 | curl -sL "http://registry.npmjs.com/@pipcook/$1" | jq -r '.versions."'$2'".version' 34 | } 35 | 36 | check_image() { 37 | curl -sL "https://hub.docker.com/v2/repositories/pipcook/pipcook/tags/$1" | jq -r '.name' 38 | } 39 | 40 | if 41 | [ $(check_pkg pipcook-cli $ver) == $ver ] && 42 | [ $(check_pkg daemon $ver) == $ver ] 43 | then 44 | echo "NPM_PUBLISHED=true" >> $GITHUB_ENV 45 | fi 46 | 47 | if [ $(check_image $ver) == $ver ]; then 48 | echo "DOCKER_PUBLISHED=true" >> $GITHUB_ENV 49 | fi 50 | 51 | - name: Publish npm packages 52 | if: env.NPM_PUBLISHED != 'true' 53 | run: | 54 | echo "//registry.npmjs.org/:_authToken=${{ secrets.npm_token }}" > ~/.npmrc 55 | git config --global user.name 'pipcook' 56 | git config --global user.email 'queyue.crk@alibaba-inc.com' 57 | git add . 
58 | git commit -m "bump version" --allow-empty 59 | npm run release 60 | env: 61 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 62 | NODE_AUTH_TOKEN: ${{secrets.npm_token}} 63 | NPM_AUTH_TOKEN: ${{ secrets.npm_token }} 64 | 65 | - name: Publish to Docker registry 66 | uses: docker/build-push-action@v1 67 | if: env.DOCKER_PUBLISHED != 'true' 68 | with: 69 | username: ${{ secrets.DOCKER_USERNAME }} 70 | password: ${{ secrets.DOCKER_PASSWORD }} 71 | repository: pipcook/pipcook 72 | dockerfile: docker/Dockerfile 73 | build_args: VER=${{ env.RELEASE_VERSION }} 74 | tags: "latest,${{ env.RELEASE_VERSION }}" 75 | 76 | 77 | - name: Sync from dockerhub to Aliyun 78 | env: 79 | ALI_DOCKER_USERNAME: ${{ secrets.ALI_DOCKER_USERNAME }} 80 | ALI_DOCKER_PASSWORD: ${{secrets.ALI_DOCKER_PASSWORD}} 81 | DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }} 82 | DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }} 83 | run: | 84 | wget https://github.com/AliyunContainerService/image-syncer/releases/download/v1.2.0/image-syncer-v1.2.0-linux-amd64.tar.gz 85 | tar -zxvf image-syncer-v1.2.0-linux-amd64.tar.gz 86 | 87 | echo {\"registry.cn-hangzhou.aliyuncs.com\": {\"username\": \"$ALI_DOCKER_USERNAME\", \"password\": \"$ALI_DOCKER_PASSWORD\"} \ 88 | , \"registry.hub.docker.com\": {\"username\":\"$DOCKER_USERNAME\", \"password\":\"$DOCKER_PASSWORD\"}} >> auth.json 89 | 90 | echo {\"pipcook/pipcook:latest\": \"registry.cn-hangzhou.aliyuncs.com/pipcook/pipcook:latest\", \ 91 | \"pipcook/pipcook:$RELEASE_VERSION\": \"registry.cn-hangzhou.aliyuncs.com/pipcook/pipcook:$RELEASE_VERSION\"} >> images.json 92 | 93 | ./image-syncer --auth=./auth.json --images=./images.json --namespace=pipcook --registry=registry.cn-hangzhou.aliyuncs.com --retries=3 94 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | output 3 | /lerna-debug.log 4 | pipcook-pipeline-* 5 | .DS_STORE 6 | .vscode/ 7 | dist 8 | tsconfig.tsbuildinfo 9 | samples/ 10 | samples*/ 11 | .pipcook-log/ 12 | pipcook_venv 13 | .temp 14 | detectron2 15 | .temp1 16 | pipcook-output/ 17 | docs/typedoc 18 | .eslintcache 19 | packages/pipboard/build 20 | .eslintcache 21 | ENV 22 | .test 23 | .pip 24 | packages/cli/assets/server/** 25 | packages/daemon/.github/** 26 | .pipcook 27 | __pycache__ 28 | .history 29 | *.pb 30 | coverage 31 | .nyc_output 32 | -------------------------------------------------------------------------------- /.markdown.config.json: -------------------------------------------------------------------------------- 1 | { 2 | "ignorePatterns": [{ 3 | "pattern": "^/.*" 4 | }, { 5 | "pattern": "*/index.html" 6 | }] 7 | } 8 | -------------------------------------------------------------------------------- /CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @FeelyChau @yorkie 2 | *.ts @WenheLI @yorkie 3 | 4 | /docs/ @yorkie 5 | /tools/ @FeelyChau 6 | /test/ @FeelyChau 7 | /notebooks/ @rickycao-qy 8 | 9 | /packages/cli/ @rickycao-qy 10 | /packages/core/ @WenheLI @FeelyChau 11 | /packages/costa/ @FeelyChau @yorkie 12 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Alibaba Open Source Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to 
making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at opensource@alibaba-inc.com. All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 
67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 72 | 73 | [homepage]: https://www.contributor-covenant.org -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | We are excited that you are interested in contributing to Pipcook. Before submitting your contribution, please take a moment to read through these few small guidelines. 3 | 4 | ## Reporting Issues 5 | - We use GitHub issues to manage our issues. We use statuses to mark the progress of our issues. 6 | 7 | - Try to search for your issue; it may have already been asked, answered, or even fixed in the development branch. 8 | 9 | - Check if the issue is reproducible with the latest stable version of Pipcook. If you are using a pre-release, please indicate the specific version you are using. 10 | 11 | - You are required to clearly describe the steps necessary to reproduce the issue you are running into. If an issue lacks a clear description for more than 5 days after we ask for one, we will close it. 12 | 13 | - If your issue is resolved but still open, don’t hesitate to close it. In case you found a solution by yourself, it could be helpful to explain how you fixed it. 14 | 15 | ## Pull Request Guidelines 16 | - Only code that's ready for release should be committed to the main branch. All development should be done in dedicated branches. 17 | - Check out a **new** topic branch from the main branch, and merge back against the main branch. 18 | - If adding a new feature: 19 | - Add an accompanying test case. 20 | - Provide a convincing reason to add this feature. Ideally you should open a suggestion issue first and have it greenlighted before working on it. 21 | - If fixing a bug: 22 | - If you are resolving a special issue, add `(fix #xxxx[,#xxx])` (#xxxx is the issue id) in your title for a better release log. 23 | - Provide a detailed description of the bug in the PR. Live demo preferred. 24 | - Add appropriate test coverage if applicable. 25 | 26 | 27 | ## Git Commit Specification 28 | - Your commit messages must follow our git commit specification. 29 | - We will check your commit message; if it does not conform to the specification, the commit will be automatically refused, so make sure you have read the specification above. 30 | 31 | ## Providing Feedback 32 | We are happy to hear any feedback and to answer any questions.
You can join our Dingding Group or ask away on Stack Overflow using the tag Pipcook. -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:11.2.2-cudnn8-runtime-ubuntu18.04 2 | ARG VER=latest 3 | 4 | LABEL version=${VER} 5 | LABEL description="docker image for pipcook runtime" 6 | 7 | ENV TF_FORCE_GPU_ALLOW_GROWTH=true 8 | 9 | WORKDIR /root/ 10 | RUN apt-get update && apt-get install -y curl wget python git libglib2.0-0 libsm6 libxrender-dev 11 | 12 | RUN curl -sL https://deb.nodesource.com/setup_14.x | bash - 13 | RUN apt-get install -y nodejs 14 | 15 | RUN npm install @pipcook/cli@${VER} -g --unsafe-perm 16 | ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/usr/local/cuda-11.2/targets/x86_64-linux/lib/ 17 | -------------------------------------------------------------------------------- /docker/Dockerfile.cpu: -------------------------------------------------------------------------------- 1 | FROM node:14 2 | ARG VER=latest 3 | 4 | LABEL version=${VER} 5 | LABEL description="docker image for pipcook runtime without gpu" 6 | 7 | WORKDIR /root/ 8 | 9 | RUN npm install @pipcook/cli@${VER} -g --unsafe-perm 10 | -------------------------------------------------------------------------------- /docs/.nojekyll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alibaba/pipcook/dcac2cfdee76fa999befefbcee29f325247ef0a2/docs/.nojekyll -------------------------------------------------------------------------------- /docs/GLOSSORY.md: -------------------------------------------------------------------------------- 1 | # Glossary 2 | 3 | This glossary hopes to definitively represent the tacit and explicit conventions applied in the Pipcook toolkit, while providing a reference for users and contributors. It aims to describe the concepts and either detail their corresponding API or link to other relevant parts of the documentation which do so. By linking to glossary entries from the API Reference and User Guide, we may minimize redundancy and inconsistency. 4 | 5 | ### Pipboard 6 | 7 | The abbreviation of Pipcook board. On Pipboard, you can manage and operate your pipelines and plug-ins through the Web interface. 8 | 9 | ### Pipcook 10 | 11 | The Pipcook project, generally https://github.com/alibaba/pipcook. 12 | 13 | ### Pipcook Daemon 14 | 15 | It is responsible for the management and execution of pipeline components. It provides remote access to [Pipcook Tools][] and [Pipboard][] through HTTP, and also allows other clients to integrate with the Pipcook Daemon over HTTP. 16 | 17 | ### Pipcook Script 18 | 19 | Scripts are the Lego blocks of a pipeline. By selecting different scripts, you can quickly complete different pipelines to train different models. 20 | 21 | ### Pipcook Tools 22 | 23 | The abbreviation of the Pipcook command-line tool, installed via `npm install @pipcook/cli`. 24 | 25 | ### Pipeline 26 | 27 | In computing, a pipeline, also known as a data pipeline, is a set of data processing elements connected in series, where the output of one element is the input of the next one. The elements of a pipeline are often executed in parallel or in time-sliced fashion.
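To make the definition concrete, here is an illustrative sketch in plain JavaScript (it is not the Pipcook API, just the general concept) of elements connected in series, where each element's output is the next element's input:

```js
// Each stage is one data processing element; runPipeline connects them in series.
const stages = [
  (text) => text.trim(),
  (text) => text.toLowerCase(),
  (text) => text.split(/\s+/)
];

// The output of one stage becomes the input of the next one.
const runPipeline = (input) => stages.reduce((data, stage) => stage(data), input);

console.log(runPipeline('  Hello Pipcook World  '));
// => [ 'hello', 'pipcook', 'world' ]
```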
28 | 29 | [Pipcook Tools]: #pipcook-tools 30 | [Pipboard]: #pipboard 31 | -------------------------------------------------------------------------------- /docs/INSTALL.md: -------------------------------------------------------------------------------- 1 | # Installing Pipcook 2 | 3 | There are different ways to install [Pipcook][]: 4 | 5 | - [Install via NPM][]. This is the best approach for most users. It will provide a stable version, and pre-built packages are available for most platforms. 6 | - [Build from source][]. This is best for users who want the latest-and-greatest features and aren’t afraid of running brand-new code. This is also needed for users who wish to contribute to the project. 7 | 8 | Before starting the installation, please make sure your environment meets the following requirements: 9 | 10 | - macOS, Linux, Windows 11 | - Node.js >= 12.17 or >= 14.0.0 12 | 13 | ## Install via NPM 14 | 15 | Installing [Pipcook][] via NPM is easy, just run: 16 | 17 | ```sh 18 | $ npm install -g @pipcook/cli 19 | ``` 20 | 21 | Then check that it is installed via `pipcook --help`. 22 | 23 | ## Install via Docker 24 | 25 | You can also install Pipcook with Docker. Just run this command: 26 | 27 | ```sh 28 | $ docker pull pipcook/pipcook:latest 29 | ``` 30 | 31 | After pulling successfully, run the command below to start: 32 | 33 | ```sh 34 | $ docker run -it pipcook/pipcook:latest /bin/bash 35 | ``` 36 | 37 | ## Troubleshooting 38 | 39 | If you have any installation problems, please report them to the [issue tracker](https://github.com/alibaba/pipcook/issues/new). 40 | 41 | [Install via NPM]: #install-via-npm 42 | [Install via Docker]: #install-via-docker 43 | [Build from source]: contributing/guide-to-contributor#download-source 44 | [Pipcook]: https://github.com/alibaba/pipcook 45 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # Pipcook 2 | 3 | The Pipcook Project is an open-source toolkit to enable and accelerate the intelligentization of front-end engineering for Web developers. 4 | 5 | # Usage 6 | Using Pipcook for machine learning development is very simple. It only takes four steps: install, train, test, and deploy. 7 | 8 | Install the [Pipcook][] command-line tool: 9 | 10 | ```shell 11 | $ npm install -g @pipcook/cli 12 | ``` 13 | 14 | Then train from any one of these [pipelines](./example/pipelines/); we take image classification as an example: 15 | 16 | ```shell 17 | $ pipcook train https://cdn.jsdelivr.net/gh/alibaba/pipcook@main/example/pipelines/image-classification-mobilenet.json -o ./output 18 | ``` 19 | The dataset specified by the pipeline includes two categories of images: `avatar` and `blurBackground`. 20 | After training, we can predict the category of an image: 21 | 22 | ```shell 23 | $ pipcook predict ./output/image-classification-mobilenet.json -s ./output/data/validation/blurBackground/71197_223__30.7_36.jpg 24 | ✔ Origin result:[{"id":1,"category":"blurBackground","score":0.9998120665550232}] 25 | ``` 26 | 27 | The input is a `blurBackground` image from the validation dataset, and the model determines that its category is `blurBackground`. 28 | 29 | Want to deploy it?
30 | ```shell 31 | $ pipcook serve ./output 32 | ℹ preparing framework 33 | ℹ preparing scripts 34 | ℹ preparing artifact plugins 35 | ℹ initializing framework packages 36 | Pipcook has served at: http://localhost:9091 37 | ``` 38 | 39 | Then you can open the browser and try your image classification server. 40 | 41 | ## Why Pipcook 42 | 43 | With the mission of enabling Web engineers to utilize the power of machine learning without any prerequisites, and the vision of leading the front-end technical field toward intelligence, [Pipcook][] aims to become the toolkit for the cross-cutting area of machine learning and front-end interaction. 44 | 45 | We design Pipcook's machine learning APIs truly for front-end applications, focusing on the front-end area and developing from the Web engineer's point of view. With the principle of being friendly to the Web, we will push the whole area forward with machine learning engineering. 46 | 47 | ## What's Pipcook 48 | 49 | The project provides subprojects including a machine learning pipeline framework, management tools, and a JavaScript runtime for machine learning; these can also be used as building blocks in conjunction with other projects. 50 | 51 | ### Principles 52 | 53 | [Pipcook][] is an open-source project guided by strong principles, aiming to be modular and flexible in user experience. It is open to the community to help set its direction. 54 | 55 | - **Modular** the project includes a set of projects that have well-defined functions and APIs that work together. 56 | - **Swappable** the project includes enough modules to build what Pipcook has done, but its modular architecture ensures that most of the modules can be swapped out for different implementations. 57 | 58 | ### Audience 59 | 60 | [Pipcook][] is intended for Web engineers looking to: 61 | 62 | - learn what machine learning is. 63 | - train their models and serve them. 64 | - optimize their own models for better evaluation results, like higher accuracy for image classification. 65 | 66 | > If any of the above describes you, just try it via the [installation guide](INSTALL.md). 67 | 68 | ### Subprojects 69 | 70 | __Pipcook Pipeline__ 71 | 72 | It's used to represent ML pipelines consisting of Pipcook scripts. This layer ensures the stability and scalability of the whole system and uses a [Script](manual/intro-to-script.md) mechanism to support rich functions including data source, data flow, training, and validation. 73 | 74 | A Pipcook Pipeline is generally composed of several scripts. Through different scripts and configurations, the final output is a directory which contains the trained model. 75 | 76 | __Pipcook Bridge to Python__ 77 | 78 | For JavaScript engineers, the most difficult part is the lack of a mature machine learning toolset in the ecosystem. In Pipcook, a module called **Boa** provides access to Python packages by bridging the interface of [CPython][] using N-API. 79 | 80 | With it, developers can use packages such as `numpy`, `scikit-learn`, `jieba`, `tensorflow`, or any other package from the Python ecosystem in the Node.js runtime through JavaScript. 81 | 82 | ## The Next 83 | 84 | Can't wait to start a [Pipcook][] project?
You can follow the guidance below to proceed to the next step: 85 | 86 | - [Learn how to install Pipcook?](INSTALL.md) 87 | - [Learn machine learning](tutorials/machine-learning-overview.md) 88 | - [Learn Pipcook from Pipeline](manual/intro-to-pipeline.md) 89 | - [Learn Pipcook from Boa](manual/intro-to-boa.md) 90 | - [Learn Pipcook Tools](manual/pipcook-tools.md) 91 | 92 | [Pipcook]: https://github.com/alibaba/pipcook 93 | [CPython]: https://github.com/python/cpython 94 | -------------------------------------------------------------------------------- /docs/_navbar.md: -------------------------------------------------------------------------------- 1 | - API Reference 2 | - [Runtime](typedoc/runtime/index.html) 3 | - [Script](typedoc/script/index.html) 4 | - Translations 5 | - [English](/) 6 | - [中文](/zh-cn/) 7 | -------------------------------------------------------------------------------- /docs/_sidebar.md: -------------------------------------------------------------------------------- 1 | - [What's Pipcook](/README.md) 2 | - [Install](/INSTALL.md) 3 | - User Manual 4 | - [Introduction to Pipeline](/manual/intro-to-pipeline.md) 5 | - [Introduction to Script](/manual/intro-to-script.md) 6 | - [Introduction to Boa](/manual/intro-to-boa.md) 7 | - [Pipcook Tools](/manual/pipcook-tools.md) 8 | - Tutorials 9 | - [Machine Learning Overview](/tutorials/machine-learning-overview.md) 10 | 11 | 12 | - [Using Python functions in Node.js](/tutorials/using-python-functions-in-nodejs.md) 13 | - [Classify images of UI components](/tutorials/component-image-classification.md) 14 | - Contributing 15 | - [Contributor guide](/contributing/guide-to-contributor.md) 16 | - [Collaborator guide](/contributing/guide-to-collaborator.md) 17 | - [Contribute a script](/contributing/contribute-a-script.md) 18 | - Specification 19 | - [Script Specification](/spec/script.md) 20 | - [Dataset Specification](/spec/dataset.md) 21 | - [FAQ](/faq/index.md) 22 | - [Pipcook Framework](/faq/pipcook-framework.md) 23 | - [Plugins](/faq/plugins.md) 24 | - [Glossary](/GLOSSORY.md) 25 | -------------------------------------------------------------------------------- /docs/contributing/guide-to-contributor.md: -------------------------------------------------------------------------------- 1 | # Contributor Guide 2 | 3 | Pipcook is a community-driven open-source project. We do our best to make every bug fix, every new feature, and the way this project evolves visible and transparent to everyone in this community. 4 | 5 | Therefore, we try to keep everything from source code to documentation friendly to contributors, and we have laid out some paths to make it easier to participate in Pipcook. If you want to get involved, you can follow them. 6 | 7 | - If you are going to browse source code only, go to [GitHub](https://github.com/alibaba/pipcook). 8 | - If you are a rookie with no experience in contributing to any open-source project, we have organized [good first issue][] tasks for you, all of which are relatively simple and easy to start with. 9 | - If you want to learn machine learning through contributing to this project, you can try our [good first model][] to help us do some model implementation and migration tasks (rest assured, you only need to complete the call to the Python ecosystem through [Boa][]; see the sketch after this list). 10 | - Otherwise, discussions on any of our issues are open to everyone, and you are welcome to contribute your ideas.
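As referenced in the list above, here is a minimal sketch of what calling a Python package through [Boa][] looks like from Node.js; it assumes `numpy` is available in the Python environment that Boa provisions:

```js
const boa = require('@pipcook/boa');

// Import a Python module into the Node.js runtime.
const np = boa.import('numpy');

// boa.kwargs maps a plain object to Python keyword arguments.
const x = np.array([ 1, 2, 3, 4 ], boa.kwargs({ dtype: np.float32 }));

console.log(np.mean(x)); // 2.5
```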
11 | 12 | ## Submit a patch 13 | 14 | Next, let's take a look at how to submit patches to Pipcook. 15 | 16 | ### Requirements 17 | 18 | - macOS / Linux / Windows 19 | - Node.js >= 12.17 || >= 14.0.0 20 | 21 | ### Download source 22 | 23 | Clone the repository from GitHub: 24 | 25 | ```bash 26 | $ git clone git@github.com:alibaba/pipcook.git 27 | ``` 28 | 29 | ### Build from source 30 | 31 | Then install the requirements and build: 32 | 33 | ```bash 34 | $ npm install 35 | $ npm run build 36 | ``` 37 | 38 | We provide a way to use [tuna mirror](https://mirrors.tuna.tsinghua.edu.cn/) for downloading Python and packages: 39 | 40 | ```sh 41 | $ BOA_TUNA=1 npm install 42 | ``` 43 | 44 | Or you could specify your custom miniconda mirror and Python index page: 45 | 46 | ```sh 47 | $ export BOA_CONDA_MIRROR=https://mirrors.tuna.tsinghua.edu.cn/anaconda/miniconda # this is for miniconda 48 | $ export BOA_CONDA_INDEX=https://pypi.tuna.tsinghua.edu.cn/simple # this is for pip 49 | $ npm install 50 | ``` 51 | 52 | ### Test 53 | 54 | Run all the tests with the following command: 55 | 56 | ```bash 57 | $ npm test 58 | ``` 59 | 60 | And run the tests for a single package: 61 | 62 | ```bash 63 | $ ./node_modules/.bin/lerna run test --scope <package-name> 64 | ``` 65 | 66 | ### Pipeline 67 | 68 | ```bash 69 | $ sh tools/run_pipeline.sh <pipeline_name> 70 | ``` 71 | 72 | The `pipeline_name` is the name of the pipeline file under "test/pipelines", such as: 73 | 74 | - "text-bayes-classification" 75 | - "mnist-image-classification" 76 | - "databinding-image-classification" 77 | 78 | ### Database 79 | 80 | If your modification involves the `Database` structure, you need to write a migration script under `packages/daemon/src/migrations` (a minimal sketch appears at the end of this guide). 81 | For more detail about it, please refer to [Migration](https://sequelize.org/master/manual/migrations.html). 82 | 83 | ### Push and create a pull request 84 | 85 | After the local tests pass, you can push the code and create a pull request: 86 | 87 | ```sh 88 | $ git push git@github.com:<username>/pipcook.git 89 | ``` 90 | 91 | ## Internal documentations 92 | 93 | ### Plugin Specification 94 | 95 | You can refer [here](../spec/plugin.md) for the Plugin Specification. 96 | 97 | We have defined a set of interfaces for each plugin. Each type of plugin must be implemented strictly according to the interfaces. The detailed information is as follows: 98 | 99 | - [Data Collect](../spec/plugin/0-data-collect.md) 100 | - [Data Access](../spec/plugin/1-data-access.md) 101 | - [Data Process](../spec/plugin/2-data-process.md) 102 | - [Model Load](../spec/plugin/3-model-define.md) 103 | - [Model Train](../spec/plugin/4-model-train.md) 104 | - [Model Evaluate](../spec/plugin/5-model-evaluate.md) 105 | 106 | ### Dataset Specification 107 | 108 | For data reading and processing involved in the development, please refer to our [Dataset Specification](../spec/dataset.md).
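Returning to the Database note above, the shape of a migration script is sketched below; the table and column names here are hypothetical, so see the Sequelize migration docs linked above for the real conventions:

```js
'use strict';

// Hypothetical migration: add a nullable "description" column to a
// "pipelines" table on upgrade, and remove it again on rollback.
module.exports = {
  up: async (queryInterface, Sequelize) => {
    await queryInterface.addColumn('pipelines', 'description', {
      type: Sequelize.STRING,
      allowNull: true
    });
  },
  down: async (queryInterface) => {
    await queryInterface.removeColumn('pipelines', 'description');
  }
};
```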
109 | 110 | [good first issue]: https://github.com/alibaba/pipcook/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22 111 | [good first model]: https://github.com/alibaba/pipcook/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+model%22 112 | [Boa]: https://github.com/alibaba/pipcook/tree/master/packages/boa 113 | -------------------------------------------------------------------------------- /docs/faq/index.md: -------------------------------------------------------------------------------- 1 | # Frequently Asked Questions 2 | 3 | ## Questions about the Pipcook framework 4 | 5 | - [Where should I put the Pipcook JSON config file?](./pipcook-framework.md#q1) 6 | 7 | - [Why is it so slow to install Pipcook?](./pipcook-framework.md#q2) 8 | 9 | - [Can I use Pipcook in Electron?](./pipcook-framework.md#q3) 10 | 11 | - [Does Pipcook support the Windows platform?](./pipcook-framework.md#q4) 12 | 13 | ## Questions about Pipcook plugins (v1.x) 14 | 15 | - [How can I specify which GPU card to use for yolov5 training?](./plugins.md#q1) 16 | 17 | - [How can I tell plugins not to use the GPU even when you have the right GPU and CUDA environment for yolov5 training?](./plugins.md#q2) 18 | -------------------------------------------------------------------------------- /docs/faq/pipcook-framework.md: -------------------------------------------------------------------------------- 1 | # Frequently Asked Questions 2 | 3 | ## Where should I put the Pipcook JSON config file? 4 | 5 | Pipcook runs a daemon behind the scenes and provides its service to the user via the command-line tool or Pipboard. There is no restriction on the current working directory or where you should put your config file. You can run `pipcook run <url>` at any location, where url is the path of your config file, which can be either a local path or a remote URL. 6 | 7 | ## Why is it so slow to install Pipcook? 8 | 9 | Currently the installation of Pipcook and its plugins relies on the npm registry and the pip (Python) registry. These default registries may have a slow connection in your region. You can specify `pipcook init -c <npm-client>`, for example `pipcook init -c cnpm`, to change your npm client. Meanwhile, you can use `pipcook init --tuna` to use the tuna pip registry. 10 | 11 | ## Can I use Pipcook in Electron? 12 | 13 | Theoretically, as long as the environment supports Node.js >= 12.17 or >= 14.0.0 and the corresponding N-API, you can run Pipcook smoothly. Meanwhile, Pipcook will support producing WASM models so that you can easily integrate a model into your system. 14 | 15 | ## Does Pipcook support the Windows platform? 16 | 17 | Not yet. We will support Windows soon. 18 | -------------------------------------------------------------------------------- /docs/faq/plugins.md: -------------------------------------------------------------------------------- 1 | # Frequently Asked Questions (v1.x) 2 | 3 | ### How can I specify which GPU card to use for yolov5 training? 4 | 5 | You can just set the environment variable `$CUDA_VISIBLE_DEVICES` to your GPU card number to achieve this. If this environment variable is not set, plugins will just use GPU:0 by default. Notice that the environment variable should be set on the daemon (server) side before running the daemon. 6 | 7 | ### How can I tell plugins not to use the GPU even when you have the right GPU and CUDA environment for yolov5 training? 8 | 9 | By default, plugins will use the GPU if the environment is good for GPU training.
If you want to disable this feature, just set `export CUDA_VISIBLE_DEVICES=""`. Notice that the environment variable should be set on the daemon (server) side before running the daemon. 10 | -------------------------------------------------------------------------------- /docs/images/community_qrcode.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alibaba/pipcook/dcac2cfdee76fa999befefbcee29f325247ef0a2/docs/images/community_qrcode.jpg -------------------------------------------------------------------------------- /docs/images/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alibaba/pipcook/dcac2cfdee76fa999befefbcee29f325247ef0a2/docs/images/logo.png -------------------------------------------------------------------------------- /docs/images/plugin-script-map.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alibaba/pipcook/dcac2cfdee76fa999befefbcee29f325247ef0a2/docs/images/plugin-script-map.png -------------------------------------------------------------------------------- /docs/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Pipcook, JavaScript application framework for machine learning and its engineering 6 | 7 | 8 | 9 | 10 | 11 | 12 |
13 | 62 | 63 | 64 | 65 | 66 | -------------------------------------------------------------------------------- /docs/manual/README.md: -------------------------------------------------------------------------------- 1 | # Pipcook Manual 2 | -------------------------------------------------------------------------------- /docs/manual/intro-to-framework.md: -------------------------------------------------------------------------------- 1 | # Pipcook Framework 2 | 3 | Pipcook uses scripts that don't bundle some of the heavier dependencies like `@tensorflow/tfjs`, so how do we use them in our scripts? 4 | In fact, Pipcook packages these dependencies in a so-called `framework`, which is actually a set of packages related to the platform and Node.js version, as in the following pipeline: 5 | 6 | ```json 7 | { 8 | "specVersion": "2.0", 9 | "type": "ObjectDetection", 10 | "datasource": "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/object-detection-yolo/build/datasource.js?url=https://pc-github.oss-us-west-1.aliyuncs.com/dataset/object-detection-yolo-min.zip", 11 | "dataflow": [ 12 | "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/object-detection-yolo/build/dataflow.js?size=416&size=416" 13 | ], 14 | "model": "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/object-detection-yolo/build/model.js", 15 | "artifact": [{ 16 | "processor": "pipcook-artifact-zip@0.0.2", 17 | "target": "./object-detection-yolo-model.zip" 18 | }], 19 | "options": { 20 | "framework": "tfjs@3.8", 21 | "train": { 22 | "epochs": 10, 23 | "gpu": true 24 | } 25 | } 26 | } 27 | ``` 28 | 29 | This pipeline uses the framework `tfjs@3.8`, which contains `@tensorflow/tfjs-node@3.8` and `@tensorflow/tfjs-node-gpu@3.8`. Note that the script does not need to reference these two packages, but rather `@tensorflow/tfjs`. When the framework loads, it determines whether `@tensorflow/tfjs-node-gpu@3.8` needs to be set as the backend based on the `train.gpu` option. Since macOS does not support CUDA, the framework also smoothes out platform differences: `train.gpu` will not take effect on macOS systems. This capability is implemented by initialization scripts in the framework, which brings us to the framework's structure: each framework contains a framework description file, a framework initialization script, and several dependency folders. As an example, the `tfjs@3.8` package has the following directory structure: 30 | 31 | 32 | ```sh 33 | ├── framework.json 34 | ├── index.js 35 | └── node_modules 36 | ``` 37 | 38 | Where `framework.json` is the framework's description file, `index.js` is the framework's initialization script, and `node_modules` contains the dependency folder that the framework will provide.
39 | The content of `framework.json` is as follows: 40 | 41 | ```json 42 | { 43 | "name": "tfjs", 44 | "version": "3.8", 45 | "packages": [ 46 | { 47 | "name": "@tensorflow/tfjs-node", 48 | "version": "3.8.0", 49 | "type": "js" 50 | }, 51 | { 52 | "name": "@tensorflow/tfjs-node-gpu", 53 | "version": "3.8.0", 54 | "type": "js" 55 | } 56 | ] 57 | } 58 | ``` 59 | 60 | The initialization script exports an initialization function that will be called each time the pipeline reaches the framework initialization phase, passing in the `options` field of the pipeline file, as in the following example: 61 | 62 | ```js 63 | const os = require('os'); 64 | 65 | module.exports = { 66 | initialize(opts) { 67 | if ( 68 | opts && opts.train 69 | && ( 70 | opts.train.gpu === 'true' 71 | || opts.train.gpu === true 72 | ) 73 | ) { 74 | if (os.platform() !== 'darwin') { 75 | require('@tensorflow/tfjs-node-gpu'); 76 | console.log('gpu enabled'); 77 | } else { 78 | require('@tensorflow/tfjs-node'); 79 | console.warn('platform darwin does not support gpu'); 80 | } 81 | } else { 82 | require('@tensorflow/tfjs-node'); 83 | console.log('gpu disabled'); 84 | } 85 | } 86 | } 87 | ``` 88 | 89 | In addition, the backend of `tfjs` has different binary libraries for different platforms, so Pipcook will choose to download different packages depending on the environment; for example, on macOS with Node.js v12.22, the actual framework file downloaded is `https://pipcook-cloud.oss-cn-hangzhou.aliyuncs.com/framework/node12-py37/tfjs%403.8-darwin-x64-v8.zip`, while on Linux with Node.js v14.0, it will download `https://pipcook-cloud.oss-cn-hangzhou.aliyuncs.com/framework/node14-py37/tfjs%403.8-linux-x64-v6.zip`. Of course, if the script depends on a custom framework, you can also use it directly by filling in the `framework` option with the full URL, or by creating your own framework mirror and specifying the framework mirror address with the `-m` argument of the `train`, `predict` and `serve` commands. 90 | A complete framework mirror directory structure is as follows: 91 | 92 | 93 | 94 | ```sh 95 | ├── node14-py37/{framework-name}@{version}-{os}-{arch}-{napi-version}.zip 96 | └── node12-py37/{framework-name}@{version}-{os}-{arch}-{napi-version}.zip 97 | ``` 98 | 99 | The `py37` in the path is the version of Python supported by the referenced `Boa`, which currently only supports v3.7. 100 | -------------------------------------------------------------------------------- /docs/manual/intro-to-pipeline.md: -------------------------------------------------------------------------------- 1 | # Introduction to Pipeline 2 | 3 | In Pipcook, we use Pipeline to represent the training process of a model, so in general, what kind of pipeline is needed to train a model?
The developer can use a JSON file to describe the modeling pipeline, from sample collection and model definition to training and model evaluation: 4 | 5 | ```js 6 | { 7 | "specVersion": "2.0", 8 | "datasource": "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/image-classification-mobilenet/build/datasource.js?url=http://ai-sample.oss-cn-hangzhou.aliyuncs.com/image_classification/datasets/imageclass-test.zip", 9 | "dataflow": [ 10 | "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/image-classification-mobilenet/build/dataflow.js?size=224&size=224" 11 | ], 12 | "model": "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/image-classification-mobilenet/build/model.js", 13 | "artifact": [{ 14 | "processor": "pipcook-artifact-zip@0.0.2", 15 | "target": "/tmp/mobilenet-model.zip" 16 | }], 17 | "options": { 18 | "framework": "tfjs@3.8", 19 | "train": { 20 | "epochs": 20, 21 | "validationRequired": true 22 | } 23 | } 24 | } 25 | ``` 26 | 27 | As shown above, a Pipeline consists of three types of scripts (`datasource`, `dataflow` and `model`), as well as the build plugins in `artifact` and the Pipeline options in `options`. 28 | Each Pipcook script passes parameters via a URI query, and the parameters of the model script can also be defined via `options.train`. 29 | `artifact` defines a set of build plugins, each of which will be called in turn after training completes, allowing the output model to be transformed, packaged, deployed, etc. 30 | `options` contains the framework definition and the definition of training parameters. 31 | Then, Pipcook prepares the environment, runs the scripts, and finally outputs and processes the model based on the URIs and parameters defined in this JSON file. 32 | 33 | > See [Introduction to Script](./intro-to-script.md) for more details about Pipcook scripts. 34 | 35 | > The scripts of a pipeline support the `http`, `https` and `file` protocols. 36 | 37 | Next, when we have defined such a pipeline, we can run it through Pipcook. 38 | 39 | ## Preparation 40 | 41 | Follow the [Pipcook Tools Installation](./pipcook-tools.md) to get Pipcook ready. 42 | 43 | ## Run Pipeline 44 | 45 | Save the above pipeline JSON to disk, and run: 46 | 47 | ```sh 48 | $ pipcook run /path/to/your/pipeline-config.json 49 | ``` 50 | 51 | Or serve it from a static resource server: 52 | 53 | ```sh 54 | $ pipcook run https://host/path/to/your/pipeline-config.json 55 | ``` 56 | 57 | After execution, the trained model files are generated in a folder named with the current timestamp under the current [working directory](https://linux.die.net/man/3/cwd), and the model files are compressed by the build plugin `pipcook-artifact-zip` into a zip file and saved to the `/tmp` directory. 58 | 59 | ``` 60 | ├── pipeline-config.json 61 | ├── cache 62 | ├── data 63 | ├── framework 64 | ├── model 65 | └── scripts 66 | ``` 67 | 68 | The directory named `model` holds the model files; the ability to use the model will be added in the next release. 69 | -------------------------------------------------------------------------------- /docs/manual/intro-to-script.md: -------------------------------------------------------------------------------- 1 | # Pipcook Scripts 2 | 3 | In Pipcook, each Pipeline represents a specific machine learning task, so how do we define a workflow? Pipcook uses scripts to define and configure the different phases in a Pipeline.
A Pipcook script is a JavaScript file that exports a specific method, and comes in 3 different types: `datasource`, `dataflow` and `model`, as defined [here](../spec/script.md). A text classification task, for example, could be composed of the following scripts: 4 | 5 | - `datasource` The datasource script is used to download the sample data and provide the data access interface. 6 | - `dataflow` converts the format of the downloaded dataset to a format acceptable to the model that follows (not needed in this example). 7 | - `model` defines the model for text classification, a [naive Bayes classifier](https://en.wikipedia.org/wiki/Naive_Bayes_classifier), obtains samples for model training through the sample data interface, and evaluates accuracy. 8 | 9 | > The source code of the above Pipeline is defined [here](https://github.com/alibaba/pipcook/blob/main/example/pipelines/text-classification-bayes.json). 10 | 11 | With the above example, for a text classification task we follow a machine learning workflow that executes different types of subtasks in order, where each subtask corresponds to a user-defined script, and the user can quickly tune the Pipeline for the whole task at a low cost. 12 | 13 | > The available official scripts are [here](https://github.com/imgcook/pipcook-script). 14 | -------------------------------------------------------------------------------- /docs/manual/pipcook-models.md: -------------------------------------------------------------------------------- 1 | # Introduction to Pipcook models 2 | 3 | Pipcook now supports two types of model: Node.js and WASM. In this manual, we will dive into these two types of models and show users how to use them. 4 | 5 | ## nodejs 6 | 7 | ### Background 8 | 9 | Node.js models are powered by Boa, a Python-JS bridge that allows users to directly run Python modules with JavaScript syntax. 10 | 11 | The common folder structure for such a model looks like: 12 | ``` 13 | ├── boapkg.js 14 | ├── index.js 15 | ├── metadata.json 16 | ├── model 17 | └── package.json 18 | ``` 19 | 20 | The black magic here is to use Boa to connect JavaScript and Python. This allows users to tap the flourishing Python ecosystem, with a powerful PC serving as the backend, from Node.js. 21 | 22 | But the trade-off is a heavy runtime and a long installation time. 23 | 24 | ### How to use 25 | 26 | To use the Node.js model, the following steps are needed: 27 | 28 | ```bash 29 | $ cd output/nodejs 30 | $ npm install # To install deps 31 | ``` 32 | 33 | Then just treat `output/nodejs` as an npm package with a `predict` function. You can include it in any Node.js runtime and use the following code to call the model: 34 | ```js 35 | const model = require('./nodejs/index'); 36 | const result = await model.predict([1, 2, 3, 4]); 37 | ``` 38 | 39 | ## WASM 40 | 41 | ### Background 42 | 43 | To give a more portable and user-friendly model solution, Pipcook uses [TVM](https://tvm.apache.org/docs) to compile a given model to WASM format. In this way, the model can run natively in both the browser and Node.js. However, since the WebGPU standard is not stable yet, Pipcook does not target the compiled model to GPU. In other words, **the WASM format only works on CPU right now**.
44 | 45 | The generated folder structure looks like: 46 | 47 | ``` 48 | ├── browser.js 49 | ├── node.js 50 | ├── model.wasi.js 51 | ├── model.wasi.wasm 52 | ├── modelDesc.json 53 | ├── modelParams.parmas 54 | ├── modelSpec.json 55 | └── tvmjs.bundle.js 56 | ``` 57 | 58 | ### How to use 59 | 60 | The entry files are `browser.js` and `node.js`; as the names suggest, they are prepared for the browser environment and the Node.js environment respectively. 61 | To run the model, users just need to include the corresponding entry file and call the `predict` function. 62 | 63 | Node.js: 64 | ```js 65 | const model = require('./node'); 66 | const data = [0, 1, 2, 3]; // Mock data, the real data layout depends on the model's definition 67 | const res = model.predict(data); // return type is Float32Array 68 | ``` 69 | 70 | Browser: 71 | ```js 72 | const model = require('./browser.js'); 73 | const data = [0, 1, 2, 3]; // Mock data, the real data layout depends on the model's definition 74 | const res = model.predict(data); // return type is Float32Array 75 | ``` 76 | -------------------------------------------------------------------------------- /docs/manual/pipcook-tools.md: -------------------------------------------------------------------------------- 1 | # Pipcook Tools 2 | 3 | Pipcook Tools is a command-line tool provided by Pipcook for developers. It helps you run and manage your pipelines. 4 | 5 | ## Installation 6 | 7 | ```sh 8 | $ npm install @pipcook/cli -g 9 | ``` 10 | 11 | Follow [Install](../INSTALL.md) for other installation guides. 12 | 13 | 14 | ## User's Guide 15 | 16 | To run a Pipeline from a URI, simply execute the following command: 17 | 18 | ```sh 19 | $ pipcook run protocol://location/to/your/pipeline-config.json 20 | ``` 21 | 22 | The supported pipeline file protocols are: `http:`, `https:`, `file:`. `file:` is the default protocol if not defined. 23 | 24 | More run options can be obtained with the following command: 25 | 26 | ```sh 27 | $ pipcook run --help 28 | ``` 29 | 30 | > For more information on writing a pipeline, please see [here](./intro-to-pipeline.md). 31 | 32 | ## Cache Management 33 | 34 | When a Pipeline runs via the `pipcook run` command, any script or framework fetched over a non-`file` protocol will be 35 | saved to a cache directory to speed up the next run. 36 | 37 | If you want to remove these cache files manually, you can execute the following: 38 | ```sh 39 | $ pipcook clean 40 | ``` 41 | -------------------------------------------------------------------------------- /docs/rfcs/0001-framework-migration.md: -------------------------------------------------------------------------------- 1 | - Start Date: 2020-11-25 2 | - Target Major Version: 2.0 3 | - Reference Issues: (leave this empty) 4 | - Implementation PR: (leave this empty) 5 | - Author: @FeelyChau @SirM2z 6 | 7 | # Summary 8 | 9 | We have accumulated a lot of technical debt while iterating on 1.x, and we hope to solve these problems in 2.0. 10 | 11 | # Test 12 | 13 | Unit tests and integration tests have very different running costs, but 1.x does not classify the tests well: unit and integration tests are mixed together, which reduces the efficiency of continuous integration and makes tests harder to write. In 2.0, we will make a clear division between unit testing and integration testing. 14 | 15 | ## Unit test 16 | 17 | Unit testing is the testing of functions.
Before writing unit tests for a function, we should clarify the input and output boundaries of the function under test and how it handles exceptions, then cover those boundaries precisely and keep the tests cheap to run, so that any change can be verified at any time. 18 | 19 | In this way, we can ensure that the function works as designed. We wrote many unit tests in 1.x, but some problems remain: 20 | 21 | * **insufficient coverage**: `cli` is not covered, and the coverage rate in the other projects is 88% 22 | * **insufficient case design**: there is no complete test design for each unit 23 | * **too many test frameworks**: `boa` uses `Tape`, `daemon` uses `Mocha`, and the other projects use `Jasmine` 24 | 25 | It is necessary to solve the above problems. The specific objectives and measures: 26 | 27 | * the unit test coverage rate should be increased to more than **95%** 28 | * the input and output boundaries of functions should be defined, tested and covered, and coverage of `cli` should be enabled 29 | * the test framework should be unified on `Ava`. Unit test cases should be as free of IO and side effects as possible; Ava's parallel running mechanism will force us to write more efficient test code 30 | 31 | Switching the test framework to `Ava` will bring some refactoring work: 32 | 33 | ```js 34 | // ava 35 | import test from 'ava'; 36 | import * as fs from 'fs-extra'; 37 | import * as sinon from 'sinon'; 38 | import { shuffle } from './public'; 39 | 40 | test('array shuffle', (t) => { 41 | const array = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 ]; 42 | shuffle(array); 43 | t.notDeepEqual(array, [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 ]); 44 | t.deepEqual(array.sort(), [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 ]); 45 | }); 46 | 47 | // serial hook 48 | test.serial.afterEach(() => { 49 | sinon.restore(); 50 | }); 51 | 52 | // serial cases: the callbacks are async because they await the stubbed IO 53 | test.serial('test a', async (t) => { 54 | sinon.stub(fs, 'readJson').resolves({}); 55 | t.deepEqual(await fs.readJson('mockFileName.json'), {}); 56 | }); 57 | 58 | test.serial('test b', async (t) => { 59 | sinon.stub(fs, 'readJson').resolves(undefined); 60 | t.is(await fs.readJson('mockFileName.json'), undefined); 61 | }); 62 | 63 | test.todo('some todo cases'); 64 | ``` 65 | 66 | ```js 67 | // jasmine 68 | import { shuffle } from './public'; 69 | 70 | describe('public utils', () => { 71 | it('test if the array is shuffled', () => { 72 | const array = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 ]; 73 | shuffle(array); 74 | expect(array).not.toEqual([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 ]); 75 | expect(array.sort()).toEqual([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 ]); 76 | }); 77 | }); 78 | ``` 79 | 80 | ## Integration test 81 | 82 | The integration tests in 1.x depend on workflow configuration files. Strictly speaking, this is not a rigorous integration testing method, and it is hard to maintain. In addition, we assert nothing about the running process except the exit code.
This situation will be improved in 2.0 as follows: 83 | 84 | * define integration test cases 85 | * use or develop an integration test framework suitable for Pipcook 86 | * extend the run environment 87 | 88 | # Framework migration for Daemon 89 | 90 | The framework of the daemon will be moved from the original framework to `loopback 4` for the following reasons: 91 | 92 | * automatic code generation capability 93 | * framework maturity: the TypeScript version cannot be configured in the original framework, so the overall size of the pipcook package cannot be trimmed, and the test framework is constrained and cannot be switched 94 | * i18n 95 | 96 | # Framework migration for Pipboard 97 | 98 | The pipboard UI library will be migrated from icejs to [ant.design](https://ant.design/) for the following reasons: 99 | - icejs has no English documentation, so it is not friendly to foreign contributors 100 | - The [ant.design](https://ant.design/) library is more widely used and more familiar to community contributors 101 | - It reduces the complexity of the framework and makes it easier to maintain. icejs contains some complex features that are unnecessary for pipboard, such as MPA (Multi-page application), SSR (Server-side rendering), permission routing, etc. 102 | 103 | The bundler will also be migrated from `webpack` to `parcel`, mainly because parcel requires less configuration than webpack and is easier to use. 104 | -------------------------------------------------------------------------------- /docs/spec/dataset.md: -------------------------------------------------------------------------------- 1 | # Dataset Specification 2 | 3 | Datasets are an important part of machine learning: subsequent models are built on top of them, so datasets need to be managed. The following is the standard format of the dataset that Pipcook should save after the data is collected through the `datasource` script. 4 | 5 | For different dataset formats, the `datasource` script is used to smooth over the differences. 6 | 7 | #### Image 8 | 9 | PascalVOC Dataset format, the detailed directory is as follows: 10 | 11 | ``` 12 | 📂dataset 13 | ┣ 📂annotations 14 | ┃ ┣ 📂train 15 | ┃ ┃ ┣ 📜... 16 | ┃ ┃ ┗ 📜${image_name}.xml 17 | ┃ ┣ 📂test 18 | ┃ ┗ 📂validation 19 | ┗ 📂images 20 | ┣ 📜... 21 | ┗ 📜${image_name}.jpg 22 | ``` 23 | 24 | Or, represented in XML (standard PascalVOC annotation layout): 25 | 26 | ```xml 27 | <annotation> 28 | <folder>folder path</folder> 29 | <filename>image name</filename> 30 | <size> 31 | <width>width</width> 32 | <height>height</height> 33 | </size> 34 | <object> 35 | <name>category name</name> 36 | <bndbox> 37 | <xmin>left</xmin> 38 | <ymin>top</ymin> 39 | <xmax>right</xmax> 40 | <ymax>bottom</ymax> 41 | </bndbox> 42 | </object> 43 | </annotation> 44 | 45 | ``` 46 | 47 | #### Text 48 | 49 | The text category should be a CSV file. The first column is the text content, and the second column is the category name. The delimiter is ',' without a header. 50 | 51 | ```csv 52 | prod1, type1 53 | prod2, type2 54 | prod3, type2 55 | prod4, type1 56 | ``` 57 | -------------------------------------------------------------------------------- /docs/spec/script.md: -------------------------------------------------------------------------------- 1 | # Script Specification 2 | 3 | [Pipcook][] uses scripts to accomplish tasks in a specific machine learning lifecycle, which keeps the framework simple, stable, and efficient. 4 | 5 | At the same time, through a set of script specifications defined by [Pipcook][], we also allow anyone to develop scripts, which ensures the scalability of [Pipcook][]. Theoretically, through scripts, we can achieve all kinds of machine learning tasks.
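For intuition, here is a condensed sketch of what a script module can look like. This is illustrative only, not the exact Pipcook API; the precise typed entries are listed in the next section.

```js
// Illustrative sketch of a model script (hypothetical, simplified shapes):
// a model script exposes a `train` entry and a `predict` entry.
module.exports = {
  // Read samples from the upstream datasource/dataflow script,
  // fit the model, then persist it through the runtime context.
  async train(dataset, options, context) {
    // const sample = await dataset.train.next(); ...
  },
  // Load the persisted model and return predictions for the input.
  async predict(samples, context) {
    // return samples.map(() => ({ category: 'unknown', score: 1 }));
  }
};
```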
6 | 7 | 8 | ## Script Category 9 | 10 | We have defined the following script categories for the machine learning lifecycle. 11 | 12 | - datasource: [`DatasourceEntry: (options: Record<string, string>, context: ScriptContext) => Promise<DatasetPool>`](https://alibaba.github.io/pipcook/typedoc/script/index.html#datasourceentry) Download data from data sources and provide data access interfaces. 13 | - dataflow: [`DataflowEntry: (api: DatasetPool, options: Record<string, string>, context: ScriptContext) => Promise<DatasetPool>`](https://alibaba.github.io/pipcook/typedoc/script/index.html#dataflowentry) Get the data from the datasource, process it and let the next dataflow script or model script get the processed data by returning the data access interface. 14 | - model: [`{ train: ModelEntry, predict: PredictEntry }`](https://alibaba.github.io/pipcook/typedoc/script/interfaces/extmodelentry.html) Get sample data from the dataflow or datasource scripts, train, validate, and output the model file; `predict` produces predictions from the input. 15 | 16 | ## Developing 17 | 18 | Check [this contributing documentation](../contributing/contribute-a-script.md) to learn how to develop a new script. 19 | 20 | [Pipcook]: https://github.com/alibaba/pipcook 21 | [Pipcook Script]: ../../GLOSSORY.md#pipcook-script 22 | [Pipcook Tools]: ../../manual/pipcook-tools.md 23 | [PyPI]: https://pypi.org 24 | -------------------------------------------------------------------------------- /docs/zh-cn/GLOSSORY.md: -------------------------------------------------------------------------------- 1 | # 术语表 2 | 3 | 术语表用于表达 Pipcook 中若干默认的、隐式的约定,同时为用户和贡献者提供参考。它旨在描述概念,并详细说明其相应的 API 或文档的其他相关部分。通过从接口文档或用户手册中的链接跳转过来并查阅术语含义,可以最大程度地减少阅读中的不一致。 4 | 5 | ### Pipboard 6 | 7 | Pipcook Board 的缩写,在 Pipboard 上,您可以通过 Web 来管理你的 Pipeline 和插件。 8 | 9 | ### Pipcook 10 | 11 | 指 Pipcook 项目,一般来说指向 GitHub(https://github.com/alibaba/pipcook) 地址。 12 | 13 | ### Pipcook Daemon 14 | 15 | 用于管理 Pipeline 及其执行,它通过 HTTP 提供对 [Pipcook Tools][] 和 [Pipboard][] 的远程访问,同时也支持其他客户端通过 HTTP 集成 Pipcook Daemon。 16 | 17 | ### Pipcook script 18 | 19 | 在 Pipeline 中,脚本就像乐高积木,用户选择不同的脚本,就可以快速完成不同的 Pipeline 的搭建,并训练出不同的模型。 20 | 21 | ### Pipcook Tools 22 | 23 | Pipcook 命令行工具的简称,通过 `npm install -g @pipcook/cli` 安装。 24 | 25 | ### Pipeline 26 | 27 | 在计算机系统中,Pipeline(也称为数据流)是一组串联连接的数据处理节点,其中一个节点的输出是下一个节点的输入。Pipeline 的节点通常以并行或按时间分割的方式执行。 28 | 29 | [Pipcook Tools]: #pipcook-tools 30 | [Pipboard]: #pipboard 31 | -------------------------------------------------------------------------------- /docs/zh-cn/INSTALL.md: -------------------------------------------------------------------------------- 1 | # 安装 2 | 3 | 现在有以下两种不同的方式来安装 [Pipcook][]: 4 | 5 | - [通过 NPM 安装][] 对于大多数用户来说,这是最好的方法。它将提供一个稳定的版本,并且预编译的软件包可用于大多数平台。 6 | - [通过源码安装][] 这种方式最适合需要最新和最强大功能而又不怕运行全新代码的用户,希望为该项目做出贡献的开发者也需要这样做。 7 | 8 | 在开始安装之前,需要保证下面的环境: 9 | 10 | - macOS、Linux、Windows 11 | - Node.js 12.17 以上 / 14.0.0 以上 12 | 13 | ## 通过 NPM 安装 14 | 15 | 安装 [Pipcook][] 只需运行下面的命令即可: 16 | 17 | ```sh 18 | $ npm install -g @pipcook/cli 19 | ``` 20 | 21 | 然后通过 `pipcook --help` 来检查安装是否成功。 22 | 23 | ## 通过 Docker 安装 24 | 25 | 我们提供了阿里源 docker 镜像,您可以运行如下命令安装: 26 | 27 | ```sh 28 | $ docker pull registry.cn-beijing.aliyuncs.com/pipcook/pipcook:latest 29 | ``` 30 | 31 | 安装完成之后,可以运行如下命令启动 docker: 32 | 33 | ```sh 34 | $ docker run -it registry.cn-beijing.aliyuncs.com/pipcook/pipcook:latest /bin/bash 35 | ``` 36 | 37 | ## 疑难排查 38 | 39 | 如果你有任何安装方面的问题,请反馈到我们的 [issue tracker](https://github.com/alibaba/pipcook/issues/new)。 40 | 41 | [通过 NPM 安装]: #通过-NPM-安装 42 | [通过源码安装]: contributing/guide-to-contributor#download-source 43 |
[Pipcook]: https://github.com/alibaba/pipcook 44 | -------------------------------------------------------------------------------- /docs/zh-cn/README.md: -------------------------------------------------------------------------------- 1 | # Pipcook 2 | 3 | [Pipcook][] 项目是一个开源工具集,它能让 Web 开发者更好地使用机器学习,从而开启和加速前端智能化时代! 4 | 5 | ## 用法 6 | 使用 Pipcook 进行机器学习开发非常简单,只需 4 步:安装、训练、测试和部署。 7 | 8 | 安装 Pipcook-cli: 9 | 10 | ```sh 11 | $ npm install -g @pipcook/cli 12 | ``` 13 | 14 | 从[内置 pipeline](https://github.com/alibaba/pipcook/tree/main/example/pipelines) 选择一个进行训练,比如图片分类: 15 | 16 | ```sh 17 | $ pipcook train https://cdn.jsdelivr.net/gh/alibaba/pipcook@main/example/pipelines/image-classification-mobilenet.json -o output 18 | ``` 19 | 20 | Pipeline 中指定的数据集包含了两类图片,分别是 `avatar` 和 `blurBackground`。 21 | 在训练结束之后,我们可以使用训练结果进行预测: 22 | 23 | ```sh 24 | $ pipcook predict ./output/image-classification-mobilenet.json -s ./output/data/test/blurBackground/4572_58__1500.94_453.jpg 25 | Origin result:[{"id":1,"category":"blurBackground","score":1}] 26 | ``` 27 | 28 | 模型的输入是一张验证数据集的图片,分类是 `blurBackground`,预测的结果显示当前输入的图片分类为 `blurBackground`,可信度为 1。 29 | 30 | 想要部署服务? 31 | 32 | ```sh 33 | $ pipcook serve ./output 34 | ℹ preparing framework 35 | ℹ preparing scripts 36 | ℹ preparing artifact plugins 37 | ℹ initializing framework packages 38 | Pipcook has served at: http://localhost:9091 39 | ``` 40 | 41 | 接下来,打开浏览器并访问 `http://localhost:9091` 就可以访问到你的图片分类服务了。 42 | 43 | ## 为什么要开发 Pipcook 44 | 45 | 它旨在使 Web 工程师能够在零门槛的前提下使用机器学习,并拥有将前端技术领域带到智能领域的视角。[Pipcook][] 的目标就是成为机器学习和前端交互的跨领域工具包。 46 | 47 | 我们将完全基于前端应用程序来设计 Pipcook API,并专注于前端领域,以真实解决 Web 工程师使用机器学习的痛点来开发 Pipcook。以面向 Web 友好为原则,来推动机器学习工程和前端工程师的融合。 48 | 49 | ## 简单的介绍 50 | 51 | [Pipcook][] 项目提供了若干独立的子项目,包括机器学习工作流框架、命令行管理工具、机器学习的 JavaScript 运行时。你也可以在其他项目中使用这些框架来搭建你所需要的系统。 52 | 53 | ### 设计原则 54 | 55 | 在 [Pipcook][] 中,我们遵循一些基本的设计原则,来保证整个软件是模块化和灵活的,这些原则也能帮助社区来对 [Pipcook][] 未来的方向作出指导。 56 | 57 | - **模块化** 项目中包含了一些子项目,它们自身都必须保证是良好定义的。 58 | - **可更换** 项目中包含了足够的模块来构建现在的 Pipcook,不过通过模块化的架构和规范,开发者可以按照自己的需要将部分模块切换为其他的实现方式。 59 | 60 | ### 受众 61 | 62 | [Pipcook][] 面向以下的 Web 工程师: 63 | 64 | - 想要学习机器学习 65 | - 想要训练和部署自己的模型 66 | - 想要优化模型的性能,比如针对一个图片分类模型,有一个更高的准确度 67 | 68 | > 如果你满足上面条件之一,那么就尝试从[安装](INSTALL.md)开始吧。 69 | 70 | ### 子项目 71 | 72 | __Pipcook Pipeline__ 73 | 74 | 它用于表达机器学习的工作流,其中包含了 Pipcook Script,在这一层,我们需要保证整个系统的稳定性和拓展性,同时使用 [Script](manual/intro-to-script.md) 机制来支持丰富的数据源、数据流、训练和验证。 75 | 76 | 一条 Pipcook Pipeline 由多个 script 组成,通过配置不同的脚本以及参数,最终会输出一个目录,其中包含了训练好的模型。 77 | 78 | __Pipcook Bridge to Python__ 79 | 80 | 对于 JavaScript 工程师来说,开始机器学习最困难的一点就是缺乏一套成熟的工具集。在 Pipcook 中,我们提供了 **Boa**,它使用 N-API 将 [CPython][] 集成在了 Node.js 环境,从而让开发者能够通过 JavaScript 访问到 Python 生态来解决这个痛点。 81 | 82 | 通过它,开发者可以毫无顾虑地在 Node.js 中使用诸如 `numpy`、`scikit-learn`、`jieba` 或 `tensorflow` 这样的 Python 包。 83 | 84 | ## 下一步 85 | 86 | 看到这里,已经按捺不住想要使用 [Pipcook][] 了吗?可以按照下面的介绍开始你下一步的学习之旅: 87 | 88 | - [如何安装](INSTALL.md) 89 | - [什么是机器学习](tutorials/machine-learning-overview.md) 90 | - [如何使用 Pipeline](manual/intro-to-pipeline.md) 91 | - [如何使用 Boa](manual/intro-to-boa.md) 92 | - [如何使用 Pipcook Tools](manual/pipcook-tools.md) 93 | 94 | [Pipcook]: https://github.com/alibaba/pipcook 95 | [CPython]: https://github.com/python/cpython 96 | -------------------------------------------------------------------------------- /docs/zh-cn/_navbar.md: -------------------------------------------------------------------------------- 1 | - API 文档 2 | - [Runtime](typedoc/runtime/index.html) 3 | - [Script](typedoc/script/index.html) 4 | - 选择语言 5 | - [English](/) 6 |
- [中文](/zh-cn/) 7 | 8 | -------------------------------------------------------------------------------- /docs/zh-cn/_sidebar.md: -------------------------------------------------------------------------------- 1 | - [什么是 Pipcook](/zh-cn/README.md) 2 | - [安装](/zh-cn/INSTALL.md) 3 | - 用户手册 4 | - [Pipeline](/zh-cn/manual/intro-to-pipeline.md) 5 | - [脚本](/zh-cn/manual/intro-to-script.md) 6 | - [Boa 使用指南](/zh-cn/manual/intro-to-boa.md) 7 | - [命令行工具](/zh-cn/manual/pipcook-tools.md) 8 | - 教程 9 | - [开始机器学习](/zh-cn/tutorials/machine-learning-overview.md) 10 | - [在 Node.js 中使用 Python](/zh-cn/tutorials/using-python-functions-in-nodejs.md) 11 | - [分类图片中的前端组件](/zh-cn/tutorials/component-image-classification.md) 12 | - 如何贡献 13 | - [贡献者指南](/zh-cn/contributing/guide-to-contributor.md) 14 | - [维护者指南](/zh-cn/contributing/guide-to-collaborator.md) 15 | - [贡献脚本](/zh-cn/contributing/contribute-a-script.md) 16 | - 规范 17 | - [脚本](/zh-cn/spec/script.md) 18 | - [数据集](/zh-cn/spec/dataset.md) 19 | - [FAQ](/zh-cn/faq/index.md) 20 | - [pipcook 框架](/zh-cn/faq/pipcook-framework.md) 21 | - [插件](/zh-cn/faq/plugins.md) 22 | - [术语表](/zh-cn/GLOSSORY.md) 23 | -------------------------------------------------------------------------------- /docs/zh-cn/contributing/guide-to-contributor.md: -------------------------------------------------------------------------------- 1 | # Contributor Guide 2 | 3 | Pipcook is a community-driven open source project. We do our best to make every bug fix, every new feature, and every decision about how this project evolves visible and transparent to everyone in this community. 4 | 5 | Therefore, we try to keep everything from the source code to our documentation friendly to contributors, and we have laid out several paths to make it easier to participate in Pipcook. If you want to get involved, follow the one that fits you. 6 | 7 | - If you are going to browse the source code only, go to [GitHub](https://github.com/alibaba/pipcook). 8 | - If you are a rookie with no experience in contributing to any open source project, we have organized [good first issue][] tasks for you; all of them are relatively simple and easy to start with. 9 | - If you want to learn machine learning by contributing to this project, you can try our [good first model][] to help us with some model implementation and migration tasks (rest assured, you only need to call into the Python ecosystem through [Boa][]). 10 | - Otherwise, discussions on any of our issues are open to everyone, and you are welcome to contribute your ideas. 11 | 12 | ## Submit a patch 13 | 14 | Next, let's take a look at how to submit patches to Pipcook.
15 | 16 | ### Requirements 17 | 18 | - macOS / Linux / Windows 19 | - Node.js >= 12 20 | 21 | ### Download source 22 | 23 | Clone the repository from GitHub: 24 | 25 | ```bash 26 | $ git clone git@github.com:alibaba/pipcook.git 27 | ``` 28 | 29 | ### Build from source 30 | 31 | And install the requirements and build: 32 | 33 | ```bash 34 | $ npm install 35 | $ npm run build 36 | ``` 37 | 38 | We provide a way to use [tuna mirror](https://mirrors.tuna.tsinghua.edu.cn/) for downloading Python and packages: 39 | 40 | ```sh 41 | $ BOA_TUNA=1 npm install 42 | ``` 43 | 44 | Or you could specify your custom miniconda mirror and Python index page: 45 | 46 | ```sh 47 | $ export BOA_CONDA_MIRROR=https://mirrors.tuna.tsinghua.edu.cn/anaconda/miniconda # this is for miniconda 48 | $ export BOA_CONDA_INDEX=https://pypi.tuna.tsinghua.edu.cn/simple # this is for pip 49 | $ npm install 50 | ``` 51 | 52 | ### Test 53 | 54 | Run all the tests with the following: 55 | 56 | ```bash 57 | $ npm test 58 | ``` 59 | 60 | And run tests for a single specific package: 61 | 62 | ```bash 63 | $ ./node_modules/.bin/lerna run test --scope <package-name> 64 | ``` 65 | 66 | ### Pipeline 67 | 68 | ```bash 69 | $ sh tools/run_pipeline.sh <pipeline_name> 70 | ``` 71 | 72 | The `pipeline_name` is the name of the pipeline file under "test/pipelines", such as: 73 | 74 | - "text-bayes-classification" 75 | - "mnist-image-classification" 76 | - "databinding-image-classification" 77 | 78 | ### Push and create a pull request 79 | 80 | After the local tests pass, you can push the code and create a pull request: 81 | 82 | ```sh 83 | $ git push git@github.com:<username>/pipcook.git 84 | ``` 85 | 86 | ## Internal documentation 87 | 88 | ### Plugin Specification 89 | 90 | You can refer to [the Plugin Specification](../spec/plugin.md). 91 | 92 | We have defined a set of interfaces for each plugin. Each type of plugin must be implemented strictly according to the interfaces. The detailed information is as follows: 93 | 94 | - [Data Collect](../spec/plugin/0-data-collect.md) 95 | - [Data Access](../spec/plugin/1-data-access.md) 96 | - [Data Process](../spec/plugin/2-data-process.md) 97 | - [Model Load](../spec/plugin/3-model-define.md) 98 | - [Model Train](../spec/plugin/4-model-train.md) 99 | - [Model Evaluate](../spec/plugin/5-model-evaluate.md) 100 | 101 | ### Dataset Specification 102 | 103 | For data reading and processing involved in the development, please refer to our [Dataset Specification](../spec/dataset.md).
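To recap the patch workflow above, a typical local verification pass before opening a pull request looks like this (the pipeline name is one of the files under `test/pipelines` listed earlier):

```sh
$ npm install                  # install dependencies and bootstrap packages
$ npm run build                # compile all packages
$ npm test                     # lint and run unit tests across packages
$ sh tools/run_pipeline.sh text-bayes-classification   # smoke-test one pipeline
```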
104 | 105 | [good first issue]: https://github.com/alibaba/pipcook/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22 106 | [good first model]: https://github.com/alibaba/pipcook/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+model%22 107 | [Boa]: https://github.com/alibaba/pipcook/tree/master/packages/boa 108 | -------------------------------------------------------------------------------- /docs/zh-cn/faq/index.md: -------------------------------------------------------------------------------- 1 | # 常见问题 2 | 3 | ## Pipcook 核心功能相关 4 | 5 | - [我应该在哪里放置我的 pipeline 的配置文件?](./pipcook-framework.md#q1) 6 | 7 | - [为什么 Pipcook 安装非常缓慢?](./pipcook-framework.md#q2) 8 | 9 | - [我可以在 Electron 里面使用 Pipcook 吗?](./pipcook-framework.md#q3) 10 | 11 | - [Pipcook 支持 Windows 系统吗?](./pipcook-framework.md#q4) 12 | 13 | ## Pipcook 插件相关(v1.x) 14 | 15 | - [在 yolov5 的链路中我怎样指定用哪张显卡进行训练?](./plugins.md#q1) 16 | 17 | - [在 yolov5 的训练中我怎样指定仅使用 cpu 训练?](./plugins.md#q2) 18 | -------------------------------------------------------------------------------- /docs/zh-cn/faq/pipcook-framework.md: -------------------------------------------------------------------------------- 1 | # 常见问题 2 | 3 | ## 我应该在哪里放置我的 pipeline 的配置文件? 4 | 5 | Pipcook 会在后台启动服务,用户可以通过 cli 工具或者可视化工具访问服务。因此,我们对您的工作目录没有特别的要求,你可以在任意地方放置你的配置文件。只需要使用 `pipcook run <url>` 并且指定正确的配置文件路径就可以进行训练了,url 参数支持本地路径或者远程 url。 6 | 7 | ## 为什么 Pipcook 安装非常缓慢? 8 | 9 | 目前 Pipcook 的安装依赖于 npm 源和 pip 源。有可能这些默认源的链接非常缓慢。你可以指定 `pipcook init -c <registry>` 去改变你的 npm 源,同时,你也可以使用 `pipcook init --tuna` 指定 pip 清华源。 10 | 11 | ## 我可以在 Electron 里面使用 Pipcook 吗? 12 | 13 | 理论上只要 Node.js >= 12.17 或者 >= 14.0.0 并且相应的 N-API 可用,你就可以使用 Pipcook。同时,Pipcook 也将会支持产出 WASM 模型,所以你可以非常容易地集成到你的系统中去。 14 | 15 | ## Pipcook 支持 Windows 系统吗? 16 | 17 | 目前不支持,未来将会支持。 18 | -------------------------------------------------------------------------------- /docs/zh-cn/faq/plugins.md: -------------------------------------------------------------------------------- 1 | # 常见问题(v1.x) 2 | 3 | ### 在 yolov5 的链路中我怎样指定用哪张显卡进行训练? 4 | 5 | 你可以设置 `$CUDA_VISIBLE_DEVICES` 这个环境变量,它的值就是你的显卡编号,默认我们会使用 GPU:0 进行训练。注意环境变量需要设置在 daemon 进程运行的机器上,并且在 daemon 启动之前设置。 6 | 7 | ### 在 yolov5 的训练中我怎样指定仅使用 cpu 训练? 8 | 9 | 如果你的环境支持 GPU 训练,我们默认会使用 GPU。如果你想禁掉此功能,可以设置环境变量 `export CUDA_VISIBLE_DEVICES=""`。 注意环境变量需要设置在 daemon 进程运行的机器上,并且在 daemon 启动之前设置。 10 | -------------------------------------------------------------------------------- /docs/zh-cn/manual/intro-to-framework.md: -------------------------------------------------------------------------------- 1 | # Pipcook 框架 2 | 3 | Pipcook 使用的脚本不会打包一些比较重的依赖,比如 `@tensorflow/tfjs`,那么我们如何在脚本中使用它们呢?
4 | 事实上,Pipcook 会把这些依赖打包在所谓的`框架`中。框架是一组和平台、Node.js 版本相关的包,比如以下 pipeline: 5 | ```json 6 | { 7 | "specVersion": "2.0", 8 | "type": "ObjectDetection", 9 | "datasource": "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/object-detection-yolo/build/datasource.js?url=https://pc-github.oss-us-west-1.aliyuncs.com/dataset/object-detection-yolo-min.zip", 10 | "dataflow": [ 11 | "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/object-detection-yolo/build/dataflow.js?size=416&size=416" 12 | ], 13 | "model": "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/object-detection-yolo/build/model.js", 14 | "artifact": [{ 15 | "processor": "pipcook-artifact-zip@0.0.2", 16 | "target": "./object-detection-yolo-model.zip" 17 | }], 18 | "options": { 19 | "framework": "tfjs@3.8", 20 | "train": { 21 | "epochs": 10, 22 | "gpu": true 23 | } 24 | } 25 | } 26 | ``` 27 | 28 | 这条 pipeline 使用了框架 `tfjs@3.8`,包含 `@tensorflow/tfjs-node@3.8`、`@tensorflow/tfjs-node-gpu@3.8`,需要注意的是,脚本不需要引用这两个包,而是引用 `@tensorflow/tfjs`,在框架加载时,会根据 `train.gpu` 选项确定是否需要将 `@tensorflow/tfjs-node-gpu@3.8` 设置为 backend,由于 macOS 不支持 CUDA,因此框架也会抹平平台差异:`train.gpu` 在 macOS 系统上不会生效。这种能力是由框架中的初始化脚本实现的,这就涉及到框架的组织结构:每个框架中包含了一个框架描述文件,框架初始化脚本和若干依赖文件夹。以 `tfjs@3.8` 包为例,目录结构如下: 29 | 30 | ```sh 31 | ├── framework.json 32 | ├── index.js 33 | └── node_modules 34 | ``` 35 | 36 | 其中 `framework.json` 是框架的描述文件,`index.js` 是框架的初始化脚本,`node_modules` 内包含的则是框架所要提供的依赖文件夹。 37 | `framework.json` 如下: 38 | 39 | ```json 40 | { 41 | "name": "tfjs", 42 | "version": "3.8", 43 | "packages": [ 44 | { 45 | "name": "@tensorflow/tfjs-node", 46 | "version": "3.8.0", 47 | "type": "js" 48 | }, 49 | { 50 | "name": "@tensorflow/tfjs-node-gpu", 51 | "version": "3.8.0", 52 | "type": "js" 53 | } 54 | ] 55 | } 56 | ``` 57 | 58 | 初始化脚本导出一个初始化函数,将在每次 pipeline 运行到框架初始化阶段时被调用,传入 pipeline 的 `options` 字段,示例如下: 59 | 60 | ```js 61 | const os = require('os'); 62 | 63 | module.exports = { 64 | initialize(opts) { 65 | if ( 66 | opts && opts.train 67 | && ( 68 | opts.train.gpu === 'true' 69 | || opts.train.gpu === true 70 | ) 71 | ) { 72 | if (os.platform() !== 'darwin') { 73 | require('@tensorflow/tfjs-node-gpu'); 74 | console.log('gpu enabled'); 75 | } else { 76 | require('@tensorflow/tfjs-node'); 77 | console.warn('platform darwin does not support gpu'); 78 | } 79 | } else { 80 | require('@tensorflow/tfjs-node'); 81 | console.log('gpu disabled'); 82 | } 83 | } 84 | } 85 | ``` 86 | 87 | 另外,`tfjs` 的 backend 在不同平台都有不同的二进制库,所以 Pipcook 会根据环境的不同选择下载不同的包,比如在 macOS、Node.js v12.22 上,实际下载的框架文件为 `https://pipcook-cloud.oss-cn-hangzhou.aliyuncs.com/framework/node12-py37/tfjs%403.8-darwin-x64-v8.zip`,而在 Linux、Node.js v14.0 上,则会下载 `https://pipcook-cloud.oss-cn-hangzhou.aliyuncs.com/framework/node14-py37/tfjs%403.8-linux-x64-v6.zip`。当然,如果脚本依赖一些自定义的框架,也可以直接把完整的 url 填入 `framework` 选项来直接使用,或者通过自建一个框架镜像,然后通过 `train`、`predict`、`serve` 命令的 `-m` 参数指定框架镜像地址。 88 | 一个完整的框架镜像目录结构如下: 89 | 90 | ```sh 91 | ├── node14-py37/{framework-name}@{version}-{os}-{arch}-{napi-version}.zip 92 | └── node12-py37/{framework-name}@{version}-{os}-{arch}-{napi-version}.zip 93 | ``` 94 | 95 | 链接中的 `py37` 指的是 `Boa` 支持的 Python 版本,目前只支持 v3.7。 96 | -------------------------------------------------------------------------------- /docs/zh-cn/manual/intro-to-pipeline.md: -------------------------------------------------------------------------------- 1 | # Pipeline 2 | 3 | 在 Pipcook 中,我们使用 Pipeline 来表示一个模型的训练工作流,那么这个 Pipeline 到底是什么样的呢?在 Pipeline 中,开发者能够使用 JSON 来描述从样本收集、模型定义、模型训练和模型评估这些阶段。 4 | 5 | ```js 6 |
{ 7 | "specVersion": "2.0", 8 | "type": "ImageClassification", 9 | "datasource": "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/image-classification-mobilenet/build/datasource.js?url=http://ai-sample.oss-cn-hangzhou.aliyuncs.com/image_classification/datasets/imageclass-test.zip", 10 | "dataflow": [ 11 | "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/image-classification-mobilenet/build/dataflow.js?size=224&size=224" 12 | ], 13 | "model": "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/image-classification-mobilenet/build/model.js", 14 | "artifact": [{ 15 | "processor": "pipcook-artifact-zip@0.0.2", 16 | "target": "/tmp/mobilenet-model.zip" 17 | }], 18 | "options": { 19 | "framework": "tfjs@3.8", 20 | "train": { 21 | "epochs": 20, 22 | "validationRequired": true 23 | } 24 | } 25 | } 26 | ``` 27 | 28 | 如上面 JSON 所示,一个 Pipeline 由 `datasource`、`dataflow` 和 `model` 这三类 Script,以及构建插件 `artifact`、Pipeline 选项 `options` 组成。 29 | 每个 Script 通过 URI query 传递参数,model script 的参数也可以通过 `options.train` 定义。 30 | `artifact` 定义了一组构建插件,每个构建插件会在训练结束后被依次调用,从而可以对输出的模型进行转换、打包、部署等。 31 | `options` 包含框架定义和训练参数的定义。 32 | 接着,Pipcook 就会根据这个 JSON 文件中定义的 URI 和参数,来准备环境,运行 Script,最后输出和处理模型。 33 | 34 | > Pipeline 中的脚本支持 `http`、`https` 和 `file` 协议。 35 | 36 | > 如果想获取更多 Script 相关的知识,可以阅读[如何编写 Pipcook Script](./intro-to-script.md)。 37 | 38 | 下一步,我们在定义好一个 Pipeline 文件后,就能通过 Pipcook 来运行它了。 39 | 40 | ## 准备工作 41 | 42 | 通过[命令行工具安装指南](./pipcook-tools.md#环境设置)来做运行 Pipeline 前的准备。 43 | 44 | ## 运行 45 | 46 | 将上面的 Pipeline 保存在磁盘上,然后执行: 47 | 48 | ```sh 49 | $ pipcook run /path/to/your/pipeline-config.json 50 | ``` 51 | 52 | 或者将其托管在静态资源服务器上: 53 | 54 | ```sh 55 | $ pipcook run https://host/path/to/your/pipeline-config.json 56 | ``` 57 | 58 | 执行完成后,训练好的模型会生成在当前[工作目录](https://linux.die.net/man/3/cwd)下,以当前时间戳命名的文件夹中,同时模型文件会被构建插件 `pipcook-artifact-zip` 压缩成 zip 文件并保存在 tmp 目录下。 59 | 60 | ``` 61 | ├── pipeline-config.json 62 | ├── cache 63 | ├── data 64 | ├── framework 65 | ├── model 66 | └── scripts 67 | ``` 68 | 69 | model 目录下保存了模型文件,在后续的版本迭代中,会增加模型使用的能力。 70 | -------------------------------------------------------------------------------- /docs/zh-cn/manual/intro-to-script.md: -------------------------------------------------------------------------------- 1 | # Pipcook 脚本 2 | 3 | 在 Pipcook 中,每一个 Pipeline 表示一个特定的机器学习任务,那么我们如何定义一个工作流呢?Pipcook 使用脚本来定义和配置 Pipeline 中不同的阶段。Pipcook 脚本是一个暴露了特定方法的 js 脚本文件,包含 3 种不同的类型,分别为 datasource、dataflow 和 model,具体的定义看[这里](../spec/script.md)。比如一个文本分类的任务,就可以用下面的脚本来组成: 4 | 5 | - `datasource` 通过 datasource 脚本来下载样本数据,提供数据访问接口。 6 | - `dataflow` 将下载的数据集格式转换为后面模型能够接受的格式(在此例中不需要)。 7 | - `model` 定义文本分类的模型,[朴素贝叶斯分类器](https://en.wikipedia.org/wiki/Naive_Bayes_classifier),通过样本数据接口获取样本进行模型训练,并评估准确度。 8 | 9 | > 上述 Pipeline 的源码定义在[这里](https://github.com/alibaba/pipcook/blob/main/example/pipelines/text-classification-bayes.json)。 10 | 11 | 通过上面的例子,对于一个文本分类器的任务,我们遵循机器学习工作流,它按照不同类型的子任务顺序执行,而每个子任务就对应一个用户定义的脚本,同时用户也可以以较低成本,快速地调整整个任务的 Pipeline。 12 | 13 | > 可用的官方脚本在[这里](https://github.com/imgcook/pipcook-script)。 14 | -------------------------------------------------------------------------------- /docs/zh-cn/manual/pipcook-tools.md: -------------------------------------------------------------------------------- 1 | # Pipcook Tools 2 | 3 | Pipcook Tools 是 Pipcook 提供给开发者使用的命令行工具,它帮助开发者运行和管理 Pipeline。 4 | 5 | ## 安装 6 | 7 | ```sh 8 | $ npm install @pipcook/cli -g 9 | ``` 10 | 11 | 查看[安装指南](../INSTALL.md)获取完整的安装引导。 12 | 13 | ## 使用指南 14 | 15 | 运行一个 Pipeline,只需要执行如下命令: 16 | 17 |
```sh 18 | $ pipcook run protocol://location/to/your/pipeline-config.json 19 | ``` 20 | 21 | 支持的 pipeline 文件协议包括:`http:`、`https:`、`file:`,默认为 `file:` 协议。 22 | 更多运行参数可以通过以下命令获取: 23 | 24 | ```sh 25 | $ pipcook run --help 26 | ``` 27 | 28 | > 关于如何编写 pipeline,可以看[这里](./intro-to-pipeline.md)。 29 | 30 | ## 缓存管理 31 | 32 | 通过 `pipcook run` 命令运行 Pipeline 时,如果 Script 或 framework 为非 `file` 协议,则会将其缓存到 `~/.pipcook` 目录下,以便加速下次运行。 33 | 如果想手动删除这些缓存,可以通过以下命令: 34 | ```sh 35 | $ pipcook clean 36 | ``` 37 | -------------------------------------------------------------------------------- /docs/zh-cn/spec/dataset.md: -------------------------------------------------------------------------------- 1 | # Dataset Specification 2 | 3 | Datasets are an important part of machine learning: subsequent models are built on top of them, so datasets need to be managed. The following is the standard format of the dataset that Pipcook should save after the data is collected through the `datasource` script. 4 | 5 | For different dataset formats, the `datasource` script is used to smooth over the differences. 6 | 7 | #### Image 8 | 9 | PascalVOC Dataset format, the detailed directory is as follows: 10 | 11 | ``` 12 | 📂dataset 13 | ┣ 📂annotations 14 | ┃ ┣ 📂train 15 | ┃ ┃ ┣ 📜... 16 | ┃ ┃ ┗ 📜${image_name}.xml 17 | ┃ ┣ 📂test 18 | ┃ ┗ 📂validation 19 | ┗ 📂images 20 | ┣ 📜... 21 | ┗ 📜${image_name}.jpg 22 | ``` 23 | 24 | Or, represented in XML (standard PascalVOC annotation layout): 25 | 26 | ```xml 27 | <annotation> 28 | <folder>folder path</folder> 29 | <filename>image name</filename> 30 | <size> 31 | <width>width</width> 32 | <height>height</height> 33 | </size> 34 | <object> 35 | <name>category name</name> 36 | <bndbox> 37 | <xmin>left</xmin> 38 | <ymin>top</ymin> 39 | <xmax>right</xmax> 40 | <ymax>bottom</ymax> 41 | </bndbox> 42 | </object> 43 | </annotation> 44 | 45 | ``` 46 | 47 | #### Text 48 | 49 | The text category should be a CSV file. The first column is the text content, and the second column is the category name. The delimiter is ',' without a header.
50 | 51 | ```csv 52 | prod1, type1 53 | prod2, type2 54 | prod3, type2 55 | prod4, type1 56 | ``` 57 | -------------------------------------------------------------------------------- /docs/zh-cn/spec/script.md: -------------------------------------------------------------------------------- 1 | # 脚本规范 2 | 3 | [Pipcook][] 使用脚本来完成特定的机器学习任务,它使得框架足够简单、稳定和高效。 4 | 5 | 同时,通过定义一套脚本规范,我们允许任何人开发脚本来拓展 [Pipcook][]。理论上,我们可以通过脚本来完成任何机器学习任务。 6 | 7 | 8 | ## 分类 9 | 10 | 下面是所有在 Pipcook 中支持的脚本分类。 11 | 12 | - datasource: [`DatasourceEntry: (options: Record<string, string>, context: ScriptContext) => Promise<DatasetPool>`](https://alibaba.github.io/pipcook/typedoc/script/index.html#datasourceentry) 从数据源中下载数据,提供数据访问接口。 13 | - dataflow: [`DataflowEntry: (api: DatasetPool, options: Record<string, string>, context: ScriptContext) => Promise<DatasetPool>`](https://alibaba.github.io/pipcook/typedoc/script/index.html#dataflowentry) 从 datasource 获取数据,处理并通过返回数据访问接口让下一个 dataflow 脚本或 model 脚本获取处理后的数据。 14 | - model: [`{ train: ModelEntry, predict: PredictEntry }`](https://alibaba.github.io/pipcook/typedoc/script/interfaces/extmodelentry.html) 从 dataflow 或 datasource 脚本中获取样本数据,并进行训练,验证,产出模型,或者通过 predict 对输入的数据进行预测。 15 | 16 | ## 开发 17 | 18 | 查看[贡献者文档](../contributing/contribute-a-script.md)来学习如何开发一个新的脚本。 19 | 20 | [Pipcook]: https://github.com/alibaba/pipcook 21 | [Pipcook Script]: ../GLOSSORY.md#pipcook-script 22 | [PyPI]: https://pypi.org 23 | -------------------------------------------------------------------------------- /docs/zh-cn/tutorials/machine-learning-overview.md: -------------------------------------------------------------------------------- 1 | # 开始机器学习 2 | 3 | 从这篇文章,我们将介绍什么是机器学习,以及如何使用 [Pipcook][] 来完成机器学习任务。 4 | 5 | ## 如何定义一个机器学习问题 6 | 7 | 一般来说,一个学习问题就是输入 N 个样本数据,然后输出与输入相对应的结果,下面的例子将展示如何教会一个程序学习 Node.js 书籍和售价的关系: 8 | 9 | ```ts 10 | const BookPriceModel: Record<string, number> = {}; 11 | const learnBookPrice = (book: string, price: number) => BookPriceModel[book] = price; 12 | const predictBookPrice = (book: string) => BookPriceModel[book]; 13 | 14 | // prediction without learning. 15 | predictBookPrice('Node.js in Action'); // undefined, because the program doesn't know anything yet 16 | 17 | // learn "Node.js in Action" and "Dive into Node.js". 18 | learnBookPrice('Node.js in Action', 99.0); 19 | learnBookPrice('Dive into Node.js', 199.0); 20 | 21 | // prediction after learning. 22 | predictBookPrice('Node.js in Action'); // 99.0 23 | predictBookPrice('Dive into Node.js'); // 199.0 24 | ``` 25 | 26 | **机器学习**问题也是类似的,只不过可以通过机器学习算法让机器能更"智能"地学习,能够对于一些未知数据作出真正的预测结果,比如可以帮助作者决定写一本什么样的书能够卖得更贵: 27 | 28 | ```js 29 | predictBookPrice('Pipcook in Action'); // 89.0 30 | predictBookPrice('Dive into Pipcook'); // 199.0 31 | ``` 32 | 33 | 机器学习并非万能灵药,因此接下来看看它到底能解决哪些问题,下面我们按照数据类型分为不同的任务类型: 34 | 35 | | Sample Type | Problem Category | Description | 36 | |------------------|--------------------------|--------------------------------| 37 | | Image | 图片分类 | 对于给定类型的图片进行分类 | 38 | | | 图片生成 | 生成图片 | 39 | | | 目标检测 | 识别出给定的对象,并返回目标的位置和类型 | 40 | | | 图片分割 | 与图片检测类似,但是返回的是目标轮廓的像素级显示 | 41 | | | 图片聚类 | 返回自动分类后的结果 | 42 | | Text | 文本分类 | 对于给定类型的文本进行分类 | 43 | | | 命名实体识别 | 从一句话中识别出命名实体 | 44 | | | 关系提取 | 抽取句子与句子间的关系 | 45 | | | 指代消解 | 将一句话中的代词转换为实际代表的个体 | 46 | | | 写作纠错 | 辅助写作的纠错功能 | 47 | | | 翻译 | 从一种语言翻译到另一种语言 | 48 | | | 问答 | 根据问题生成对应的回答 | 49 | | | 文本摘要 | 从一段长文本生成摘要文本 | 50 | | | 文本创作 | 生成一些如诗歌、散文、词等艺术作品 | 51 | | | 文本聚类 | 返回自动分类后的结果 | 52 | 53 | 那么我们如何在日常生活中使用上面的任务呢?我们可以来看看一个机器学习项目都会有哪些阶段: 54 | 55 | 1. 收集样本,并将它们处理成一种格式,用于给后面定义的模型学习数据中的特征。 56 | 2. 选择一个用于训练的机器学习模型,一般来说会根据任务类型和场景进行选择。 57 | 3. 
在开始训练之前,需要将上面的样本集分为训练集和测试集。 58 | 4. 训练阶段,将训练集输入到模型中,此时模型开始从训练集中学习特征。 59 | 5. 训练结束后,再将测试集输入到训练好的模型,来评估模型效果。 60 | 61 | > **训练集和测试集** 62 | > 63 | > 机器学习是关于学习数据集的某些特征,然后针对另一个数据集进行测试。机器学习中的一种常见做法是通过将数据集分成两部分来评估算法。我们称其中一组为训练集,在该集上我们学习数据中的特征;我们称另一组为测试集,在测试集上我们对学习到的特征进行测试。 64 | 65 | ## 加载数据集 66 | 67 | [MNIST][](Modified National Institute of Standards and Technology database)是一个大型手写数字识别数据集: 68 | 69 |
70 | (图:MNIST 手写数字样本示例) 71 |
72 | 73 | 接下来,我们使用手写数字识别作为例子,来介绍如何使用 [Pipcook][] 完成一个图片分类任务。 74 | 75 | 我们使用 Pipeline 来完整地描述机器学习任务,不同的脚本表示这个 Pipeline 中不同的阶段,然后再通过 Pipeline 将不同的阶段连接起来形成一个完整的机器学习工作流。 76 | 77 | 在 [Pipcook][] 中,构建一个分类任务的模型就是配置 Pipeline 的脚本,我们从使用 [datasource 脚本](https://github.com/imgcook/pipcook-script/blob/master/scripts/image-classification-mobilenet/src/datasource.ts) 加载 [MNIST][] 数据集开始创建 Pipeline: 78 | 79 | ```js 80 | { 81 | "datasource": "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/image-classification-mobilenet/build/datasource.js?url=http://ai-sample.oss-cn-hangzhou.aliyuncs.com/image_classification/datasets/mnist.zip" 82 | } 83 | ``` 84 | 85 | 这个脚本会下载 [MNIST][] 数据集并提供访问接口。 86 | 87 | ## 学习 88 | 89 | 在这个数字识别数据集的例子中,我们的目的是预测一张图片所代表的数字,那么我们给出的样本共有 10 个分类(0-9),每张图片属于其中一类,这也就是说,我们要让模型做到的是预测一张未知图片的类型,即从 0 到 9 的分类。 90 | 我们使用 [image classification dataflow](https://github.com/imgcook/pipcook-script/blob/master/scripts/image-classification-mobilenet/src/dataflow.ts) 脚本来调整每张图片的尺寸为 224x224,用一个数组 `[224, 224]` 表示: 91 | ```js 92 | { 93 | "dataflow": "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/image-classification-mobilenet/build/dataflow.js?size=224&size=224" 94 | } 95 | ``` 96 | 97 | 然后定义[模型脚本](https://github.com/imgcook/pipcook-script/blob/master/scripts/image-classification-mobilenet/src/model.ts)和参数: 98 | ```js 99 | { 100 | "model": "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/image-classification-mobilenet/build/model.js", 101 | "options": { 102 | "framework": "tfjs@3.8", 103 | "train": { 104 | "epochs": 20, 105 | "validationRequired": true 106 | } 107 | } 108 | } 109 | ``` 110 | 111 | 这个脚本会使用 [mobilenet][] 来做图片分类任务,训练和评估基于 tfjs 的模型。 112 | 113 | 目前为止,Pipeline 就定义完成了,接下来就可以开始模型训练了。 114 | 115 | ```sh 116 | $ pipcook run pipeline.json 117 | ``` 118 | 119 | ## 预测 120 | 121 | 训练完成后,我们就能在输出目录中找到 model 目录,里面包含了训练好的模型。 122 | 123 | ``` 124 | 📂 model 125 | ┣ 📜 model.json 126 | ┗ 📜 weights.bin 127 | ``` 128 | 129 | [Pipcook]: https://github.com/alibaba/pipcook 130 | [MNIST]: https://en.wikipedia.org/wiki/MNIST_database 131 | [Introduction to Pipeline]: ../manual/intro-to-pipeline.md 132 | [mobilenet]: https://github.com/imgcook/pipcook-script/blob/master/scripts/image-classification-mobilenet 133 | -------------------------------------------------------------------------------- /example/pipelines/README-CN.md: -------------------------------------------------------------------------------- 1 | # Pipelines 2 | 3 | [English](./README.md) 4 | 5 | ## 贝叶斯-文本分类 6 | 7 | ### 数据集 8 | 9 | 文本分类 Pipeline 的数据集组织格式如下: 10 | 11 | ```sh 12 | .
13 | ├── test 14 | │   └── textDataBinding.csv 15 | └── train 16 |     └── textDataBinding.csv 17 | ``` 18 | 19 | `train` 文件夹内是训练数据,`test` 文件夹内是测试数据,存储为 csv 格式。csv 文件内有两列数据,分别为 input 和 output,input 为样本数据,output 为样本标签,如: 20 | 21 | | input | output | 22 | | ------------------------------------------------------------ | --------- | 23 | | 原创春秋新款宽松黑色牛仔裤男贴布哈伦裤日系潮流胖男大码长裤子 | itemTitle | 24 | | 茗缘翡翠 | shopName | 25 | | 挂画精美 种类丰富 | itemDesc | 26 | 27 | 这 3 个样本表示了 3 类不同的文本,它们的标签分别是 `itemTitle`、`shopName`、`itemDesc`。需要注意的是,数据集中的数据需要尽可能丰富,且分布相对均匀,也就是说每个类别的样本数量应该差不多,差异过大将影响模型的准确度。 28 | 29 | 数据源可以为本地文件夹路径: 30 | 31 | ```json 32 | { 33 | "datasource": "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/text-classification-bayes/build/datasource.js?url=file:///path/to/dataset-directory" 34 | } 35 | ``` 36 | 37 | `/path/to/dataset-directory` 内包含 `test` 和 `train` 文件夹。 38 | 39 | 或者可以将 `test` 和 `train` 目录压缩成 zip 文件,存储在 OSS 上,修改数据源为 zip 文件 url: 40 | 41 | ```json 42 | { 43 | "datasource": "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/text-classification-bayes/build/datasource.js?url=http://oss-host/my-dataset.zip" 44 | } 45 | ``` 46 | 47 | ### 模型参数 48 | 49 | 贝叶斯模型支持中文和英文两种模式,可以通过 `mode` 参数指定 `cn` 或者 `en`,默认为 `cn`。 50 | 51 | ```json 52 | { 53 | "model": "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/text-classification-bayes/build/model.js?mode=en" 54 | } 55 | ``` 56 | 57 | 由于贝叶斯模型使用了一些 `tfjs-backend-cpu` 上的算子,其他 backend 目前还未支持,所以在 `options` 字段中,我们定义了模型训练的 backend 为 `@tensorflow/tfjs-backend-cpu`。 58 | 59 | ## ResNet/MobileNet-图片分类 60 | 61 | ### 数据集 62 | 63 | 图片分类 Pipeline 的数据集组织格式如下: 64 | 65 | ```sh 66 | . 67 | ├── test 68 | │   ├── class-1 69 | │   └── class-2 70 | ├── train 71 | │   ├── class-1 72 | │   └── class-2 73 | └── validation 74 |     ├── class-1 75 |     └── class-2 76 | ``` 77 | 78 | `train` 文件夹内是训练数据,`test` 文件夹内是测试数据,`validation` 文件夹内是验证数据,目录中为各类别的图片文件夹,文件夹名称即图片的类别。 79 | 80 | 数据源可以是本地文件夹路径: 81 | 82 | ```json 83 | { 84 | "datasource": "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/image-classification/build/datasource.js?url=file:///path/to/dataset-directory" 85 | } 86 | ``` 87 | 88 | `/path/to/dataset-directory` 内包含 `test` 和 `train` 文件夹。 89 | 90 | 也可以把样本目录压缩成 zip 文件,存储在 OSS 上,修改数据源为 zip 文件 url: 91 | 92 | ```json 93 | { 94 | "datasource": "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/image-classification/build/datasource.js?url=http://oss-host/dataset.zip" 95 | } 96 | ``` 97 | 98 | ### 数据处理 99 | 100 | 对于图片分类任务来说,输入模型的所有样本图片维度(长宽)必须是一致的,而我们预定义的 MobileNet 和 ResNet 模型都要求输入 224 * 224 的图片,因此在模型训练开始前,我们会通过 `dataflow` 脚本对图片进行 resize 操作: 101 | ```json 102 | { 103 | "dataflow": [ 104 | "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/image-classification/build/dataflow.js?size=224&size=224" 105 | ] 106 | } 107 | ``` 108 | 109 | ### 模型参数 110 | 111 | 图片分类 pipeline 支持 MobileNet 和 ResNet 两种模型,`modelUrl` 参数指定 `mobilenet` 或者 `resnet`,默认为 `mobilenet`。 112 | 113 | ```json 114 | { 115 | "model": "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/image-classification/build/model.js?modelUrl=resnet" 116 | } 117 | ``` 118 | 119 | 另外,`options` 字段可以配置是否启用 GPU,训练的 epochs: 120 | 121 | ```json 122 | { 123 | "options": { 124 | "framework": "tfjs@3.8", 125 | "gpu": false, 126 | "train": { 127 | "epochs": 10 128 | } 129 | } 130 | } 131 | ``` 132 | 133 | GPU 默认为启用。epochs 越大,训练时长越久。 134 | 135 | ## YOLO-目标检测 136 | 137 | ### 数据集 138 | 139 | 目标检测 Pipeline 支持 [PascalVoc](../../docs/zh-cn/spec/dataset.md)
和 [Coco](https://cocodataset.org/#format-data) 两种数据集格式,通过定义 `format` 参数为 `pascalvoc` 或 `coco` 来指定当前数据集格式: 140 | 141 | ```json 142 | { 143 | "datasource": "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/object-detection-yolo/build/datasource.js?format=pascalvoc&url=https://host/dataset.zip" 144 | } 145 | ``` 146 | 147 | 同样的,如果在本地训练,可以将数据源改为本地文件夹路径: 148 | 149 | ```json 150 | { 151 | "datasource": "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/object-detection-yolo/build/datasource.js?format=pascalvoc&url=file:///path/to/dataset-directory" 152 | } 153 | ``` 154 | 155 | ### 数据处理 156 | 157 | 和图片分类 pipeline 一样,YOLO 要求输入模型的所有样本图片维度(长宽)必须是一致的,为 416 * 416,因此在模型训练开始前,我们会通过 `dataflow` 脚本对图片进行 resize 操作: 158 | ```json 159 | { 160 | "dataflow": [ 161 | "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/object-detection-yolo/build/dataflow.js?size=416&size=416" 162 | ] 163 | } 164 | ``` 165 | 166 | ### 模型参数 167 | 168 | `options` 字段可以配置目标检测 pipeline 是否启用 GPU,训练的 epochs,每次喂入模型的样本数量 batchSize 以及 early-stopping 的 patience 值: 169 | 170 | ```json 171 | { 172 | "options": { 173 | "framework": "tfjs@3.8", 174 | "gpu": false, 175 | "train": { 176 | "epochs": 100, 177 | "batchSize": 16, 178 | "patience": 10 179 | } 180 | } 181 | } 182 | ``` 183 | 184 | GPU 默认为启用。`patience` 表示 loss 在 patience 个 epoch 没有下降后停止训练。比如 `patience` 为 3 的情况下,如果连续出现 3 个 epoch loss 都没有下降,就会触发 early-stopping,训练会提前终止。 185 | -------------------------------------------------------------------------------- /example/pipelines/image-classification-mobilenet.json: -------------------------------------------------------------------------------- 1 | { 2 | "specVersion": "2.0", 3 | "type": "ImageClassification", 4 | "datasource": "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/image-classification/build/datasource.js?url=https://pipcook-cloud.oss-cn-hangzhou.aliyuncs.com/dataset/mnist.zip", 5 | "dataflow": [ 6 | "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/image-classification/build/dataflow.js?size=224&size=224" 7 | ], 8 | "model": "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/image-classification/build/model.js", 9 | "options": { 10 | "framework": "tfjs@3.8", 11 | "train": { 12 | "epochs": 10 13 | } 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /example/pipelines/image-classification-resnet.json: -------------------------------------------------------------------------------- 1 | { 2 | "specVersion": "2.0", 3 | "type": "ImageClassification", 4 | "datasource": "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/image-classification/build/datasource.js?url=https://pipcook-cloud.oss-cn-hangzhou.aliyuncs.com/dataset/mnist.zip", 5 | "dataflow": [ 6 | "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/image-classification/build/dataflow.js?size=224&size=224" 7 | ], 8 | "model": "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/image-classification/build/model.js", 9 | "options": { 10 | "framework": "tfjs@3.8", 11 | "train": { 12 | "epochs": 10, 13 | "modelUrl": "resnet" 14 | } 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /example/pipelines/object-detection-yolo.json: -------------------------------------------------------------------------------- 1 | { 2 | "specVersion": "2.0", 3 | "type": "ObjectDetection", 4 | "datasource": 
"https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/object-detection-yolo/build/datasource.js?format=pascalvoc&url=https://pipcook-cloud.oss-cn-hangzhou.aliyuncs.com/dataset/mask.zip", 5 | "dataflow": [ 6 | "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/object-detection-yolo/build/dataflow.js?size=416&size=416" 7 | ], 8 | "model": "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/object-detection-yolo/build/model.js", 9 | "options": { 10 | "framework": "tfjs@3.8", 11 | "train": { 12 | "epochs": 100, 13 | "batchSize": 16, 14 | "patience": 10 15 | } 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /example/pipelines/text-classification-bayes.json: -------------------------------------------------------------------------------- 1 | { 2 | "specVersion": "2.0", 3 | "type": "TextClassification", 4 | "datasource": "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/text-classification-bayes/build/datasource.js?url=https://pipcook-cloud.oss-cn-hangzhou.aliyuncs.com/dataset/text-classification.zip", 5 | "model": "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/text-classification-bayes/build/model.js", 6 | "options": { 7 | "framework": "tfjs@3.8-nlp", 8 | "backend": "@tensorflow/tfjs-backend-cpu" 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /lerna.json: -------------------------------------------------------------------------------- 1 | { 2 | "packages": [ 3 | "packages/core", 4 | "packages/costa", 5 | "packages/cli" 6 | ], 7 | "version": "independent", 8 | "loglevel": "verbose", 9 | "npmClient": "npm", 10 | "command": { 11 | "publish": { 12 | "npmClient": "npm", 13 | "registry": "https://registry.npmjs.org" 14 | }, 15 | "publishConfig": { 16 | "access": "public" 17 | }, 18 | "bootstrap": { 19 | "nohoist": [ 20 | "node-addon-api", 21 | "ice-scripts", 22 | "stylelint" 23 | ] 24 | } 25 | }, 26 | "hoist": true 27 | } 28 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "pipcook", 3 | "private": true, 4 | "workspaces": [ 5 | "packages/*" 6 | ], 7 | "devDependencies": { 8 | "@lerna/bootstrap": "^4.0.0", 9 | "@typescript-eslint/eslint-plugin": "^4.1.1", 10 | "@typescript-eslint/parser": "^4.1.1", 11 | "docsify-cli": "^4.4.3", 12 | "eslint": "^7.15.0", 13 | "http-server": "^0.12.3", 14 | "lerna": "^4.0.0", 15 | "nyc": "^15.1.0", 16 | "ts-node": "^9.1.1", 17 | "typedoc": "^0.17.8", 18 | "typescript": "^4.3.5" 19 | }, 20 | "scripts": { 21 | "build": "lerna run compile", 22 | "postinstall": "lerna clean --yes && lerna bootstrap", 23 | "docsify": "npm run typedoc && http-server -c-1 ./docs", 24 | "lint": "eslint . 
--ext .ts --ext .js", 25 | "pretest": "npm run lint", 26 | "test": "lerna run test", 27 | "test:pipeline": "sh ./tools/run_pipeline.sh", 28 | "typedoc": "sh tools/mkdoc.sh", 29 | "clean": "lerna run clean --parallel --concurrency 10", 30 | "release": "lerna publish from-package --yes --no-verify-access", 31 | "beta-release-tag": "lerna version prerelease --no-push --force-publish=* --yes", 32 | "beta-release": "lerna publish from-package --no-verify-access --dist-tag beta -y", 33 | "cov": "./tools/coverage.sh", 34 | "cov:report": "nyc report -r=lcov", 35 | "build:docker-cpu": "docker build -t pipcook:latest-cpu -f docker/Dockerfile.cpu ./docker", 36 | "build:docker": "docker build -t pipcook:latest -f docker/Dockerfile ./docker" 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /packages/cli/.npmignore: -------------------------------------------------------------------------------- 1 | .* 2 | coverage 3 | *.tgz 4 | src 5 | tsconfig.* 6 | -------------------------------------------------------------------------------- /packages/cli/.nycrc: -------------------------------------------------------------------------------- 1 | { 2 | "include": [ 3 | "src/**/*.ts" 4 | ], 5 | "all": true, 6 | "instrument": true 7 | } 8 | -------------------------------------------------------------------------------- /packages/cli/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@pipcook/cli", 3 | "version": "2.1.5", 4 | "description": "pipcook cli", 5 | "main": "dist/bin/pipcook.js", 6 | "scripts": { 7 | "preinstall": "(mkdir -p dist/bin && touch dist/bin/pipcook.js) || ((mkdir dist\\bin || echo 'exists') && echo '' > dist\\bin\\pipcook.js)", 8 | "test": "ava -v", 9 | "build": "npm run clean && npm run compile", 10 | "clean": "((rm -rf dist tsconfig.tsbuildinfo) || (rmdir /Q /S dist tsconfig.tsbuildinfo)) || echo 'nothing to clean'", 11 | "compile": "tsc -b tsconfig.json && (chmod +x ./dist/bin/pipcook.js || echo 'windows not support chmod for bin')", 12 | "cov": "nyc -r text-summary npm run test", 13 | "cov:report": "nyc report -r lcov" 14 | }, 15 | "bin": { 16 | "pipcook": "dist/bin/pipcook.js" 17 | }, 18 | "keywords": [], 19 | "author": "", 20 | "license": "Apache-2.0", 21 | "dependencies": { 22 | "@pipcook/core": "^2.1.4", 23 | "@pipcook/costa": "^2.1.4", 24 | "bent": "^7.3.12", 25 | "chalk": "^3.0.0", 26 | "cli-progress": "^3.9.0", 27 | "commander": "^4.0.1", 28 | "dateformat": "^4.5.1", 29 | "debug": "^4.3.1", 30 | "express": "^4.17.1", 31 | "extract-zip": "^2.0.1", 32 | "fs-extra": "^9.1.0", 33 | "jimp": "^0.16.1", 34 | "multer": "^1.4.3", 35 | "nanoid": "^3.1.22", 36 | "ora": "^5.4.1", 37 | "pretty-bytes": "^5.6.0", 38 | "query-string": "^6.14.1", 39 | "semver": "^6.3.0" 40 | }, 41 | "devDependencies": { 42 | "@types/bent": "^7.3.2", 43 | "@types/cli-progress": "^3.9.1", 44 | "@types/dateformat": "^3.0.1", 45 | "@types/express": "^4.17.13", 46 | "@types/extract-zip": "^1.6.2", 47 | "@types/fs-extra": "^9.0.9", 48 | "@types/multer": "^1.4.7", 49 | "@types/node": "^14.6.0", 50 | "@types/semver": "^7.3.4", 51 | "@types/sinon": "^9.0.11", 52 | "ava": "^3.13.0", 53 | "import-fresh": "^3.3.0", 54 | "nyc": "^15.1.0", 55 | "sinon": "^10.0.0", 56 | "ts-node": "^9.1.1", 57 | "typescript": "^4.3.5" 58 | }, 59 | "publishConfig": { 60 | "access": "public" 61 | }, 62 | "ava": { 63 | "extensions": [ 64 | "ts" 65 | ], 66 | "require": [ 67 | "ts-node/register" 68 | ], 69 | "timeout": "2m" 70 | } 71 | } 72 | 
-------------------------------------------------------------------------------- /packages/cli/serve-resource/image/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Pipcook Server 6 | 8 | 76 | 77 | 78 | 79 |
80 |
81 | 82 | 83 |
84 |
85 |

No files currently selected for predict

86 |
87 |
88 | 89 |
90 |
91 |

Predict result...

92 |
93 |
94 | 185 | 186 | 187 | 188 | -------------------------------------------------------------------------------- /packages/cli/serve-resource/text/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Pipcook Server 6 | 8 | 77 | 78 | 79 | 80 |
81 |
82 |

Input text to predict:

83 |
84 |
85 | 86 |
87 |
88 | 89 |
90 |
91 |

Predict result...

92 |
93 |
94 | 113 | 114 | 115 | 116 | -------------------------------------------------------------------------------- /packages/cli/src/constants/index.test.ts: -------------------------------------------------------------------------------- 1 | import test from 'ava'; 2 | import * as path from 'path'; 3 | import { homedir } from 'os'; 4 | import * as constants from './'; 5 | 6 | test('constants of cli', (t) => { 7 | t.is(constants.PIPCOOK_FRAMEWORK_MIRROR_BASE, 'https://pipcook-cloud.oss-cn-hangzhou.aliyuncs.com/framework/'); 8 | t.is(constants.FrameworkDescFilename, 'framework.json'); 9 | t.is(constants.PIPCOOK_PLUGIN_ARTIFACT_PATH, path.join(constants.PIPCOOK_HOME_PATH, 'artifact')); 10 | t.is(constants.PIPCOOK_HOME_PATH, path.join(homedir(), '.pipcook')); 11 | t.is(constants.PIPCOOK_TMPDIR, path.join(constants.PIPCOOK_HOME_PATH, '.tmp')); 12 | t.is(constants.PIPCOOK_FRAMEWORK_PATH, path.join(constants.PIPCOOK_HOME_PATH, 'framework')); 13 | t.is(constants.PIPCOOK_SCRIPT_PATH, path.join(constants.PIPCOOK_HOME_PATH, 'script')); 14 | }); 15 | -------------------------------------------------------------------------------- /packages/cli/src/constants/index.ts: -------------------------------------------------------------------------------- 1 | import { homedir } from 'os'; 2 | import { join } from 'path'; 3 | 4 | /** 5 | * Pipcook home directory. 6 | */ 7 | export const PIPCOOK_HOME_PATH = join(homedir(), '.pipcook'); 8 | 9 | /** 10 | * Pipcook temp directory 11 | */ 12 | export const PIPCOOK_TMPDIR = join(PIPCOOK_HOME_PATH, '.tmp'); 13 | 14 | /** 15 | * pipcook framework cache 16 | */ 17 | export const PIPCOOK_FRAMEWORK_PATH = join(PIPCOOK_HOME_PATH, 'framework'); 18 | 19 | /** 20 | * pipcook script cache 21 | */ 22 | export const PIPCOOK_SCRIPT_PATH = join(PIPCOOK_HOME_PATH, 'script'); 23 | 24 | /** 25 | * pipcook artifact plugin path 26 | */ 27 | export const PIPCOOK_PLUGIN_ARTIFACT_PATH = join(PIPCOOK_HOME_PATH, 'artifact'); 28 | 29 | /** 30 | * pipcook framework mirror url 31 | */ 32 | export const PIPCOOK_FRAMEWORK_MIRROR_BASE = 'https://pipcook-cloud.oss-cn-hangzhou.aliyuncs.com/framework/'; 33 | 34 | /** 35 | * framework description file name, which is located in the root directory of the framework package 36 | */ 37 | export const FrameworkDescFilename = 'framework.json'; 38 | 39 | /** 40 | * framework description filename, which is located in the root directory of the framework package 41 | */ 42 | export const JSDescFilename = 'package.json'; 43 | 44 | /** 45 | * Node module directory name in the framework directory. 46 | */ 47 | export const JSModuleDirName = 'node_modules'; 48 | 49 | /** 50 | * Pipeline file name in the model directory. 51 | */ 52 | export const PipelineFileInModelDir = 'pipeline.json'; 53 | 54 | /** 55 | * Model directory name in the workspace directory. 56 | */ 57 | export const WorkspaceModelDir = 'model'; 58 | 59 | /** 60 | * Script directory name in the workspace directory. 61 | */ 62 | export const WorkspaceScriptDir = 'scripts'; 63 | 64 | /** 65 | * Framework directory name in the workspace directory. 66 | */ 67 | export const WorkspaceFrameworkDir = 'framework'; 68 | 69 | /** 70 | * Data directory name in the workspace directory. 
71 | */ 72 | export const WorkspaceDataDir = 'data'; 73 | 74 | /** Cache directory name in the workspace directory. */ 75 | export const WorkspaceCacheDir = 'cache'; 76 | -------------------------------------------------------------------------------- /packages/cli/src/standalone-impl.test.ts: -------------------------------------------------------------------------------- 1 | import test from 'ava'; 2 | import * as sinon from 'sinon'; 3 | import * as fs from 'fs-extra'; 4 | import { createStandaloneRT } from './standalone-impl'; 5 | 6 | test.serial.afterEach(() => sinon.restore()); 7 | 8 | test('create standalone runtime', (t) => { 9 | const mockDataSourceApi: any = {}; 10 | const rt = createStandaloneRT(mockDataSourceApi, '/tmp'); 11 | t.is((rt as any).dataset, mockDataSourceApi, 'datasource should equal'); 12 | t.is((rt as any).modelDir, '/tmp', 'modelDir should equal'); 13 | }); 14 | 15 | test.serial('runtime interface', async (t) => { 16 | const mockDataSourceApi: any = {}; 17 | const rt = createStandaloneRT(mockDataSourceApi, '/tmp'); 18 | const stubLog = sinon.stub(console, 'log'); 19 | await rt.notifyProgress({ value: 10, extendData: {} }); 20 | t.true(stubLog.calledOnce, 'console.log should be called once'); 21 | t.is(await rt.readModel(), '/tmp', 'readModel should be correct'); 22 | }); 23 | 24 | test.serial('runtime save model with path', async (t) => { 25 | const mockDataSourceApi: any = {}; 26 | const stubCopy = sinon.stub(fs, 'copy').resolves(); 27 | const rt = createStandaloneRT(mockDataSourceApi, '/tmp'); 28 | await rt.saveModel('/tmp/file.json'); 29 | t.false(stubCopy.called, 'copy should not be called'); 30 | await rt.saveModel('/data/file.json'); 31 | t.true(stubCopy.calledOnce, 'copy should be called once'); 32 | t.deepEqual(stubCopy.args[0], [ '/data/file.json', '/tmp' ] as any, 'should copy to the correct path'); 33 | }); 34 | -------------------------------------------------------------------------------- /packages/cli/src/standalone-impl.ts: -------------------------------------------------------------------------------- 1 | import { ProgressInfo } from '@pipcook/core'; 2 | import { pipelineAsync } from './utils'; 3 | import { DefaultDataSet, DefaultRuntime } from '@pipcook/costa'; 4 | import * as fs from 'fs-extra'; 5 | import * as path from 'path'; 6 | 7 | export class StandaloneImpl implements DefaultRuntime { 8 | constructor( 9 | public dataset: DefaultDataSet, 10 | private modelDir: string 11 | ) {} 12 | 13 | async notifyProgress(progress: ProgressInfo): Promise<void> { 14 | console.log(`progress: ${progress.value}%`); 15 | } 16 | 17 | async saveModel(localPathOrStream: string | NodeJS.ReadableStream, filename = 'model'): Promise<void> { 18 | if (typeof localPathOrStream === 'string') { 19 | if (path.parse(localPathOrStream).dir === this.modelDir || this.modelDir === path.resolve(localPathOrStream)) { 20 | return; 21 | } 22 | return fs.copy(localPathOrStream, this.modelDir); 23 | } else { 24 | const modelStream = fs.createWriteStream(path.join(this.modelDir, filename)); 25 | return pipelineAsync(localPathOrStream, modelStream); 26 | } 27 | } 28 | 29 | async readModel(): Promise<string> { 30 | return this.modelDir; 31 | } 32 | } 33 | 34 | export const createStandaloneRT = ( 35 | datasource: DefaultDataSet, 36 | modelDir: string 37 | ): DefaultRuntime => { 38 | return new StandaloneImpl(datasource, modelDir); 39 | }; 40 |
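The standalone runtime above is intentionally thin. A minimal sketch of how a caller might drive it, assuming a dataset pool built elsewhere (the directory paths are placeholders, not part of the package):

```ts
import { createStandaloneRT } from './standalone-impl';
import { DefaultDataSet } from '@pipcook/costa';

async function runWithRuntime(dataset: DefaultDataSet): Promise<void> {
  const rt = createStandaloneRT(dataset, '/tmp/workspace/model');
  await rt.notifyProgress({ value: 0, extendData: {} });
  // A model script would train here, then persist its output;
  // saveModel copies a file that lives outside the model directory into it.
  await rt.saveModel('/tmp/train-output/model.json');
  console.log('model stored under', await rt.readModel());
}
```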
-------------------------------------------------------------------------------- /packages/cli/src/utils/cache.test.ts: -------------------------------------------------------------------------------- 1 | import test from 'ava'; 2 | import * as sinon from 'sinon'; 3 | import * as fs from 'fs-extra'; 4 | import { fetchWithCache } from './cache'; 5 | import * as utils from '.'; 6 | 7 | test.serial.afterEach(() => sinon.restore()); 8 | 9 | test.serial('fetch with cache', async (t) => { 10 | const cacheDir = '.cache'; 11 | const url = 'url'; 12 | const target = 'target'; 13 | 14 | const stubDownloadAndExtractTo = sinon.stub(utils, 'downloadAndExtractTo').resolves(); 15 | const stubRemove = sinon.stub(fs, 'remove').resolves(); 16 | const stubPathExists = sinon.stub(fs, 'pathExists').resolves(true); 17 | const stubCopy = sinon.stub(fs, 'copy').resolves(); 18 | 19 | await fetchWithCache(cacheDir, url, target, true, true); 20 | 21 | t.false(stubDownloadAndExtractTo.called, 'downloadAndExtractTo function should not be called.'); 22 | t.true(stubRemove.calledOnce, 'fs.remove function should be called once.'); 23 | t.true(stubPathExists.calledOnce, 'fs.pathExists function should be called once.'); 24 | t.true(stubCopy.called, 'fs.copy function should be called.'); 25 | }); 26 | 27 | test.serial('fetch with cache and link', async (t) => { 28 | const cacheDir = '.cache'; 29 | const url = 'url'; 30 | const target = 'target'; 31 | 32 | const stubDownloadAndExtractTo = sinon.stub(utils, 'downloadAndExtractTo').resolves(); 33 | const stubRemove = sinon.stub(fs, 'remove').resolves(); 34 | const stubPathExists = sinon.stub(fs, 'pathExists').resolves(true); 35 | const stubSymlink = sinon.stub(fs, 'symlink').resolves(); 36 | 37 | await fetchWithCache(cacheDir, url, target, true, false); 38 | 39 | t.false(stubDownloadAndExtractTo.called, 'downloadAndExtractTo function should not be called.'); 40 | t.true(stubRemove.calledOnce, 'fs.remove function should be called once.'); 41 | t.true(stubPathExists.calledOnce, 'fs.pathExists function should be called once.'); 42 | t.true(stubSymlink.called, 'fs.symlink function should be called.'); 43 | }); 44 | 45 | test.serial('fetch with missed cache', async (t) => { 46 | const cacheDir = '.cache'; 47 | const url = 'url'; 48 | const target = 'target'; 49 | 50 | const stubDownloadAndExtractTo = sinon.stub(utils, 'downloadAndExtractTo').resolves(); 51 | const stubRemove = sinon.stub(fs, 'remove').resolves(); 52 | const stubMove = sinon.stub(fs, 'move').resolves(); 53 | const stubPathExists = sinon.stub(fs, 'pathExists').resolves(false); 54 | const stubCopy = sinon.stub(fs, 'copy').resolves(); 55 | 56 | await fetchWithCache(cacheDir, url, target, true, true); 57 | 58 | t.true(stubDownloadAndExtractTo.calledOnce, 'downloadAndExtractTo function should be called once.'); 59 | t.true(stubRemove.calledThrice, 'fs.remove function should be called three times.'); 60 | t.true(stubMove.calledOnce, 'fs.move function should be called once.'); 61 | t.true(stubPathExists.calledOnce, 'fs.pathExists function should be called once.'); 62 | t.true(stubCopy.calledOnce, 'fs.copy function should be called once.'); 63 | }); 64 | 65 | test.serial('fetch with disabled cache', async (t) => { 66 | const cacheDir = '.cache'; 67 | const url = 'url'; 68 | const target = 'target'; 69 | 70 | const stubDownloadAndExtractTo = sinon.stub(utils, 'downloadAndExtractTo').resolves(); 71 | const stubRemove = sinon.stub(fs, 'remove').resolves(); 72 | const stubMove = sinon.stub(fs, 'move').resolves(); 73 | const stubPathExists = sinon.stub(fs, 'pathExists').resolves(true); 74 | const stubCopy = sinon.stub(fs, 'copy').resolves(); 75 | 76 | await fetchWithCache(cacheDir, url,
target, false, true); 77 | 78 | t.true(stubDownloadAndExtractTo.calledOnce, 'downloadAndExtractTo function should be called once.'); 79 | t.true(stubRemove.calledThrice, 'fs.remove function should be called three times.'); 80 | t.true(stubMove.calledOnce, 'fs.move function should be called once.'); 81 | t.false(stubPathExists.called, 'fs.pathExists function should not be called.'); 82 | t.true(stubCopy.calledOnce, 'fs.copy function should be called once.'); 83 | }); 84 | 85 | test.serial('fetch with disabled cache with link', async (t) => { 86 | const cacheDir = '.cache'; 87 | const url = 'url'; 88 | const target = 'target'; 89 | 90 | const stubDownloadAndExtractTo = sinon.stub(utils, 'downloadAndExtractTo').resolves(); 91 | const stubRemove = sinon.stub(fs, 'remove').resolves(); 92 | const stubMove = sinon.stub(fs, 'move').resolves(); 93 | const stubPathExists = sinon.stub(fs, 'pathExists').resolves(true); 94 | const stubSymlink = sinon.stub(fs, 'symlink').resolves(); 95 | 96 | await fetchWithCache(cacheDir, url, target, false, false); 97 | 98 | t.true(stubDownloadAndExtractTo.calledOnce, 'downloadAndExtractTo function should be called once.'); 99 | t.true(stubRemove.calledThrice, 'fs.remove function should be called three times.'); 100 | t.true(stubMove.calledOnce, 'fs.move function should be called once.'); 101 | t.false(stubPathExists.called, 'fs.pathExists function should not be called.'); 102 | t.true(stubSymlink.calledOnce, 'fs.symlink function should be called once.'); 103 | }); 104 | -------------------------------------------------------------------------------- /packages/cli/src/utils/cache.ts: -------------------------------------------------------------------------------- 1 | import * as crypto from 'crypto'; 2 | import * as fs from 'fs-extra'; 3 | import * as path from 'path'; 4 | import Debug from 'debug'; 5 | import { downloadAndExtractTo } from '.'; 6 | const debug = Debug('cache'); 7 | 8 | /** 9 | * if the file or directory exists in cache, link or copy it to target, otherwise fetch and cache it 10 | * @param cacheDir cache directory 11 | * @param url url to fetch 12 | * @param target target path 13 | * @param enableCache is cache enabled 14 | * @param isCopy copy the cached file to the target instead of symlinking it 15 | */ 16 | export const fetchWithCache = async (cacheDir: string, url: string, target: string, enableCache: boolean, isCopy = false): Promise<void> => { 17 | const md5 = crypto.createHash('md5').update(url).digest('hex'); 18 | const cachePath = path.join(cacheDir, md5); 19 | const cacheTmpPath = path.join(cacheDir, 'tmp', md5); 20 | debug('search cache from', cachePath); 21 | await fs.remove(target); 22 | if (enableCache) { 23 | if (await fs.pathExists(cachePath)) { 24 | return isCopy ? fs.copy(cachePath, target) : fs.symlink(cachePath, target); 25 | } 26 | debug('cache missed'); 27 | } 28 | await fs.remove(cachePath); 29 | await fs.remove(cacheTmpPath); 30 | debug('download from url', url); 31 | await downloadAndExtractTo(url, cacheTmpPath); 32 | debug('move tmp file to cache'); 33 | await fs.move(cacheTmpPath, cachePath); 34 | debug(`copy/link ${cachePath} to ${target}`); 35 | return isCopy ? fs.copy(cachePath, target) : fs.symlink(cachePath, target); 36 | }; 37 |
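A usage sketch for `fetchWithCache`, with a placeholder URL and directories; the default (non-copy) mode symlinks the cache entry into place:

```ts
import * as constants from '../constants';
import { fetchWithCache } from './cache';

async function fetchFramework(): Promise<void> {
  // The first call downloads and extracts; later calls with the same URL
  // reuse the md5-keyed entry under the cache root and only re-link the target.
  await fetchWithCache(
    constants.PIPCOOK_FRAMEWORK_PATH,      // cache root
    'https://example.com/framework.zip',   // placeholder URL
    '/tmp/workspace/framework',            // target path
    true                                   // enableCache
  );
}
```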
-------------------------------------------------------------------------------- /packages/cli/src/utils/framework.test.ts: -------------------------------------------------------------------------------- 1 | import test from 'ava'; 2 | import * as sinon from 'sinon'; 3 | import * as fs from 'fs-extra'; 4 | import * as cache from './cache'; 5 | import { prepareFramework } from './framework'; 6 | import { PipcookFramework, PipelineMeta } from '@pipcook/costa'; 7 | import * as utils from './'; 8 | 9 | test.serial.afterEach(() => sinon.restore()); 10 | 11 | test.serial('prepare with invalid options', async (t) => { 12 | const pipelineMeta: PipelineMeta = { 13 | specVersion: 'test', 14 | datasource: 'test', 15 | dataflow: [ 'test' ], 16 | model: 'test', 17 | artifact: [], 18 | options: {} 19 | }; 20 | const frameworkDir = 'test'; 21 | 22 | const stubFetchWithCache = sinon.stub(cache, 'fetchWithCache').resolves(); 23 | const stubReadJson = sinon.stub(fs, 'readJson').resolves(); 24 | 25 | const ret = await prepareFramework(pipelineMeta, frameworkDir, ''); 26 | 27 | t.false(stubFetchWithCache.called); 28 | t.false(stubReadJson.called); 29 | t.is(ret, undefined); 30 | }); 31 | 32 | test.serial('prepare with file protocol and zip extname', async (t) => { 33 | const pipelineMeta: PipelineMeta = { 34 | specVersion: 'test', 35 | datasource: 'test', 36 | dataflow: [ 'test' ], 37 | model: 'test', 38 | artifact: [], 39 | options: { 40 | framework: 'file:///data/a.zip' 41 | } 42 | }; 43 | const frameworkDir = 'test'; 44 | 45 | const stubUnzipData = sinon.stub(utils, 'unZipData').resolves(); 46 | const stubReadJson = sinon.stub(fs, 'readJson').resolves({ mock: 'value' }); 47 | 48 | const ret = await prepareFramework(pipelineMeta, frameworkDir, ''); 49 | 50 | t.true(stubReadJson.calledOnce, 'readJson should be called once'); 51 | t.true(stubUnzipData.calledOnce, 'unzip should be called once'); 52 | t.deepEqual(ret, { mock: 'value', path: frameworkDir } as any); 53 | }); 54 | 55 | test.serial('prepare with file protocol and no-zip extname', async (t) => { 56 | const pipelineMeta: PipelineMeta = { 57 | specVersion: 'test', 58 | datasource: 'test', 59 | dataflow: [ 'test' ], 60 | model: 'test', 61 | artifact: [], 62 | options: { 63 | framework: 'file:///data/a' 64 | } 65 | }; 66 | const frameworkDir = 'test'; 67 | 68 | const stubCopy = sinon.stub(fs, 'copy').resolves(); 69 | const stubReadJson = sinon.stub(fs, 'readJson').resolves({ mock: 'value' }); 70 | 71 | const ret = await prepareFramework(pipelineMeta, frameworkDir, 'http://a.b.c/'); 72 | 73 | t.true(stubReadJson.calledOnce, 'readJson should be called once'); 74 | t.true(stubCopy.calledOnce, 'copy should be called once'); 75 | t.deepEqual(ret, { mock: 'value', path: frameworkDir } as any); 76 | }); 77 | 78 | test.serial('prepare with valid options', async (t) => { 79 | const pipelineMeta: PipelineMeta = { 80 | specVersion: 'test', 81 | datasource: 'test', 82 | dataflow: [ 'test' ], 83 | model: 'test', 84 | artifact: [], 85 | options: { 86 | framework: 'test' 87 | } 88 | }; 89 | 90 | const framework: PipcookFramework = { 91 | path: 'test', 92 | name: 'test', 93 | desc: 'test', 94 | version: 'test', 95 | arch: 'test', 96 | platform: 'test', 97 | pythonVersion: 'test', 98 | nodeVersion: 'test', 99 | napiVersion: 7, 100 | pythonPackagePath: 'test', 101 | jsPackagePath: 'test' 102 | }; 103 | 104 | const frameworkDir = 'test'; 105 | 106 | const stubFetchWithCache = sinon.stub(cache,
'fetchWithCache').resolves(); 107 | const stubReadJson = sinon.stub(fs, 'readJson').resolves(framework); 108 | 109 | const ret = await prepareFramework(pipelineMeta, frameworkDir, ''); 110 | 111 | const expectedRet = { 112 | ...framework, 113 | path: frameworkDir 114 | }; 115 | 116 | t.true(stubFetchWithCache.calledOnce); 117 | t.true(stubReadJson.calledOnce); 118 | t.deepEqual(ret, expectedRet); 119 | }); 120 | -------------------------------------------------------------------------------- /packages/cli/src/utils/framework.ts: -------------------------------------------------------------------------------- 1 | import { fetchWithCache } from './cache'; 2 | import * as fs from 'fs-extra'; 3 | import * as path from 'path'; 4 | import * as url from 'url'; 5 | import { PipelineMeta, PipcookFramework } from '@pipcook/costa'; 6 | import * as constants from '../constants'; 7 | import { mirrorUrl, DownloadProtocol, unZipData } from './'; 8 | 9 | export const prepareFramework = async ( 10 | pipelineMeta: PipelineMeta, 11 | frameworkDir: string, 12 | mirror: string, 13 | enableCache = true 14 | ): Promise<PipcookFramework | undefined> => { 15 | if (pipelineMeta.options.framework) { 16 | const urlObj = url.parse(pipelineMeta.options.framework); 17 | if (urlObj.protocol === DownloadProtocol.FILE) { 18 | if (path.extname(urlObj.path) === '.zip') { 19 | await unZipData(urlObj.path, frameworkDir); 20 | } else { 21 | await fs.copy(urlObj.path, frameworkDir); 22 | } 23 | } else { 24 | let realUrl = ''; 25 | if (urlObj.protocol === DownloadProtocol.HTTP || urlObj.protocol === DownloadProtocol.HTTPS) { 26 | realUrl = pipelineMeta.options.framework; 27 | } else { 28 | realUrl = mirrorUrl(mirror, pipelineMeta.options.framework); 29 | } 30 | await fetchWithCache( 31 | constants.PIPCOOK_FRAMEWORK_PATH, 32 | realUrl, 33 | frameworkDir, 34 | enableCache 35 | ); 36 | } 37 | const framework = await fs.readJson(path.join(frameworkDir, constants.FrameworkDescFilename)); 38 | // todo: validate framework 39 | return { 40 | ...framework, 41 | path: frameworkDir 42 | }; 43 | } 44 | }; 45 |
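The `framework` option accepted by `prepareFramework` can take several shapes; this sketch enumerates the branches above (all values are placeholders):

```ts
import { PipelineMeta } from '@pipcook/costa';
import { prepareFramework } from './framework';

async function resolveFramework(meta: PipelineMeta): Promise<void> {
  // 'file:///data/tfjs.zip' -> unzipped into the framework directory
  // 'file:///data/tfjs'     -> copied into the framework directory
  // 'https://host/tfjs.zip' -> downloaded through the cache as-is
  // anything else           -> resolved against the mirror base URL
  const framework = await prepareFramework(meta, '/tmp/workspace/framework', '');
  console.log('framework resolved at', framework?.path);
}
```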
-------------------------------------------------------------------------------- /packages/cli/src/utils/plugin.ts: -------------------------------------------------------------------------------- 1 | import * as fs from 'fs-extra'; 2 | import * as path from 'path'; 3 | import { ArtifactExports } from '@pipcook/core'; 4 | import { PipelineMeta } from '@pipcook/costa'; 5 | import * as constants from '../constants'; 6 | import { execAsync } from './'; 7 | 8 | export interface ArtifactMeta { 9 | artifactExports: ArtifactExports; 10 | options: Record<string, any>; 11 | } 12 | 13 | export interface PluginVersion { 14 | name: string, 15 | version: string 16 | } 17 | 18 | /** 19 | * extract the version from the name expression, falling back to 'latest' if no version is found 20 | * @param name package name with semver 21 | */ 22 | export const extractVersion = (name: string): PluginVersion => { 23 | let n = name.length; 24 | while (n-- > 0) { 25 | if (name[n] === '/') { 26 | break; 27 | } else if (name[n] === '@') { 28 | return { 29 | name: name.substr(0, n), 30 | version: name.substr(n + 1) 31 | }; 32 | } 33 | } 34 | return { name, version: 'latest' }; 35 | }; 36 | 37 | /** 38 | * install plugin 39 | * @param name package name: pipcook-ali-oss-uploader or pipcook-ali-oss-uploader@0.0.1 40 | * @param pluginHomeDir plugin home directory 41 | */ 42 | export const install = async (name: string, pluginHomeDir: string, npmClient: string, registry?: string): Promise<string> => { 43 | if (!await fs.pathExists(pluginHomeDir)) { 44 | await fs.mkdirp(pluginHomeDir); 45 | } 46 | const pluginVersion = extractVersion(name); 47 | const alias = `${pluginVersion.name}-${pluginVersion.version}`; 48 | const requirePath = path.join(pluginHomeDir, 'node_modules', alias); 49 | // always update plugin if version is 'beta', 'alpha' or 'latest' 50 | if ([ 'beta', 'alpha', 'latest' ].includes(pluginVersion.version) || !(await fs.pathExists(requirePath))) { 51 | await execAsync( 52 | `${npmClient} install ${alias}@npm:${name} -P --save${ registry ? ' --registry=' + registry : '' }`, 53 | { cwd: pluginHomeDir } 54 | ); 55 | } 56 | return requirePath; 57 | }; 58 | 59 | export const prepareArtifactPlugin = async (pipelineMeta: PipelineMeta, npmClient: string, registry?: string): Promise<Array<ArtifactMeta>> => { 60 | if ( 61 | !pipelineMeta.artifact || 62 | (Array.isArray(pipelineMeta.artifact) && pipelineMeta.artifact.length === 0) 63 | ) { 64 | return []; 65 | } 66 | const allPlugins: Array<ArtifactMeta> = []; 67 | for (const plugin of pipelineMeta.artifact) { 68 | const requirePath = await install(plugin.processor, constants.PIPCOOK_PLUGIN_ARTIFACT_PATH, npmClient, registry); 69 | let pluginExport: ArtifactExports = await import(requirePath); 70 | if ( 71 | typeof pluginExport.initialize !== 'function' 72 | || typeof pluginExport.build !== 'function' 73 | ) { 74 | if ( 75 | (pluginExport as any).default 76 | && typeof (pluginExport as any).default.initialize === 'function' 77 | && typeof (pluginExport as any).default.build === 'function' 78 | ) { 79 | pluginExport = (pluginExport as any).default; 80 | } else { 81 | throw new TypeError(`${plugin.processor} is not a valid artifact plugin`); 82 | } 83 | } 84 | await pluginExport.initialize(plugin); 85 | allPlugins.push({ 86 | artifactExports: pluginExport, 87 | options: plugin 88 | }); 89 | } 90 | return allPlugins; 91 | }; 92 |
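`extractVersion` scans the name from the end so that scoped package names keep their leading `@`; two illustrative results:

```ts
import { extractVersion } from './plugin';

// Explicit version on a scoped package:
extractVersion('@pipcook/ali-oss-uploader@0.0.1');
// -> { name: '@pipcook/ali-oss-uploader', version: '0.0.1' }

// No version suffix falls back to 'latest':
extractVersion('pipcook-ali-oss-uploader');
// -> { name: 'pipcook-ali-oss-uploader', version: 'latest' }
```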
-------------------------------------------------------------------------------- /packages/cli/src/utils/post-predict.test.ts: -------------------------------------------------------------------------------- 1 | import test from 'ava'; 2 | import * as sinon from 'sinon'; 3 | import * as fs from 'fs-extra'; 4 | import { PipelineType } from '@pipcook/costa'; 5 | import { processData } from './post-predict'; 6 | import * as Jimp from 'jimp'; 7 | 8 | test.serial.afterEach(() => sinon.restore()); 9 | 10 | test.serial('should process object detection', async (t) => { 11 | const result = [ 12 | [ 13 | { 14 | id: 1, 15 | category: 'mock-category', 16 | score: 0.5, 17 | box: [ 0, 0, 15, 15 ] 18 | } 19 | ] 20 | ]; 21 | const stubWrite = sinon.stub(Jimp.prototype, 'write').resolves(); 22 | await t.notThrowsAsync(processData(result, { 23 | type: PipelineType.ObjectDetection, 24 | inputs: [ __dirname + '/../../../../docs/images/logo.png' ] 25 | })); 26 | t.true(stubWrite.calledOnce, 'write should be called once'); 27 | }); 28 | 29 | test.serial('should process object detection with buffer', async (t) => { 30 | const result = [ 31 | [ 32 | { 33 | id: 1, 34 | category: 'mock-category', 35 | score: 0.5, 36 | box: [ 0, 0, 15, 15 ] 37 | } 38 | ] 39 | ]; 40 | const stubWrite = sinon.stub(Jimp.prototype, 'write').resolves(); 41 | const buffer = await fs.readFile(__dirname + '/../../../../docs/images/logo.png'); 42 | await t.notThrowsAsync(processData(result, { 43 | type: PipelineType.ObjectDetection, 44 | inputs: [ buffer ] 45 | })); 46 | t.true(stubWrite.calledOnce, 'write should be called once'); 47 | }); 48 | 49 | test.serial('should process object detection with buffer but args count not matched', async (t) => { 50 | const result = [ 51 | [ 52 | { 53 | id: 1, 54 | category: 'mock-category', 55 | score: 0.5, 56 | box: [ -10, -10, 15, 15 ] 57 | } 58 | ] 59 | ]; 60 | const buffer = await fs.readFile(__dirname + '/../../../../docs/images/logo.png'); 61 | await t.throwsAsync(processData(result, { 62 | type: PipelineType.ObjectDetection, 63 | inputs: [ buffer, buffer ] 64 | }), { message: 'Size of predict result is not equal to inputs.' }); 65 | }); 66 | 67 | test.serial('should process text classification', async (t) => { 68 | const result = [ 69 | [ 70 | { 71 | id: 1, 72 | category: 'mock-category', 73 | score: 0.5 74 | } 75 | ] 76 | ]; 77 | await t.notThrowsAsync(processData(result, { 78 | type: PipelineType.TextClassification, 79 | inputs: [ 'mock input text' ] 80 | })); 81 | }); 82 | -------------------------------------------------------------------------------- /packages/cli/src/utils/post-predict.ts: -------------------------------------------------------------------------------- 1 | import { PredictResult, DatasetPool } from '@pipcook/core'; 2 | import { PipelineType } from '@pipcook/costa'; 3 | import * as Jimp from 'jimp'; 4 | import { PredictInput } from './predict-dataset'; 5 | import * as path from 'path'; 6 | import { logger } from './'; 7 | 8 | export interface Options { 9 | type: PipelineType; 10 | inputs: Array<PredictInput>; 11 | [k: string]: any 12 | } 13 | 14 | export async function processData(predictResult: PredictResult, opts: Options): Promise<void> { 15 | logger.success(`Origin result: ${JSON.stringify(predictResult)}`); 16 | switch (opts.type) { 17 | case PipelineType.ObjectDetection: 18 | await processObjectDetection(predictResult, opts); 19 | break; 20 | default: 21 | return; 22 | } 23 | } 24 | 25 | async function processObjectDetection(predictResult: DatasetPool.Types.ObjectDetection.PredictResult, opts: Options): Promise<void> { 26 | if (predictResult.length !== opts.inputs.length) { 27 | throw new TypeError('Size of predict result is not equal to inputs.'); 28 | } 29 | for (let i = 0; i < opts.inputs.length; i++) { 30 | let img: Jimp; 31 | if (typeof opts.inputs[i] === 'string') { 32 | img = await Jimp.read(opts.inputs[i] as string); 33 | } else { 34 | img = await Jimp.read(opts.inputs[i] as Buffer); 35 | } 36 | const font = await Jimp.loadFont(Jimp.FONT_SANS_16_BLACK); 37 | predictResult[i].forEach((res: DatasetPool.Types.ObjectDetection.PredictObject) => { 38 | const x = Math.round(res.box[0] < 0 ? 0 : res.box[0]); 39 | const y = Math.round(res.box[1] < 0 ? 0 : res.box[1]); 40 | const w = Math.round(res.box[0] < 0 ? res.box[2] - Math.abs(res.box[0]) : res.box[2]); 41 | const h = Math.round(res.box[1] < 0 ?
res.box[3] - Math.abs(res.box[1]) : res.box[3]); 42 | // draw class name and score 43 | img.print(font, x, y, `${res.category}:${res.score.toFixed(2)}`); 44 | // draw box 45 | for (let drawX = x; drawX <= x + w; ++drawX) { 46 | img.setPixelColor(0xFF, drawX, y); 47 | img.setPixelColor(0xFF, drawX, y + h); 48 | } 49 | for (let drawY = y; drawY <= y + h; ++drawY) { 50 | img.setPixelColor(0xFF, x, drawY); 51 | img.setPixelColor(0xFF, x + w, drawY); 52 | } 53 | }); 54 | await img.write(`${path.join(process.cwd(), `predict-result-${i}.png`)}`); 55 | } 56 | logger.success('Object detection result has been saved to:'); 57 | for (let i = 0; i < opts.inputs.length; ++i) { 58 | logger.info(`predict-result-${i}.png`); 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /packages/cli/src/utils/predict-databset.test.ts: -------------------------------------------------------------------------------- 1 | import test from 'ava'; 2 | import * as sinon from 'sinon'; 3 | import { PipelineType } from '@pipcook/costa'; 4 | import { makePredictDataset } from './predict-dataset'; 5 | 6 | test.serial.afterEach(() => sinon.restore()); 7 | 8 | test('unsupported type', async (t) => { 9 | t.is(makePredictDataset([], -1 as any), null); 10 | }); 11 | 12 | test('make object detection dataset', async (t) => { 13 | const dataset = makePredictDataset([ 's' ], PipelineType.ObjectDetection); 14 | t.deepEqual(await dataset.predicted.next(), { data: { uri: 's' }, label: undefined }); 15 | }); 16 | 17 | test('make object detection dataset from buffer', async (t) => { 18 | const buffer = Buffer.from([ 1 ]); 19 | const dataset = makePredictDataset([ buffer ], PipelineType.ObjectDetection); 20 | t.deepEqual(await dataset.predicted.next(), { data: { buffer: buffer.buffer }, label: undefined }); 21 | }); 22 | 23 | test('make image classification dataset', async (t) => { 24 | const dataset = makePredictDataset([ 's' ], PipelineType.ImageClassification); 25 | t.deepEqual(await dataset.predicted.next(), { data: { uri: 's' }, label: undefined }); 26 | }); 27 | 28 | test('make image classification dataset from buffer', async (t) => { 29 | const buffer = Buffer.from([ 1 ]); 30 | const dataset = makePredictDataset([ buffer ], PipelineType.ImageClassification); 31 | t.deepEqual(await dataset.predicted.next(), { data: { buffer: buffer.buffer }, label: undefined }); 32 | }); 33 | 34 | test('make text classification dataset from string', async (t) => { 35 | const dataset = makePredictDataset([ 'text1' ], PipelineType.TextClassification); 36 | t.deepEqual(await dataset.predicted.next(), { data: 'text1', label: undefined }); 37 | }); 38 | 39 | test('make text classification dataset from buffer', async (t) => { 40 | const buffer = Buffer.from([ 1 ]); 41 | t.throws(() => makePredictDataset([ buffer ], PipelineType.TextClassification), { message: 'Should input text for text classification.' 
}); 42 | }); 43 | -------------------------------------------------------------------------------- /packages/cli/src/utils/predict-dataset.ts: -------------------------------------------------------------------------------- 1 | import { DatasetPool, DataCook } from '@pipcook/core'; 2 | import { PipelineType } from '@pipcook/costa'; 3 | /** 4 | * Data type for predict tasks 5 | */ 6 | export type PredictInput = string | Buffer; 7 | 8 | export function makePredictDataset(inputs: Array<PredictInput>, pipelineType: PipelineType): DatasetPool.Types.DatasetPool { 9 | let samples; 10 | if (pipelineType === PipelineType.ObjectDetection) { 11 | samples = inputs.map((input) => { 12 | if (typeof input === 'string') { 13 | return { 14 | data: { 15 | uri: input 16 | }, 17 | label: undefined 18 | } as DataCook.Dataset.Types.ObjectDetection.Sample; 19 | } else { 20 | return { 21 | data: { 22 | buffer: input.buffer 23 | }, 24 | label: undefined 25 | } as DataCook.Dataset.Types.ObjectDetection.Sample; 26 | } 27 | }); 28 | 29 | const datasetData: DatasetPool.Types.DatasetData = { 30 | predictedData: samples 31 | }; 32 | return DatasetPool.ArrayDatasetPoolImpl.from(datasetData, { type: DataCook.Dataset.Types.DatasetType.Image }); 33 | } else if (pipelineType === PipelineType.ImageClassification) { 34 | samples = inputs.map((input) => { 35 | if (typeof input === 'string') { 36 | return { 37 | data: { 38 | uri: input 39 | }, 40 | label: undefined 41 | } as DataCook.Dataset.Types.ImageClassification.Sample; 42 | } else { 43 | return { 44 | data: { 45 | buffer: input.buffer 46 | }, 47 | label: undefined 48 | } as DataCook.Dataset.Types.ImageClassification.Sample; 49 | } 50 | }); 51 | 52 | const datasetData: DatasetPool.Types.DatasetData = { 53 | predictedData: samples 54 | }; 55 | return DatasetPool.ArrayDatasetPoolImpl.from(datasetData, { type: DataCook.Dataset.Types.DatasetType.Image }); 56 | } else if (pipelineType === PipelineType.TextClassification) { 57 | samples = inputs.map((input) => { 58 | if (typeof input === 'string') { 59 | return { 60 | data: input, 61 | label: undefined 62 | } as DataCook.Dataset.Types.TextClassification.Sample; 63 | } else { 64 | throw new TypeError('Should input text for text classification.'); 65 | } 66 | }); 67 | 68 | const datasetData: DatasetPool.Types.DatasetData = { 69 | predictedData: samples 70 | }; 71 | return DatasetPool.ArrayDatasetPoolImpl.from(datasetData, { type: DataCook.Dataset.Types.DatasetType.Table }); 72 | } else { 73 | return null; 74 | } 75 | } 76 |
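A small sketch of `makePredictDataset`; the image path is a placeholder, and the sample shape mirrors the expectations in the test file above:

```ts
import { PipelineType } from '@pipcook/costa';
import { makePredictDataset } from './predict-dataset';

async function buildPredictInput(): Promise<void> {
  const pool = makePredictDataset([ './cat.jpg' ], PipelineType.ImageClassification);
  const sample = await pool?.predicted?.next();
  // -> { data: { uri: './cat.jpg' }, label: undefined }
  console.log(sample);
}
```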
-------------------------------------------------------------------------------- /packages/cli/src/utils/script.ts: -------------------------------------------------------------------------------- 1 | import { 2 | PipelineMeta, 3 | ScriptConfig, 4 | ScriptType, 5 | PipcookScript 6 | } from '@pipcook/costa'; 7 | import * as constants from '../constants'; 8 | import * as fs from 'fs-extra'; 9 | import * as path from 'path'; 10 | import { parse } from 'url'; 11 | import { fetchWithCache } from './cache'; 12 | import * as queryString from 'query-string'; 13 | import { DownloadProtocol } from './'; 14 | 15 | export const downloadScript = async (scriptDir: string, scriptOrder: number, url: string, type: ScriptType, enableCache: boolean, devMode: boolean): Promise<PipcookScript> => { 16 | const urlObj = parse(url); 17 | const baseName = path.parse(urlObj.pathname).base; 18 | let localPath = path.join(scriptDir, `${scriptOrder}-${baseName}`); 19 | const query = queryString.parse(urlObj.query); 20 | // if the url uses the file protocol, import it directly in development mode or copy it in normal mode. 21 | if (urlObj.protocol === DownloadProtocol.FILE || urlObj.protocol === null) { 22 | if (path.isAbsolute(urlObj.pathname)) { 23 | if (devMode) { 24 | localPath = urlObj.pathname; 25 | } else { 26 | await fs.copy(urlObj.pathname, localPath); 27 | } 28 | } else { 29 | if (devMode) { 30 | localPath = path.join(process.cwd(), urlObj.pathname); 31 | } else { 32 | await fs.copy(path.join(process.cwd(), urlObj.pathname), localPath); 33 | } 34 | } 35 | } else { 36 | if (urlObj.protocol === DownloadProtocol.HTTP || urlObj.protocol === DownloadProtocol.HTTPS) { 37 | // maybe should copy the script with COW 38 | await fetchWithCache(constants.PIPCOOK_SCRIPT_PATH, url, localPath, enableCache, true); 39 | } else { 40 | throw new TypeError(`unsupported protocol ${urlObj.protocol}`); 41 | } 42 | } 43 | return { 44 | name: baseName, 45 | path: localPath, 46 | type, 47 | query 48 | }; 49 | }; 50 | 51 | export const prepareScript = async (pipelineMeta: PipelineMeta, scriptDir: string, enableCache = true, devMode = false): Promise<ScriptConfig> => { 52 | const scripts: ScriptConfig = { 53 | datasource: null, 54 | dataflow: null, 55 | model: null 56 | }; 57 | let scriptOrder = 0; 58 | scripts.datasource 59 | = await downloadScript(scriptDir, scriptOrder, pipelineMeta.datasource, ScriptType.DataSource, enableCache, devMode); 60 | scriptOrder++; 61 | if (Array.isArray(pipelineMeta.dataflow) && pipelineMeta.dataflow.length > 0) { 62 | scripts.dataflow = []; 63 | for (const dataflowUri of pipelineMeta.dataflow) { 64 | scripts.dataflow.push(await downloadScript(scriptDir, scriptOrder, dataflowUri, ScriptType.Dataflow, enableCache, devMode)); 65 | scriptOrder++; 66 | } 67 | } 68 | scripts.model = await downloadScript(scriptDir, scriptOrder, pipelineMeta.model, ScriptType.Model, enableCache, devMode); 69 | return scripts; 70 | }; 71 | 72 | export const linkCoreToScript = async (scriptModulePath: string): Promise<void> => { 73 | const coreTargetPath = path.join(scriptModulePath, '@pipcook/core'); 74 | await fs.remove(path.join(coreTargetPath)); 75 | const coreScriptPath = require.resolve('@pipcook/core'); 76 | const coreDir = path.join('/core/'); 77 | const coreSourcePath = coreScriptPath.substr(0, coreScriptPath.lastIndexOf(coreDir) + coreDir.length - 1); 78 | await fs.mkdirp(path.join(scriptModulePath, '@pipcook')); 79 | await fs.symlink(coreSourcePath, coreTargetPath); 80 | }; 81 | -------------------------------------------------------------------------------- /packages/cli/src/utils/serve-predict.test.ts: -------------------------------------------------------------------------------- 1 | import test from 'ava'; 2 | import * as express from 'express'; 3 | import * as sinon from 'sinon'; 4 | import * as path from 'path'; 5 | import { stop, predictText, predictImage, serveText, serveImage, serve } from './serve-predict'; 6 | import { PipelineType } from '@pipcook/costa'; 7 | 8 | test.serial.afterEach(() => sinon.restore()); 9 | 10 | test.serial('serveText', async (t) => { 11 | const mockStatic: any = {}; 12 | const stubStatic = sinon.stub(express, 'static').returns(mockStatic); 13 | const app = express(); 14 | const stubUse = sinon.stub(app, 'use').returns(app); 15 | const stubGet = sinon.stub(app, 'get').returns(app); 16 | const mockCb = sinon.stub(); 17 | serveText(app, mockCb); 18 | t.true(stubStatic.calledOnce); 19 | t.true(stubGet.calledOnce); 20 | t.true(stubUse.calledOnce); 21 | t.is(stubStatic.args[0][0], path.join(__dirname, '../../serve-resource/text')); 22 | }); 23
| 24 | test.serial('serveImage', async (t) => { 25 | const mockStatic: any = {}; 26 | const stubStatic = sinon.stub(express, 'static').returns(mockStatic); 27 | const app = express(); 28 | const stubUse = sinon.stub(app, 'use').returns(app); 29 | const stubGet = sinon.stub(app, 'get').returns(app); 30 | const mockCb = sinon.stub(); 31 | serveImage(app, mockCb); 32 | t.true(stubStatic.calledOnce); 33 | t.true(stubGet.calledOnce); 34 | t.true(stubUse.calledOnce); 35 | t.is(stubStatic.args[0][0], path.join(__dirname, '../../serve-resource/image')); 36 | }); 37 | 38 | test.serial('predict image', async (t) => { 39 | const mockPredictResult = { mock: 'value' }; 40 | const mockCb = sinon.stub().resolves(mockPredictResult); 41 | const req: any = { 42 | files: [ { buffer: Buffer.from([ 1, 2, 3 ]) }, { buffer: Buffer.from([ 2, 3, 4 ]) } ] 43 | }; 44 | const resp: any = { 45 | json: sinon.stub() 46 | }; 47 | await predictImage(mockCb, req, resp); 48 | t.true(mockCb.calledOnce); 49 | t.true(resp.json.calledOnce); 50 | t.deepEqual(resp.json.args[0][0], { success: true, data: mockPredictResult }); 51 | }); 52 | 53 | test.serial('predict image but no data', async (t) => { 54 | const message = 'no file available'; 55 | const mockCb = sinon.stub(); 56 | const req: any = { 57 | files: undefined 58 | }; 59 | const resp: any = { 60 | json: sinon.stub() 61 | }; 62 | await predictImage(mockCb, req, resp); 63 | t.false(mockCb.called); 64 | t.true(resp.json.calledOnce); 65 | t.deepEqual(resp.json.args[0][0], { success: false, message }); 66 | }); 67 | 68 | test.serial('predict text', async (t) => { 69 | const mockPredictResult = { mock: 'value' }; 70 | const mockCb = sinon.stub().resolves(mockPredictResult); 71 | const req: any = { 72 | query: { input: [ 'input1', 'input2' ] } 73 | }; 74 | const resp: any = { 75 | json: sinon.stub() 76 | }; 77 | await predictText(mockCb, req, resp); 78 | t.true(mockCb.calledOnce); 79 | t.deepEqual(mockCb.args[0][0], req.query.input); 80 | t.true(resp.json.calledOnce); 81 | t.deepEqual(resp.json.args[0][0], { success: true, data: mockPredictResult }); 82 | }); 83 | 84 | test.serial('predict text but no input', async (t) => { 85 | const message = 'no input available'; 86 | const mockCb = sinon.stub(); 87 | const req: any = { 88 | query: {} 89 | }; 90 | const resp: any = { 91 | json: sinon.stub() 92 | }; 93 | await predictText(mockCb, req, resp); 94 | t.false(mockCb.called); 95 | t.true(resp.json.calledOnce); 96 | t.deepEqual(resp.json.args[0][0], { success: false, message }); 97 | }); 98 | 99 | test('serve predict but pipeline type is not valid', async (t) => { 100 | const mockCb = sinon.stub(); 101 | await t.throwsAsync( 102 | serve(1234, undefined, mockCb), 103 | { message: 'Pipeline type is not supported: undefined' } 104 | ); 105 | t.false(mockCb.called); 106 | }); 107 | 108 | test('start and stop', async (t) => { 109 | const mockCb = sinon.stub(); 110 | await t.notThrowsAsync(serve(1234, PipelineType.TextClassification, mockCb)); 111 | await t.notThrowsAsync(stop()); 112 | }); 113 | -------------------------------------------------------------------------------- /packages/cli/src/utils/serve-predict.ts: -------------------------------------------------------------------------------- 1 | import * as express from 'express'; 2 | import { Express, Request, Response } from 'express'; 3 | import * as multer from 'multer'; 4 | import * as path from 'path'; 5 | import * as http from 'http'; 6 | import { PipelineType } from '@pipcook/costa'; 7 | 8 | let server: http.Server; 9 | const 
ServeMap = { 10 | [PipelineType.TextClassification]: serveText, 11 | [PipelineType.ImageClassification]: serveImage, 12 | [PipelineType.ObjectDetection]: serveImage 13 | }; 14 | 15 | export type PredictCallBack 16 | = (input: Buffer[] | string[]) => Promise<Record<string, any>[]>; 17 | 18 | /** 19 | * Serve model. 20 | * @param port listen port. 21 | * @param pipelineType pipeline type. 22 | * @param predictCallback callback for predict. 23 | */ 24 | export async function serve( 25 | port: number, 26 | pipelineType: PipelineType, 27 | predictCallback: PredictCallBack 28 | ): Promise<void> { 29 | if (!ServeMap[pipelineType]) { 30 | throw new TypeError(`Pipeline type is not supported: ${pipelineType}`); 31 | } 32 | 33 | const app = express(); 34 | ServeMap[pipelineType](app, predictCallback); 35 | return new Promise((resolve) => { 36 | server = app.listen(port, () => { 37 | resolve(); 38 | }); 39 | }); 40 | } 41 | 42 | export async function stop(): Promise<void> { 43 | if (server) { 44 | return new Promise((resolve, reject) => { 45 | server.close((err?: Error) => { 46 | server = undefined; 47 | if (err) { 48 | reject(err); 49 | } else { 50 | resolve(); 51 | } 52 | }); 53 | }); 54 | } 55 | } 56 | 57 | export async function predictText( 58 | predictCallback: PredictCallBack, 59 | req: Request, 60 | res: Response 61 | ): Promise<void> { 62 | if (req.query && req.query['input']) { 63 | let inputs: string[]; 64 | if (Array.isArray(req.query['input'])) { 65 | inputs = req.query['input'] as string[]; 66 | } else if (typeof req.query['input'] === 'string') { 67 | inputs = [ req.query['input'] ]; 68 | } 69 | const result = await predictCallback(inputs); 70 | res.json({ success: true, data: result }); 71 | } else { 72 | res.json({ success: false, message: 'no input available' }); 73 | } 74 | } 75 | 76 | export function serveText( 77 | app: Express, 78 | predictCallback: PredictCallBack 79 | ): void { 80 | app.use(express.static(path.join(__dirname, '../../serve-resource/text'))) 81 | .get('/predict', predictText.bind(this, predictCallback)); 82 | } 83 | 84 | export async function predictImage( 85 | predictCallback: PredictCallBack, 86 | req: Request, 87 | res: Response 88 | ): Promise<void> { 89 | let buf: Buffer[]; 90 | if (Array.isArray(req.files)) { 91 | buf = (req.files as Express.Multer.File[]).map((file) => file.buffer); 92 | } 93 | 94 | if (buf) { 95 | const result = await predictCallback(buf); 96 | res.json({ success: true, data: result }); 97 | } else { 98 | res.json({ success: false, message: 'no file available' }); 99 | } 100 | } 101 | 102 | export function serveImage( 103 | app: Express, 104 | predictCallback: PredictCallBack 105 | ): void { 106 | const upload = multer({ storage: multer.memoryStorage() }); 107 | app.use(express.static(path.join(__dirname, '../../serve-resource/image'))) 108 | .post('/predict', upload.array('image'), predictImage.bind(this, predictCallback)); 109 | } 110 |
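A hedged sketch of serving a model over HTTP; the port number and the echo predictor are stand-ins for a real pipeline's predict function:

```ts
import { PipelineType } from '@pipcook/costa';
import { serve, stop } from './serve-predict';

async function demo(): Promise<void> {
  // Stand-in predictor: one fake classification per input.
  await serve(7682, PipelineType.TextClassification, async (inputs) =>
    inputs.map(() => ({ category: 'mock', score: 1 })));
  // GET http://localhost:7682/predict?input=hello now responds with
  // { "success": true, "data": [ { "category": "mock", "score": 1 } ] }
  await stop();
}
```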
-------------------------------------------------------------------------------- /packages/cli/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "../../tsconfig.json", 3 | "compilerOptions": { 4 | "resolveJsonModule": true, 5 | "composite": false, 6 | "declaration": true, 7 | "outDir": "./dist", 8 | "rootDir": "./src" 9 | }, 10 | "exclude": [ 11 | "**/*.test.ts", 12 | "dist" 13 | ] 14 | } 15 | -------------------------------------------------------------------------------- /packages/core/.gitignore: -------------------------------------------------------------------------------- 1 | /node_modules -------------------------------------------------------------------------------- /packages/core/.npmignore: -------------------------------------------------------------------------------- 1 | .* 2 | tsconfig.* 3 | src 4 | coverage 5 | *.tgz 6 | -------------------------------------------------------------------------------- /packages/core/package-lock.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@pipcook/core", 3 | "version": "2.1.4", 4 | "lockfileVersion": 1, 5 | "requires": true, 6 | "dependencies": { 7 | "typescript": { 8 | "version": "4.3.5", 9 | "resolved": "https://registry.npmjs.org/typescript/-/typescript-4.3.5.tgz", 10 | "integrity": "sha512-DqQgihaQ9cUrskJo9kIyW/+g0Vxsk8cDtZ52a3NGh0YNTfpUSArXSohyUGnvbPazEPLu398C0UxmKSOrPumUzA==", 11 | "dev": true 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /packages/core/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@pipcook/core", 3 | "version": "2.1.5", 4 | "main": "dist/index", 5 | "types": "dist/index", 6 | "files": [ 7 | "dist" 8 | ], 9 | "scripts": { 10 | "test": "ava -v", 11 | "build": "npm run clean && npm run compile", 12 | "clean": "((rm -rf dist tsconfig.tsbuildinfo) || (rmdir /Q /S dist tsconfig.tsbuildinfo)) || echo 'nothing to clean'", 13 | "compile": "tsc -b tsconfig.json", 14 | "cov": "nyc -r text-summary npm run test", 15 | "cov:report": "nyc report -r lcov" 16 | }, 17 | "author": "", 18 | "license": "Apache-2.0", 19 | "dependencies": { 20 | "@pipcook/datacook": "0.1.1", 21 | "papaparse": "^5.3.1" 22 | }, 23 | "devDependencies": { 24 | "@types/node": "^14.6.0", 25 | "@types/papaparse": "^5.2.6", 26 | "@types/sinon": "^9.0.11", 27 | "ava": "^3.13.0", 28 | "nyc": "^15.1.0", 29 | "sinon": "^10.0.0", 30 | "typescript": "^4.3.5" 31 | }, 32 | "publishConfig": { 33 | "access": "public" 34 | }, 35 | "keywords": [], 36 | "description": "", 37 | "ava": { 38 | "extensions": [ 39 | "ts" 40 | ], 41 | "require": [ 42 | "ts-node/register" 43 | ] 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /packages/core/src/artifact.ts: -------------------------------------------------------------------------------- 1 | 2 | /** 3 | * The type for artifact plugin export. As a pipcook artifact plugin, we need to export 4 | * two functions named `initialize` and `build`. 5 | */ 6 | export interface ArtifactExports { 7 | /** 8 | * `initialize` is called before the pipeline starts; 9 | * the plugin can do initialization here, something like environment checking, 10 | * logging in to the server, etc. The options are defined in the pipeline metadata, like: 11 | * ```json 12 | * { 13 | * "artifacts": [{ 14 | * "processor": "server-uploader", 15 | * "options": { 16 | * "targetUrl": "http://os.alibaba.com/pipcook/model/" 17 | * } 18 | * }] 19 | * } 20 | * ``` 21 | * @param options the options for the plugin 22 | */ 23 | initialize(options: Record<string, any>): Promise<void>; 24 | 25 | /** 26 | * After the model is trained successfully, the function `build` will 27 | * be called with the model directory and options.
28 | * @param modelDir the directory containing the trained model 29 | * @param options the options for the plugin 30 | */ 31 | build(modelDir: string, options: Record<string, any>): Promise<void>; 32 | } 33 |
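A minimal sketch of a plugin satisfying `ArtifactExports`; the `targetDir` option and the copy behavior are hypothetical, not a published plugin:

```ts
import * as fs from 'fs-extra';
import type { ArtifactExports } from '@pipcook/core';

// Hypothetical artifact plugin: copy the trained model to options.targetDir.
const artifact: ArtifactExports = {
  async initialize(options: Record<string, any>): Promise<void> {
    if (typeof options.targetDir !== 'string') {
      throw new TypeError('the targetDir option is required');
    }
  },
  async build(modelDir: string, options: Record<string, any>): Promise<void> {
    await fs.copy(modelDir, options.targetDir);
  }
};

export = artifact;
```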
-------------------------------------------------------------------------------- /packages/core/src/dataset-pool/format/coco.ts: -------------------------------------------------------------------------------- 1 | import * as DataCook from '@pipcook/datacook'; 2 | import { Types as DatasetTypes, ArrayDatasetPoolImpl } from '..'; 3 | 4 | export type Options = { 5 | trainAnnotationObj?: DataCook.Dataset.Types.Coco.Meta; 6 | testAnnotationObj?: DataCook.Dataset.Types.Coco.Meta; 7 | validAnnotationObj?: DataCook.Dataset.Types.Coco.Meta; 8 | predictedAnnotationObj?: DataCook.Dataset.Types.Coco.Meta; 9 | }; 10 | 11 | export const makeDatasetPoolFromCocoFormat = async ( 12 | options: Options 13 | ): Promise< 14 | DatasetTypes.DatasetPool< 15 | DataCook.Dataset.Types.Sample, 16 | DatasetTypes.Coco.DatasetMeta 17 | > 18 | > => { 19 | const train = options.trainAnnotationObj ? DataCook.Dataset.makeDatasetFromCoco(options.trainAnnotationObj) : undefined; 20 | const test = options.testAnnotationObj ? DataCook.Dataset.makeDatasetFromCoco(options.testAnnotationObj) : undefined; 21 | const valid = options.validAnnotationObj ? DataCook.Dataset.makeDatasetFromCoco(options.validAnnotationObj) : undefined; 22 | const predicted = options.predictedAnnotationObj ? DataCook.Dataset.makeDatasetFromCoco(options.predictedAnnotationObj) : undefined; 23 | 24 | const categories = options.trainAnnotationObj ? DataCook.Dataset.extractCategoriesFromCoco(options.trainAnnotationObj) : undefined; 25 | 26 | const datasetMeta: DatasetTypes.Coco.DatasetMeta = { 27 | type: DataCook.Dataset.Types.DatasetType.Image, 28 | size: { 29 | train: (await train?.nextBatch(-1))?.length || 0, 30 | test: (await test?.nextBatch(-1))?.length || 0, 31 | valid: (await valid?.nextBatch(-1))?.length || 0, 32 | predicted: (await predicted?.nextBatch(-1))?.length || 0 33 | }, 34 | categories, 35 | info: options.trainAnnotationObj?.info, 36 | licenses: options.trainAnnotationObj?.licenses 37 | }; 38 | await Promise.all([ 39 | train?.seek(0), 40 | test?.seek(0), 41 | valid?.seek(0), 42 | predicted?.seek(0) 43 | ]); 44 | return ArrayDatasetPoolImpl.from({ 45 | train, 46 | test, 47 | valid, 48 | predicted 49 | }, datasetMeta); 50 | }; 51 | -------------------------------------------------------------------------------- /packages/core/src/dataset-pool/format/csv.test.ts: -------------------------------------------------------------------------------- 1 | import test from 'ava'; 2 | import * as DataCook from '@pipcook/datacook'; 3 | import { Types } from '../'; 4 | import { makeDatasetPoolFromCsv } from './csv'; 5 | import Sample = DataCook.Dataset.Types.Sample; 6 | 7 | const csvDataWithHead = 'A,B,C\n1,2,3\n4,5,6\n7,8,9'; 8 | const csvDataWithoutHead = '1,2,3\n4,5,6\n7,8,9'; 9 | 10 | const sample1: Sample = { 11 | data: { 12 | A: '1', 13 | B: '2' 14 | }, 15 | label: { 16 | C: '3' 17 | } 18 | }; 19 | 20 | const sample2: Sample = { 21 | data: { 22 | A: '4', 23 | B: '5' 24 | }, 25 | label: { 26 | C: '6' 27 | } 28 | }; 29 | 30 | const sample3: Sample = { 31 | data: { 32 | A: '7', 33 | B: '8' 34 | }, 35 | label: { 36 | C: '9' 37 | } 38 | }; 39 | 40 | const sampleNoHead1: Sample = { 41 | data: { 42 | '0': '1', 43 | '1': '2' 44 | }, 45 | label: { 46 | '2': '3' 47 | } 48 | }; 49 | 50 | const sampleNoHead2: Sample = { 51 | data: { 52 | '0': '4', 53 | '1': '5' 54 | }, 55 | label: { 56 | '2': '6' 57 | } 58 | }; 59 | 60 | const sampleNoHead3: Sample = { 61 | data: { 62 | '0': '7', 63 | '1': '8' 64 | }, 65 | label: { 66 | '2': '9' 67 | } 68 | }; 69 | 70 | test('should make a dataset from csv', async (t) => { 71 | const dataset = makeDatasetPoolFromCsv({ 72 | trainData: csvDataWithHead, 73 | testData: csvDataWithHead, 74 | validData: undefined, 75 | hasHeader: true, 76 | labels: [ 'C' ] 77 | }); 78 | 79 | const metadata: Types.Csv.DatasetMeta = { 80 | type: DataCook.Dataset.Types.DatasetType.Table, 81 | size: { train: 3, test: 3, valid: 0, predicted: 0 } 82 | }; 83 | 84 | t.deepEqual(await dataset.getDatasetMeta(), metadata); 85 | t.deepEqual(await dataset.train?.next(), sample1); 86 | t.deepEqual(await dataset.test?.next(), sample1); 87 | t.deepEqual(await dataset.train?.nextBatch(2), [ sample2, sample3 ]); 88 | t.deepEqual(await dataset.train?.nextBatch(1), []); 89 | t.deepEqual(await dataset.test?.nextBatch(1), [ sample2 ]); 90 | }); 91 | test('should make a dataset from csv with valid', async (t) => { 92 | const dataset = makeDatasetPoolFromCsv({ 93 | trainData: csvDataWithHead, 94 | testData: csvDataWithHead, 95 | validData: csvDataWithHead, 96 | hasHeader: true, 97 | labels: [ 'C' ] 98 | }); 99 | 100 | const metadata: Types.Csv.DatasetMeta = { 101 | type: DataCook.Dataset.Types.DatasetType.Table, 102 | size: { train: 3, test: 3, valid: 3, predicted: 0 } 103 | }; 104 | 105 | t.deepEqual(await dataset.getDatasetMeta(), metadata); 106 | t.deepEqual(await dataset.train?.next(), sample1); 107 | t.deepEqual(await dataset.test?.next(), sample1); 108 | t.deepEqual(await dataset.valid?.next(), sample1); 109 | t.deepEqual(await dataset.train?.nextBatch(2), [ sample2, sample3 ]); 110 | t.deepEqual(await dataset.test?.nextBatch(1), [ sample2 ]); 111 | t.deepEqual(await dataset.valid?.nextBatch(1), [ sample2 ]); 112 | }); 113 | 114 | test('should make a dataset from csv without head', async (t) => { 115 | const dataset = makeDatasetPoolFromCsv({ 116 | trainData: csvDataWithoutHead, 117 | testData: csvDataWithoutHead, 118 | validData: csvDataWithoutHead, 119 | hasHeader: false, 120 | labels: [ '2' ] 121 | }); 122 | 123 | const metadata: Types.Csv.DatasetMeta = { 124 | type: DataCook.Dataset.Types.DatasetType.Table, 125 | size: { train: 3, test: 3, valid: 3, predicted: 0 } 126 | }; 127 | 128 | t.deepEqual(await dataset.getDatasetMeta(), metadata); 129 | t.deepEqual(await dataset.train?.next(), sampleNoHead1); 130 | t.deepEqual(await dataset.test?.next(), sampleNoHead1); 131 | t.deepEqual(await dataset.valid?.next(), sampleNoHead1); 132 | t.deepEqual(await dataset.train?.nextBatch(2), [ sampleNoHead2, sampleNoHead3 ]); 133 | t.deepEqual(await dataset.test?.nextBatch(1), [ sampleNoHead2 ]); 134 | t.deepEqual(await dataset.valid?.nextBatch(1), [ sampleNoHead2 ]); 135 | }); 136 | -------------------------------------------------------------------------------- /packages/core/src/dataset-pool/format/csv.ts: -------------------------------------------------------------------------------- 1 | import * as DataCook from '@pipcook/datacook'; 2 | import * as Papaparse from 'papaparse'; 3 | import { ArrayDatasetPoolImpl, Types } from '..'; 4 | import Csv = DataCook.Dataset.Types.Csv; 5 | 6 | export interface Options { 7 | trainData?: string; 8 | testData?: string; 9 | validData?: string; 10 | predictedData?: string; 11 | hasHeader: boolean; 12 | delimiter?: string; 13 | labels?: string[]; 14 | } 15 | 16 | function toSamples( 17 | parsedData: Papaparse.ParseResult<Record<string, string>>, 18 | labelFields?: Array<string> 19 | ): Array<Csv.Sample> { 20 | return parsedData.data.map((data) => {
21 | const label: Record<string, string> = {}; 22 | const newData = { ...data }; 23 | labelFields?.forEach((field) => { 24 | label[field] = newData[field]; 25 | delete newData[field]; 26 | }); 27 | return { 28 | data: newData, 29 | label 30 | }; 31 | }); 32 | } 33 | 34 | export const makeDatasetPoolFromCsv = (options: Options): Types.DatasetPool<Csv.Sample, Types.Csv.DatasetMeta> => { 35 | const config = { 36 | header: options.hasHeader, delimiter: options.delimiter 37 | }; 38 | const parsedTrainData = options.trainData ? Papaparse.parse<Record<string, string>>(options.trainData, config) : undefined; 39 | const parsedTestData = options.testData ? Papaparse.parse<Record<string, string>>(options.testData, config) : undefined; 40 | const parsedValidData = options.validData ? Papaparse.parse<Record<string, string>>(options.validData, config) : undefined; 41 | const parsedPredictedData = options.predictedData ? Papaparse.parse<Record<string, string>>(options.predictedData, config) : undefined; 42 | const data = { 43 | trainData: parsedTrainData ? toSamples(parsedTrainData, options.labels) : undefined, 44 | testData: parsedTestData ? toSamples(parsedTestData, options.labels) : undefined, 45 | validData: parsedValidData ? toSamples(parsedValidData, options.labels) : undefined, 46 | predictedData: parsedPredictedData ? toSamples(parsedPredictedData, options.labels) : undefined 47 | }; 48 | const meta: Types.Csv.DatasetMeta = { 49 | type: DataCook.Dataset.Types.DatasetType.Table, 50 | size: { 51 | train: data.trainData?.length || 0, 52 | test: data.testData?.length || 0, 53 | valid: data.validData?.length || 0, 54 | predicted: data.predictedData?.length || 0 55 | } 56 | }; 57 | return ArrayDatasetPoolImpl.from(data, meta); 58 | }; 59 |
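Usage of `makeDatasetPoolFromCsv` with one inline row, mirroring the expectations in the test file above:

```ts
import { makeDatasetPoolFromCsv } from './csv';

async function readCsvPool(): Promise<void> {
  const pool = makeDatasetPoolFromCsv({
    trainData: 'A,B,C\n1,2,3',
    hasHeader: true,
    labels: [ 'C' ]
  });
  const sample = await pool.train?.next();
  // -> { data: { A: '1', B: '2' }, label: { C: '3' } }
  console.log(sample);
}
```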
-------------------------------------------------------------------------------- /packages/core/src/dataset-pool/format/index.ts: -------------------------------------------------------------------------------- 1 | export * as PascalVoc from './pascal-voc'; 2 | export * as Coco from './coco'; 3 | export * as Csv from './csv'; 4 | -------------------------------------------------------------------------------- /packages/core/src/dataset-pool/format/pascal-voc.ts: -------------------------------------------------------------------------------- 1 | import * as DataCook from '@pipcook/datacook'; 2 | import { ArrayDatasetPoolImpl, Types } from '../'; 3 | import DatasetType = DataCook.Dataset.Types.DatasetType; 4 | import PascalVoc = DataCook.Dataset.Types.PascalVoc; 5 | 6 | export interface Options { 7 | trainAnnotationList?: Array<PascalVoc.Annotation>; 8 | testAnnotationList?: Array<PascalVoc.Annotation>; 9 | validAnnotationList?: Array<PascalVoc.Annotation>; 10 | predictedAnnotationList?: Array<PascalVoc.Annotation>; 11 | } 12 | 13 | export const makeDatasetPoolFromPascalVoc = async (options: Options): Promise<Types.DatasetPool<PascalVoc.Sample, Types.PascalVoc.DatasetMeta>> => { 14 | const train = options.trainAnnotationList ? DataCook.Dataset.makeDatasetFromPascalVoc(options.trainAnnotationList) : undefined; 15 | const test = options.testAnnotationList ? DataCook.Dataset.makeDatasetFromPascalVoc(options.testAnnotationList) : undefined; 16 | const valid = options.validAnnotationList ? DataCook.Dataset.makeDatasetFromPascalVoc(options.validAnnotationList) : undefined; 17 | const predicted = options.predictedAnnotationList ? DataCook.Dataset.makeDatasetFromPascalVoc(options.predictedAnnotationList) : undefined; 18 | const categories: Array<string> = options.trainAnnotationList ? DataCook.Dataset.extractCategoriesFromPascalVoc(options.trainAnnotationList) : []; 19 | 20 | const datasetMeta: Types.PascalVoc.DatasetMeta = { 21 | type: DatasetType.Image, 22 | size: { 23 | train: (await train?.nextBatch(-1))?.length || 0, 24 | test: (await test?.nextBatch(-1))?.length || 0, 25 | valid: (await valid?.nextBatch(-1))?.length || 0, 26 | predicted: (await predicted?.nextBatch(-1))?.length || 0 27 | }, 28 | categories 29 | }; 30 | await Promise.all([ 31 | train?.seek(0), 32 | test?.seek(0), 33 | valid?.seek(0), 34 | predicted?.seek(0) 35 | ]); 36 | return ArrayDatasetPoolImpl.from({ 37 | train, 38 | test, 39 | valid, 40 | predicted 41 | }, datasetMeta); 42 | }; 43 | -------------------------------------------------------------------------------- /packages/core/src/dataset-pool/index.ts: -------------------------------------------------------------------------------- 1 | import * as DataCook from '@pipcook/datacook'; 2 | import * as Types from './types'; 3 | 4 | import Dataset = DataCook.Dataset.Types.Dataset; 5 | import Sample = DataCook.Dataset.Types.Sample; 6 | import ArrayDatasetImpl = DataCook.Dataset.ArrayDatasetImpl; 7 | 8 | export * from './pipeline-type'; 9 | export * from './format'; 10 | export * as Types from './types'; 11 | 12 | function isDatasetGroup<T extends Sample>(arg: Types.DatasetGroup<T> | Types.DatasetData<T>): arg is Types.DatasetGroup<T> { 13 | return (arg as any).train || (arg as any).test || (arg as any).predicted || (arg as any).valid; 14 | } 15 | function isTransformOption< 16 | T extends Sample, 17 | D extends Types.DatasetMeta, 18 | TARGET_SAMPLE extends Sample, 19 | TARGET_META extends Types.DatasetMeta 20 | >(arg: Types.TransformOption<T, D, TARGET_SAMPLE, TARGET_META> | ((sample: T) => Promise<TARGET_SAMPLE>)): arg is Types.TransformOption<T, D, TARGET_SAMPLE, TARGET_META> { 21 | return typeof arg !== 'function'; 22 | } 23 | 24 | export class ArrayDatasetPoolImpl<T extends Sample, D extends Types.DatasetMeta> implements Types.DatasetPool<T, D> { 25 | public meta?: D; 26 | 27 | public train?: Dataset<T>; 28 | public test?: Dataset<T>; 29 | public valid?: Dataset<T>; 30 | public predicted?: Dataset<T>; 31 | 32 | static from<SAMPLE extends Sample, META extends Types.DatasetMeta>(datasetGrp: Types.DatasetGroup<SAMPLE>, datasetMeta?: META): ArrayDatasetPoolImpl<SAMPLE, META>; 33 | static from<SAMPLE extends Sample, META extends Types.DatasetMeta>(datasetData: Types.DatasetData<SAMPLE>, datasetMeta?: META): ArrayDatasetPoolImpl<SAMPLE, META>; 34 | static from<SAMPLE extends Sample, META extends Types.DatasetMeta>(datasetDataOrGrp: Types.DatasetGroup<SAMPLE> | Types.DatasetData<SAMPLE>, datasetMeta?: META): ArrayDatasetPoolImpl<SAMPLE, META> { 35 | const obj = new ArrayDatasetPoolImpl<SAMPLE, META>(); 36 | obj.meta = datasetMeta; 37 | if (isDatasetGroup(datasetDataOrGrp)) { 38 | const datasetGrp = datasetDataOrGrp; 39 | obj.train = datasetGrp.train; 40 | obj.test = datasetGrp.test; 41 | obj.valid = datasetGrp.valid; 42 | obj.predicted = datasetGrp.predicted; 43 | } else { 44 | const datasetData = datasetDataOrGrp; 45 | obj.train = datasetData.trainData ? new ArrayDatasetImpl(datasetData.trainData) : undefined; 46 | obj.test = datasetData.testData ? new ArrayDatasetImpl(datasetData.testData) : undefined; 47 | obj.valid = datasetData.validData ? new ArrayDatasetImpl(datasetData.validData) : undefined; 48 | obj.predicted = datasetData.predictedData ?
24 | export class ArrayDatasetPoolImpl<T extends Sample, D extends Types.DatasetMeta> implements Types.DatasetPool<T, D> { 25 | public meta?: D; 26 | 27 | public train?: Dataset<T>; 28 | public test?: Dataset<T>; 29 | public valid?: Dataset<T>; 30 | public predicted?: Dataset<T>; 31 | 32 | static from<SAMPLE extends Sample, META extends Types.DatasetMeta>(datasetGrp: Types.DatasetGroup<SAMPLE>, datasetMeta?: META): ArrayDatasetPoolImpl<SAMPLE, META>; 33 | static from<SAMPLE extends Sample, META extends Types.DatasetMeta>(datasetData: Types.DatasetData<SAMPLE>, datasetMeta?: META): ArrayDatasetPoolImpl<SAMPLE, META>; 34 | static from<SAMPLE extends Sample, META extends Types.DatasetMeta>(datasetDataOrGrp: Types.DatasetGroup<SAMPLE> | Types.DatasetData<SAMPLE>, datasetMeta?: META): ArrayDatasetPoolImpl<SAMPLE, META> { 35 | const obj = new ArrayDatasetPoolImpl<SAMPLE, META>(); 36 | obj.meta = datasetMeta; 37 | if (isDatasetGroup(datasetDataOrGrp)) { 38 | const datasetGrp = datasetDataOrGrp; 39 | obj.train = datasetGrp.train; 40 | obj.test = datasetGrp.test; 41 | obj.valid = datasetGrp.valid; 42 | obj.predicted = datasetGrp.predicted; 43 | } else { 44 | const datasetData = datasetDataOrGrp; 45 | obj.train = datasetData.trainData ? new ArrayDatasetImpl(datasetData.trainData) : undefined; 46 | obj.test = datasetData.testData ? new ArrayDatasetImpl(datasetData.testData) : undefined; 47 | obj.valid = datasetData.validData ? new ArrayDatasetImpl(datasetData.validData) : undefined; 48 | obj.predicted = datasetData.predictedData ? new ArrayDatasetImpl(datasetData.predictedData) : undefined; 49 | } 50 | return obj; 51 | } 52 | 53 | async getDatasetMeta(): Promise<D | undefined> { 54 | return this.meta; 55 | } 56 | 57 | shuffle(): void { 58 | this.train?.shuffle(); 59 | this.test?.shuffle(); 60 | this.valid?.shuffle(); 61 | this.predicted?.shuffle(); 62 | } 63 | 64 | transform< 65 | TARGET_SAMPLE extends Sample 66 | > (transformFun: (sample: T) => Promise<TARGET_SAMPLE>): Types.DatasetPool<TARGET_SAMPLE, D>; 67 | transform< 68 | TARGET_SAMPLE extends Sample, 69 | TARGET_META extends Types.DatasetMeta = D 70 | > (opts: Types.TransformOption<T, D, TARGET_SAMPLE, TARGET_META>): Types.DatasetPool<TARGET_SAMPLE, TARGET_META>; 71 | transform< 72 | TARGET_SAMPLE extends Sample, 73 | TARGET_META extends Types.DatasetMeta = D 74 | > (optsOrFun: Types.TransformOption<T, D, TARGET_SAMPLE, TARGET_META> | ((sample: T) => Promise<TARGET_SAMPLE>)): Types.DatasetPool<TARGET_SAMPLE, TARGET_META> { 75 | if (isTransformOption(optsOrFun)) { 76 | const { metadata, transform } = optsOrFun; 77 | const newDatasetPool = ArrayDatasetPoolImpl.from({ 78 | train: this.train ? DataCook.Dataset.makeTransform(this.train, transform) : undefined, 79 | test: this.test ? DataCook.Dataset.makeTransform(this.test, transform) : undefined, 80 | valid: this.valid ? DataCook.Dataset.makeTransform(this.valid, transform) : undefined, 81 | predicted: this.predicted ? DataCook.Dataset.makeTransform(this.predicted, transform) : undefined 82 | }); 83 | const metaPromise = this.getDatasetMeta(); 84 | newDatasetPool.getDatasetMeta = async () => { 85 | return metadata(await metaPromise); 86 | }; 87 | return newDatasetPool; 88 | } else { 89 | return ArrayDatasetPoolImpl.from({ 90 | train: this.train ? DataCook.Dataset.makeTransform(this.train, optsOrFun) : undefined, 91 | test: this.test ? DataCook.Dataset.makeTransform(this.test, optsOrFun) : undefined, 92 | valid: this.valid ? DataCook.Dataset.makeTransform(this.valid, optsOrFun) : undefined, 93 | predicted: this.predicted ? 
DataCook.Dataset.makeTransform(this.predicted, optsOrFun) : undefined 94 | }, this.meta); 95 | } 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /packages/core/src/dataset-pool/pipeline-type/image-classification.test.ts: -------------------------------------------------------------------------------- 1 | import test from 'ava'; 2 | import * as DataCook from '@pipcook/datacook'; 3 | import { makeImageClassificationDatasetFromList } from './image-classification'; 4 | 5 | test('make dataset pool from train and test list', async (t) => { 6 | const buffer1 = new ArrayBuffer(1); 7 | const buffer2 = new ArrayBuffer(2); 8 | const opts = { 9 | train: [ 10 | { 11 | category: 'a', 12 | uri: 'uri mock' 13 | }, 14 | { 15 | category: 'b', 16 | buffer: buffer1 17 | } 18 | ], 19 | test: [ 20 | { 21 | category: 'b', 22 | uri: 'test uri mock' 23 | }, 24 | { 25 | category: 'a', 26 | buffer: buffer2 27 | } 28 | ], 29 | valid: undefined, 30 | predicted: undefined 31 | }; 32 | const dataset = makeImageClassificationDatasetFromList(opts); 33 | t.deepEqual(await dataset.getDatasetMeta(), { 34 | type: DataCook.Dataset.Types.DatasetType.Image, 35 | size: { train: 2, test: 2, valid: 0, predicted: 0 }, 36 | categories: [ 'a', 'b' ] 37 | }); 38 | t.truthy(dataset.train); 39 | t.truthy(dataset.test); 40 | t.falsy(dataset.valid); 41 | t.falsy(dataset.predicted); 42 | const sample1 = await dataset.train?.next(); 43 | const sample2 = await dataset.train?.next(); 44 | const sample3 = await dataset.train?.next(); 45 | t.deepEqual(sample1, { 46 | data: { uri: 'uri mock', buffer: undefined }, 47 | label: 'a' 48 | }); 49 | t.deepEqual(sample2, { 50 | data: { buffer: buffer1, uri: undefined }, 51 | label: 'b' 52 | }); 53 | t.is(sample3, undefined); 54 | }); 55 | 56 | test('make dataset pool from valid and predict list', async (t) => { 57 | const buffer1 = new ArrayBuffer(1); 58 | const buffer2 = new ArrayBuffer(2); 59 | const opts = { 60 | valid: [ 61 | { 62 | category: 'a', 63 | uri: 'uri mock' 64 | }, 65 | { 66 | category: 'b', 67 | buffer: buffer1 68 | } 69 | ], 70 | predicted: [ 71 | { 72 | category: 'b', 73 | uri: 'test uri mock' 74 | }, 75 | { 76 | category: 'a', 77 | buffer: buffer2 78 | } 79 | ], 80 | train: undefined, 81 | test: undefined 82 | }; 83 | const dataset = makeImageClassificationDatasetFromList(opts); 84 | t.deepEqual(await dataset.getDatasetMeta(), { 85 | type: DataCook.Dataset.Types.DatasetType.Image, 86 | size: { train: 0, test: 0, valid: 2, predicted: 2 }, 87 | categories: undefined 88 | }); 89 | t.falsy(dataset.train); 90 | t.falsy(dataset.test); 91 | t.truthy(dataset.valid); 92 | t.truthy(dataset.predicted); 93 | const sample1 = await dataset.valid?.next(); 94 | const sample2 = await dataset.valid?.next(); 95 | const sample3 = await dataset.valid?.next(); 96 | t.deepEqual(sample1, { 97 | data: { uri: 'uri mock', buffer: undefined }, 98 | label: 'a' 99 | }); 100 | t.deepEqual(sample2, { 101 | data: { buffer: buffer1, uri: undefined }, 102 | label: 'b' 103 | }); 104 | t.is(sample3, undefined); 105 | }); 106 | -------------------------------------------------------------------------------- /packages/core/src/dataset-pool/pipeline-type/image-classification.ts: -------------------------------------------------------------------------------- 1 | import * as DataCook from '@pipcook/datacook'; 2 | import { Types, ArrayDatasetPoolImpl } from '../'; 3 | 4 | import ImageClassification = DataCook.Dataset.Types.ImageClassification; 5 | import Sample = 
ImageClassification.Sample; 6 | import DatasetMeta = Types.ImageClassification.DatasetMeta; 7 | 8 | export const makeImageClassificationDatasetFromList = (opts: Types.ImageClassification.Options): Types.ImageClassification.DatasetPool => { 9 | const categories: Set<string> = new Set(); 10 | if (opts.train) { 11 | for (const data of opts.train) { 12 | categories.add(data.category); 13 | } 14 | } 15 | const meta: Types.ImageClassification.DatasetMeta = { 16 | type: DataCook.Dataset.Types.DatasetType.Image, 17 | size: { 18 | train: opts.train ? opts.train.length : 0, 19 | test: opts.test ? opts.test.length : 0, 20 | valid: opts.valid ? opts.valid.length : 0, 21 | predicted: opts.predicted ? opts.predicted.length : 0 22 | }, 23 | categories: categories.size > 0 ? Array.from(categories) : undefined 24 | }; 25 | return ArrayDatasetPoolImpl.from({ 26 | train: opts.train ? DataCook.Dataset.makeImageClassificationDatasetFromList(opts.train) : undefined, 27 | test: opts.test ? DataCook.Dataset.makeImageClassificationDatasetFromList(opts.test) : undefined, 28 | valid: opts.valid ? DataCook.Dataset.makeImageClassificationDatasetFromList(opts.valid) : undefined, 29 | predicted: opts.predicted ? DataCook.Dataset.makeImageClassificationDatasetFromList(opts.predicted) : undefined 30 | }, meta); 31 | }; 32 | 33 | 34 | export const makeImageClassificationDataset = ( 35 | datasetData: Types.DatasetData<Sample>, 36 | meta: DatasetMeta 37 | ): Types.ImageClassification.DatasetPool => { 38 | return ArrayDatasetPoolImpl.from(datasetData, meta); 39 | }; 40 | -------------------------------------------------------------------------------- /packages/core/src/dataset-pool/pipeline-type/index.ts: -------------------------------------------------------------------------------- 1 | export * from './object-detection'; 2 | export * from './image-classification'; 3 | export * from './text-classification'; -------------------------------------------------------------------------------- /packages/core/src/dataset-pool/pipeline-type/object-detection.ts: -------------------------------------------------------------------------------- 1 | import * as DataCook from '@pipcook/datacook'; 2 | import { Coco as CocoDataset, PascalVoc as PascalVocDataset } from '../format'; 3 | import { ArrayDatasetPoolImpl, Types } from '../'; 4 | 5 | import Sample = DataCook.Dataset.Types.Sample; 6 | import Coco = DataCook.Dataset.Types.Coco; 7 | import PascalVoc = DataCook.Dataset.Types.PascalVoc; 8 | import ObjectDetection = DataCook.Dataset.Types.ObjectDetection; 9 | 10 | export const makeObjectDetectionDatasetFromCoco = async (options: CocoDataset.Options): Promise<Types.ObjectDetection.DatasetPool> => { 11 | const dataset = await CocoDataset.makeDatasetPoolFromCocoFormat(options); 12 | const categoryFinder: Record<number, Coco.Category> = {}; 13 | const categorySet = new Set<string>(); 14 | (await dataset.getDatasetMeta())?.categories?.forEach((item) => { 15 | categoryFinder[item.id] = item; 16 | categorySet.add(item.name); 17 | }); 18 | const categories = Array.from(categorySet); 19 | return dataset.transform({ 20 | transform: async (sample: Sample): Promise<ObjectDetection.Sample> => { 21 | const newLabels = sample.label.map((label) => { 22 | return { 23 | name: categoryFinder[label.category_id].name, 24 | bbox: label.bbox 25 | }; 26 | }); 27 | return { 28 | data: { uri: sample.data.url || sample.data.coco_url || sample.data.flickr_url }, 29 | label: newLabels 30 | }; 31 | }, 32 | metadata: async (meta: Types.Coco.DatasetMeta): Promise<Types.ObjectDetection.DatasetMeta> => { 33 | return { 34 | type: meta.type, 35 | size: meta.size, 36 | categories 37 | }; 38 | } 39 | }); 40 | }; 41 | 
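/** * Convert a Pascal VOC pool into generic object-detection samples; each bounding box is re-encoded from corner coordinates (xmin/ymin/xmax/ymax) into [x, y, width, height]. Illustrative call (a sketch): * * const pool = await makeObjectDetectionDatasetFromPascalVoc({ trainAnnotationList }); * await pool.train?.next(); // { data: { uri }, label: [ { name, bbox: [x, y, w, h] } ] } */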
42 | export const makeObjectDetectionDatasetFromPascalVoc = async (options: PascalVocDataset.Options): Promise<Types.ObjectDetection.DatasetPool> => { 43 | return (await PascalVocDataset.makeDatasetPoolFromPascalVoc(options)).transform( 44 | async (sample: PascalVoc.Sample): Promise<ObjectDetection.Sample> => { 45 | const newLabels: ObjectDetection.Label = sample.label.map((label) => { 46 | return { 47 | name: label.name, 48 | bbox: [ 49 | label.bndbox.xmin, 50 | label.bndbox.ymin, 51 | label.bndbox.xmax - label.bndbox.xmin, 52 | label.bndbox.ymax - label.bndbox.ymin 53 | ] 54 | }; 55 | }); 56 | return { 57 | data: { uri: sample.data.annotation.path }, 58 | label: newLabels 59 | }; 60 | } 61 | ); 62 | }; 63 | 64 | export const makeObjectDetectionDataset = ( 65 | datasetData: Types.DatasetData<ObjectDetection.Sample>, 66 | meta: Types.ObjectDetection.DatasetMeta 67 | ): Types.ObjectDetection.DatasetPool => { 68 | return ArrayDatasetPoolImpl.from(datasetData, meta); 69 | }; 70 | -------------------------------------------------------------------------------- /packages/core/src/dataset-pool/pipeline-type/text-classification.ts: -------------------------------------------------------------------------------- 1 | import * as DataCook from '@pipcook/datacook'; 2 | import { Types, ArrayDatasetPoolImpl } from '..'; 3 | 4 | import TextClassification = DataCook.Dataset.Types.TextClassification; 5 | import Sample = TextClassification.Sample; 6 | import DatasetMeta = Types.TextClassification.DatasetMeta; 7 | 8 | export const makeTextClassificationDatasetFromList = (opts: Types.TextClassification.Options): Types.TextClassification.DatasetPool => { 9 | const categories: Set<string> = new Set(); 10 | if (opts.train) { 11 | for (const data of opts.train) { 12 | categories.add(data.category); 13 | } 14 | } 15 | const meta: Types.TextClassification.DatasetMeta = { 16 | type: DataCook.Dataset.Types.DatasetType.Table, 17 | size: { 18 | train: opts.train ? opts.train.length : 0, 19 | test: opts.test ? opts.test.length : 0, 20 | valid: opts.valid ? opts.valid.length : 0, 21 | predicted: opts.predicted ? opts.predicted.length : 0 22 | }, 23 | categories: categories.size > 0 ? Array.from(categories) : undefined 24 | }; 25 | return ArrayDatasetPoolImpl.from({ 26 | train: opts.train ? DataCook.Dataset.makeTextClassificationDatasetFromList(opts.train) : undefined, 27 | test: opts.test ? DataCook.Dataset.makeTextClassificationDatasetFromList(opts.test) : undefined, 28 | valid: opts.valid ? DataCook.Dataset.makeTextClassificationDatasetFromList(opts.valid) : undefined, 29 | predicted: opts.predicted ? DataCook.Dataset.makeTextClassificationDatasetFromList(opts.predicted) : undefined 30 | }, meta); 31 | }; 32 | 33 | 
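/* * Illustrative usage (a sketch; `text` is assumed to be the payload field of DataCook's TextList items): * * const pool = makeTextClassificationDatasetFromList({ train: [ { text: 'pipcook is great', category: 'positive' } ] }); * (await pool.getDatasetMeta())?.categories; // [ 'positive' ] */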
34 | export const makeTextClassificationDataset = ( 35 | datasetData: Types.DatasetData<Sample>, 36 | meta: DatasetMeta 37 | ): Types.TextClassification.DatasetPool => { 38 | return ArrayDatasetPoolImpl.from(datasetData, meta); 39 | }; 40 | -------------------------------------------------------------------------------- /packages/core/src/dataset-pool/types/format/coco.ts: -------------------------------------------------------------------------------- 1 | import * as DataCook from '@pipcook/datacook'; 2 | import { DatasetSize, DatasetMeta as BaseDatasetMeta } from '../'; 3 | 4 | import DatasetType = DataCook.Dataset.Types.DatasetType; 5 | import Category = DataCook.Dataset.Types.Coco.Category; 6 | import Info = DataCook.Dataset.Types.Coco.Info; 7 | import License = DataCook.Dataset.Types.Coco.License; 8 | 9 | export interface DatasetMeta extends BaseDatasetMeta { 10 | type: DatasetType.Image; 11 | size: DatasetSize; 12 | categories?: Array<Category>; 13 | info?: Info; 14 | licenses?: Array<License>; 15 | } 16 | -------------------------------------------------------------------------------- /packages/core/src/dataset-pool/types/format/csv.ts: -------------------------------------------------------------------------------- 1 | import * as DataCook from '@pipcook/datacook'; 2 | import { DatasetMeta as BaseDatasetMeta } from '../'; 3 | 4 | import DatasetType = DataCook.Dataset.Types.DatasetType; 5 | 6 | export interface DatasetMeta extends BaseDatasetMeta { 7 | type: DatasetType.Table; 8 | } 9 | -------------------------------------------------------------------------------- /packages/core/src/dataset-pool/types/format/index.ts: -------------------------------------------------------------------------------- 1 | export * as Coco from './coco'; 2 | export * as Csv from './csv'; 3 | export * as PascalVoc from './pascal-voc'; 4 | -------------------------------------------------------------------------------- /packages/core/src/dataset-pool/types/format/pascal-voc.ts: -------------------------------------------------------------------------------- 1 | import * as DataCook from '@pipcook/datacook'; 2 | import { ClassificationDatasetMeta } from '../'; 3 | 4 | import DatasetType = DataCook.Dataset.Types.DatasetType; 5 | 6 | export interface DatasetMeta extends ClassificationDatasetMeta { 7 | type: DatasetType.Image; 8 | } 9 | -------------------------------------------------------------------------------- /packages/core/src/dataset-pool/types/index.ts: -------------------------------------------------------------------------------- 1 | import * as DataCook from '@pipcook/datacook'; 2 | 3 | import Dataset = DataCook.Dataset.Types.Dataset; 4 | import Sample = DataCook.Dataset.Types.Sample; 5 | import DatasetType = DataCook.Dataset.Types.DatasetType; 6 | import ImageDimension = DataCook.Dataset.Types.ImageDimension; 7 | import TableSchema = DataCook.Dataset.Types.TableSchema; 8 | 9 | export * from './format'; 10 | export * from './pipeline-type'; 11 | 12 | /** 13 | * size of data source 14 | */ 15 | export interface DatasetSize { 16 | train: number; 17 | test: number; 18 | valid: number; 19 | predicted: number; 20 | } 21 | 22 | /** 23 | * data source api 24 | */ 25 | export interface DatasetPool<T extends Sample, D extends DatasetMeta> { 26 | getDatasetMeta: () => Promise<D | undefined>; 27 | test?: Dataset<T>; 28 | train?: Dataset<T>; 29 | valid?: Dataset<T>; 30 | predicted?: Dataset<T>; 31 | shuffle: (seed?: string) => void; 32 | transform<TARGET_SAMPLE extends Sample>(transformFun: (sample: T) => Promise<TARGET_SAMPLE>): DatasetPool<TARGET_SAMPLE, D>; 33 | transform<TARGET_SAMPLE extends Sample, TARGET_META extends DatasetMeta>(opts: TransformOption<T, D, TARGET_SAMPLE, TARGET_META>): DatasetPool<TARGET_SAMPLE, TARGET_META>; 34 | } 35 | 
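/* * A pool is re-shaped either with a bare sample mapper or with a TransformOption that also rewrites the metadata. Illustrative calls (`normalize`, `toDetectionSample` and `categories` are placeholders, not part of this module): * * pool.transform(async (s) => ({ ...s, data: normalize(s.data) })); * pool.transform({ transform: toDetectionSample, metadata: async (meta) => ({ ...meta, categories }) }); */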
36 | export interface TransformOption< 37 | IN_SAMPLE extends Sample, 38 | IN_META extends DatasetMeta, 39 | OUT_SAMPLE extends Sample = IN_SAMPLE, 40 | OUT_META extends DatasetMeta = IN_META 41 | > { 42 | transform: (sample: IN_SAMPLE) => Promise<OUT_SAMPLE>; 43 | metadata: (meta?: IN_META) => Promise<OUT_META>; 44 | } 45 | 46 | export interface DatasetData<T extends Sample> { 47 | trainData?: Array<T>; 48 | testData?: Array<T>; 49 | validData?: Array<T>; 50 | predictedData?: Array<T>; 51 | } 52 | 53 | export interface DatasetMeta { 54 | type: DatasetType; 55 | size?: DatasetSize; 56 | } 57 | 58 | export interface ClassificationDatasetMeta extends DatasetMeta { 59 | categories?: Array<string>; 60 | } 61 | 62 | export interface ObjectDetectionDatasetMeta extends DatasetMeta { 63 | categories?: Array<string>; 64 | } 65 | 66 | /** 67 | * image data source metadata 68 | */ 69 | export interface ImageDatasetMeta extends DatasetMeta { 70 | dimension: ImageDimension; 71 | } 72 | 73 | /** 74 | * table data source metadata 75 | */ 76 | export interface TableDatasetMeta extends DatasetMeta { 77 | tableSchema: TableSchema; 78 | dataKeys: Array<string> | null; 79 | } 80 | 81 | export interface DatasetGroup<T extends Sample> { 82 | train?: Dataset<T>; 83 | test?: Dataset<T>; 84 | valid?: Dataset<T>; 85 | predicted?: Dataset<T>; 86 | } 87 | -------------------------------------------------------------------------------- /packages/core/src/dataset-pool/types/pipeline-type/image-classification.ts: -------------------------------------------------------------------------------- 1 | import * as DataCook from '@pipcook/datacook'; 2 | import { DatasetPool as BaseDatasetPool, ClassificationDatasetMeta } from '../'; 3 | 4 | import DatasetType = DataCook.Dataset.Types.DatasetType; 5 | import BaseDataset = DataCook.Dataset.Types.Dataset; 6 | 7 | export interface Options { 8 | train?: DataCook.Dataset.Types.ImageClassification.ImageList; 9 | test?: DataCook.Dataset.Types.ImageClassification.ImageList; 10 | valid?: DataCook.Dataset.Types.ImageClassification.ImageList; 11 | predicted?: DataCook.Dataset.Types.ImageClassification.ImageList; 12 | } 13 | 14 | export type Sample = DataCook.Dataset.Types.ImageClassification.Sample; 15 | 16 | export interface DatasetMeta extends ClassificationDatasetMeta { 17 | type: DatasetType.Image; 18 | } 19 | 20 | export type Dataset = BaseDataset<Sample>; 21 | 22 | export type DatasetPool = BaseDatasetPool<Sample, DatasetMeta>; 23 | 24 | export interface SinglePredictResult { 25 | id: number; 26 | category: string; 27 | score: number; 28 | } 29 | 30 | export type PredictResult = Array<SinglePredictResult>; 31 | 
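/* A predict result value looks like (values illustrative): [ { id: 0, category: 'cat', score: 0.92 } ] */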
-------------------------------------------------------------------------------- /packages/core/src/dataset-pool/types/pipeline-type/index.ts: -------------------------------------------------------------------------------- 1 | export * as ObjectDetection from './object-detection'; 2 | export * as ImageClassification from './image-classification'; 3 | export * as TextClassification from './text-classification'; 4 | -------------------------------------------------------------------------------- /packages/core/src/dataset-pool/types/pipeline-type/object-detection.ts: -------------------------------------------------------------------------------- 1 | import * as DataCook from '@pipcook/datacook'; 2 | import { DatasetPool as BaseDatasetPool, ClassificationDatasetMeta } from '../'; 3 | 4 | import DatasetType = DataCook.Dataset.Types.DatasetType; 5 | import BaseDataset = DataCook.Dataset.Types.Dataset; 6 | 7 | export type Sample = DataCook.Dataset.Types.ObjectDetection.Sample; 8 | 9 | export interface DatasetMeta extends ClassificationDatasetMeta { 10 | type: DatasetType.Image; 11 | } 12 | 13 | export type Dataset = BaseDataset<Sample>; 14 | 15 | export type DatasetPool = BaseDatasetPool<Sample, DatasetMeta>; 16 | 17 | export interface PredictObject { 18 | id: number; 19 | category: string; 20 | score: number; 21 | box: DataCook.Dataset.Types.ObjectDetection.Bbox; 22 | } 23 | 24 | export type SinglePredictResult = Array<PredictObject>; 25 | 26 | export type PredictResult = Array<SinglePredictResult>; 27 | -------------------------------------------------------------------------------- /packages/core/src/dataset-pool/types/pipeline-type/text-classification.ts: -------------------------------------------------------------------------------- 1 | import * as DataCook from '@pipcook/datacook'; 2 | import { DatasetPool as BaseDatasetPool, ClassificationDatasetMeta } from '..'; 3 | 4 | import DatasetType = DataCook.Dataset.Types.DatasetType; 5 | import BaseDataset = DataCook.Dataset.Types.Dataset; 6 | 7 | export interface Options { 8 | train?: DataCook.Dataset.Types.TextClassification.TextList; 9 | test?: DataCook.Dataset.Types.TextClassification.TextList; 10 | valid?: DataCook.Dataset.Types.TextClassification.TextList; 11 | predicted?: DataCook.Dataset.Types.TextClassification.TextList; 12 | } 13 | 14 | export type Sample = DataCook.Dataset.Types.TextClassification.Sample; 15 | 16 | export interface DatasetMeta extends ClassificationDatasetMeta { 17 | type: DatasetType.Table; 18 | } 19 | 20 | export type Dataset = BaseDataset<Sample>; 21 | 22 | export type DatasetPool = BaseDatasetPool<Sample, DatasetMeta>; 23 | 24 | export interface SinglePredictResult { 25 | id: number; 26 | category: string; 27 | score: number; 28 | } 29 | 30 | export type PredictResult = Array<SinglePredictResult>; 31 | -------------------------------------------------------------------------------- /packages/core/src/index.ts: -------------------------------------------------------------------------------- 1 | export * as DataCook from '@pipcook/datacook'; 2 | export * from './runtime'; 3 | export * from './artifact'; 4 | export * as DatasetPool from './dataset-pool'; 5 | -------------------------------------------------------------------------------- /packages/core/src/runtime.ts: -------------------------------------------------------------------------------- 1 | import * as DataCook from '@pipcook/datacook'; 2 | import { Types } from './dataset-pool'; 3 | 4 | /** 5 | * The model script can emit the training progress through the API `Runtime.notifyProgress`. 6 | */ 7 | export interface ProgressInfo { 8 | /** 9 | * The training progress percentage; it should be within [0, 100]. 10 | */ 11 | value: number; 12 | /** 13 | * Custom data. 14 | */ 15 | extendData: Record<string, any>; 16 | } 17 | 18 | /** 19 | * A Runtime is used to run pipelines on a specific platform. The interface `Runtime` 20 | * declares the APIs that a runtime implementation must or should provide. 21 | */ 22 | export interface Runtime<T extends DataCook.Dataset.Types.Sample, M extends Types.DatasetMeta> { 23 | // report progress of pipeline 24 | notifyProgress: (progress: ProgressInfo) => void; 25 | // save the model file 26 | saveModel: (localPathOrStream: string | NodeJS.ReadableStream, filename?: string) => Promise<void>; 27 | // read model file 28 | readModel: () => Promise<string>; 29 | // datasource 30 | dataset: Types.DatasetPool<T, M>; 31 | } 32 | 33 | export type FrameworkModule = any; 34 | 
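/* * Sketch of a model entry written against this runtime API (the training loop is elided; `MySample` and `MyMeta` are placeholders): * * const entry: ModelEntry<MySample, MyMeta> = async (api, options, context) => { * api.notifyProgress({ value: 0, extendData: {} }); * // iterate api.dataset.train and fit a model here, then persist it: * await api.saveModel(context.workspace.modelDir); * }; */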
35 | /** 36 | * There are two kinds of pipeline task: `TaskType.TRAIN` means the pipeline runs for model training, 37 | * and `TaskType.PREDICT` means it runs for prediction. 38 | */ 39 | export enum TaskType { TRAIN = 1, PREDICT = 2 } 40 | 41 | /** 42 | * The context of script running. 43 | */ 44 | export interface ScriptContext { 45 | /** 46 | * The workspace for the pipeline. There are some directories to save temporary files. 47 | */ 48 | workspace: { 49 | /** 50 | * Dataset directory; the dataset files should be saved here. 51 | */ 52 | dataDir: string; 53 | /** 54 | * Cache directory. Every sample passed to the model script will be cached here, 55 | * so the dataflow scripts will not be executed again after the first epoch. 56 | */ 57 | cacheDir: string; 58 | /** 59 | * The model file should be saved here. 60 | */ 61 | modelDir: string; 62 | 63 | /** 64 | * framework directory 65 | */ 66 | frameworkDir: string; 67 | }, 68 | taskType: TaskType; 69 | } 70 | 71 | export type PredictResult = Types.ObjectDetection.PredictResult | Types.TextClassification.PredictResult | Types.ImageClassification.PredictResult | any; 72 | 73 | /** 74 | * type of data source script entry 75 | */ 76 | export type DatasourceEntry<SAMPLE extends DataCook.Dataset.Types.Sample, META extends Types.DatasetMeta> = 77 | (options: Record<string, any>, context: ScriptContext) => Promise<Types.DatasetPool<SAMPLE, META>>; 78 | 79 | /** 80 | * type of data flow script entry 81 | */ 82 | export type DataflowEntry< 83 | IN extends DataCook.Dataset.Types.Sample, 84 | IN_META extends Types.DatasetMeta, 85 | OUT extends DataCook.Dataset.Types.Sample = IN, 86 | OUT_META extends Types.DatasetMeta = IN_META 87 | > = 88 | (api: Types.DatasetPool<IN, IN_META>, options: Record<string, any>, context: ScriptContext) => Promise<Types.DatasetPool<OUT, OUT_META>>; 89 | 90 | /** 91 | * type of model script entry for train 92 | */ 93 | export type ModelEntry<SAMPLE extends DataCook.Dataset.Types.Sample, META extends Types.DatasetMeta> = 94 | (api: Runtime<SAMPLE, META>, options: Record<string, any>, context: ScriptContext) => Promise<void>; 95 | 96 | /** 97 | * type of model script entry for predict 98 | */ 99 | export type PredictEntry<SAMPLE extends DataCook.Dataset.Types.Sample, META extends Types.DatasetMeta> = 100 | (api: Runtime<SAMPLE, META>, options: Record<string, any>, context: ScriptContext) => Promise<PredictResult>; 101 | 102 | /** 103 | * type of model script entry for train and predict 104 | */ 105 | export interface ExtModelEntry<SAMPLE extends DataCook.Dataset.Types.Sample, META extends Types.DatasetMeta> { 106 | train: ModelEntry<SAMPLE, META>; 107 | predict: PredictEntry<SAMPLE, META>; 108 | } 109 | -------------------------------------------------------------------------------- /packages/core/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json.schemastore.org/tsconfig", 3 | "compilerOptions": { 4 | "outDir": "dist", 5 | "rootDir": "src", 6 | 7 | "emitDecoratorMetadata": true, 8 | "experimentalDecorators": true, 9 | "noImplicitAny": true, 10 | "strictNullChecks": true, 11 | "resolveJsonModule": true, 12 | "skipLibCheck": true, 13 | 14 | "incremental": true, 15 | 16 | "lib": [ "es2018", "esnext.asynciterable" ], 17 | "module": "commonjs", 18 | "esModuleInterop": false, 19 | "moduleResolution": "node", 20 | "target": "es2017", 21 | "sourceMap": true, 22 | "declaration": true 23 | }, 24 | "include": ["src"], 25 | "exclude": [ 26 | "**/node_modules/**", 27 | "**/*.test.ts", 28 | "**/dist/**", 29 | "**/*.d.ts" 30 | ] 31 | } 32 | -------------------------------------------------------------------------------- /packages/costa/.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | dist 3 | .debug 4 | .tests 5 | -------------------------------------------------------------------------------- /packages/costa/.npmignore: -------------------------------------------------------------------------------- 1 | .* 2 
| tsconfig.* 3 | src 4 | *.tgz 5 | benchmark 6 | coverage 7 | -------------------------------------------------------------------------------- /packages/costa/.nycrc: -------------------------------------------------------------------------------- 1 | { 2 | "include": [ 3 | "src/**/*.ts" 4 | ], 5 | "all": true, 6 | "instrument": true 7 | } 8 | -------------------------------------------------------------------------------- /packages/costa/benchmark/bootstrap.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | const { join } = require('path'); 4 | const { CostaRuntime } = require('../dist/src/runtime'); 5 | const { PluginRunnable } = require('../dist/src/runnable'); 6 | 7 | const costa = new CostaRuntime({ 8 | installDir: join(__dirname, '../.tests/plugins'), 9 | datasetDir: join(__dirname, '../.tests/datasets'), 10 | componentDir: join(__dirname, '../.tests/components'), 11 | npmRegistryPrefix: 'https://registry.npmjs.com/' 12 | }); 13 | const r = new PluginRunnable(costa); 14 | 15 | (async () => { 16 | await r.bootstrap({}); 17 | r.destroy(); 18 | })(); 19 | -------------------------------------------------------------------------------- /packages/costa/benchmark/makefile: -------------------------------------------------------------------------------- 1 | export DEBUG=costa* 2 | 3 | bootstrap: 4 | time node ./bootstrap.js 5 | install: 6 | time node ./plugin-install.js 7 | -------------------------------------------------------------------------------- /packages/costa/benchmark/plugin-install.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | const { join } = require('path'); 4 | const { CostaRuntime } = require('../dist/src/runtime'); 5 | 6 | const costa = new CostaRuntime({ 7 | installDir: join(__dirname, '../.tests/plugins'), 8 | datasetDir: join(__dirname, '../.tests/datasets'), 9 | componentDir: join(__dirname, '../.tests/components'), 10 | npmRegistryPrefix: 'https://registry.npmjs.com/' 11 | }); 12 | 13 | (async () => { 14 | const pkg = await costa.fetch('@pipcook/plugins-tensorflow-resnet-model-define'); 15 | await costa.install(pkg, process); 16 | })(); 17 | -------------------------------------------------------------------------------- /packages/costa/package-lock.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@pipcook/costa", 3 | "version": "2.1.4", 4 | "lockfileVersion": 1, 5 | "requires": true, 6 | "dependencies": { 7 | "arg": { 8 | "version": "4.1.3", 9 | "resolved": "https://registry.npmjs.org/arg/-/arg-4.1.3.tgz", 10 | "integrity": "sha512-58S9QDqG0Xx27YwPSt9fJxivjYl432YCwfDMfZ+71RAqUrZef7LrKQZ3LHLOwCS4FLNBplP533Zx895SeOCHvA==", 11 | "dev": true 12 | }, 13 | "buffer-from": { 14 | "version": "1.1.1", 15 | "resolved": "https://registry.npmjs.org/buffer-from/-/buffer-from-1.1.1.tgz", 16 | "integrity": "sha512-MQcXEUbCKtEo7bhqEs6560Hyd4XaovZlO/k9V3hjVUF/zwW7KBVdSK4gIt/bzwS9MbR5qob+F5jusZsb0YQK2A==", 17 | "dev": true 18 | }, 19 | "create-require": { 20 | "version": "1.1.1", 21 | "resolved": "https://registry.npmjs.org/create-require/-/create-require-1.1.1.tgz", 22 | "integrity": "sha512-dcKFX3jn0MpIaXjisoRvexIJVEKzaq7z2rZKxf+MSr9TkdmHmsU4m2lcLojrj/FHl8mk5VxMmYA+ftRkP/3oKQ==", 23 | "dev": true 24 | }, 25 | "diff": { 26 | "version": "4.0.2", 27 | "resolved": "https://registry.npmjs.org/diff/-/diff-4.0.2.tgz", 28 | "integrity": 
"sha512-58lmxKSA4BNyLz+HHMUzlOEpg09FV+ev6ZMe3vJihgdxzgcwZ8VoEEPmALCZG9LmqfVoNMMKpttIYTVG6uDY7A==", 29 | "dev": true 30 | }, 31 | "make-error": { 32 | "version": "1.3.6", 33 | "resolved": "https://registry.npmjs.org/make-error/-/make-error-1.3.6.tgz", 34 | "integrity": "sha512-s8UhlNe7vPKomQhC1qFelMokr/Sc3AgNbso3n74mVPA5LTZwkB9NlXf4XPamLxJE8h0gh73rM94xvwRT2CVInw==", 35 | "dev": true 36 | }, 37 | "source-map": { 38 | "version": "0.6.1", 39 | "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", 40 | "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==", 41 | "dev": true 42 | }, 43 | "source-map-support": { 44 | "version": "0.5.19", 45 | "resolved": "https://registry.npmjs.org/source-map-support/-/source-map-support-0.5.19.tgz", 46 | "integrity": "sha512-Wonm7zOCIJzBGQdB+thsPar0kYuCIzYvxZwlBa87yi/Mdjv7Tip2cyVbLj5o0cFPN4EVkuTwb3GDDyUx2DGnGw==", 47 | "dev": true, 48 | "requires": { 49 | "buffer-from": "^1.0.0", 50 | "source-map": "^0.6.0" 51 | } 52 | }, 53 | "ts-node": { 54 | "version": "9.1.1", 55 | "resolved": "https://registry.npmjs.org/ts-node/-/ts-node-9.1.1.tgz", 56 | "integrity": "sha512-hPlt7ZACERQGf03M253ytLY3dHbGNGrAq9qIHWUY9XHYl1z7wYngSr3OQ5xmui8o2AaxsONxIzjafLUiWBo1Fg==", 57 | "dev": true, 58 | "requires": { 59 | "arg": "^4.1.0", 60 | "create-require": "^1.1.0", 61 | "diff": "^4.0.1", 62 | "make-error": "^1.1.1", 63 | "source-map-support": "^0.5.17", 64 | "yn": "3.1.1" 65 | } 66 | }, 67 | "yn": { 68 | "version": "3.1.1", 69 | "resolved": "https://registry.npmjs.org/yn/-/yn-3.1.1.tgz", 70 | "integrity": "sha512-Ux4ygGWsu2c7isFWe8Yu1YluJmqVhxqK2cLXNQA5AcC3QfbGNpM7fu0Y8b/z16pXLnFxZYvWhd3fhBY9DLmC6Q==", 71 | "dev": true 72 | } 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /packages/costa/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@pipcook/costa", 3 | "version": "2.1.5", 4 | "description": "The Pipcook Script Runner", 5 | "main": "dist/index", 6 | "types": "dist/index", 7 | "files": [ 8 | "dist" 9 | ], 10 | "scripts": { 11 | "test": "ava -v", 12 | "cov": "nyc --reporter=text-summary npm run test", 13 | "cov:report": "nyc report -r=lcov", 14 | "build": "npm run clean && npm run compile", 15 | "clean": "((rm -rf dist tsconfig.tsbuildinfo) || (rmdir /Q /S dist tsconfig.tsbuildinfo)) || echo 'nothing to clean'", 16 | "compile": "tsc -b tsconfig.json" 17 | }, 18 | "license": "Apache-2.0", 19 | "dependencies": { 20 | "@pipcook/core": "^2.1.4", 21 | "debug": "^4.3.1" 22 | }, 23 | "devDependencies": { 24 | "@types/debug": "^4.1.5", 25 | "@types/node": "^14.6.0", 26 | "@types/sinon": "^9.0.11", 27 | "ava": "^3.13.0", 28 | "nyc": "^15.1.0", 29 | "sinon": "^10.0.0", 30 | "ts-node": "^9.1.1", 31 | "typescript": "^4.3.5" 32 | }, 33 | "publishConfig": { 34 | "access": "public" 35 | }, 36 | "keywords": [], 37 | "ava": { 38 | "extensions": [ 39 | "ts" 40 | ], 41 | "require": [ 42 | "ts-node/register" 43 | ] 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /packages/costa/src/constans.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * The initialize file name. 
3 | */ 4 | export const FrameworkIndexFile = 'index.js'; 5 | -------------------------------------------------------------------------------- /packages/costa/src/types.ts: -------------------------------------------------------------------------------- 1 | import { ParsedUrlQuery } from 'querystring'; 2 | 3 | /** 4 | * There are three types of Pipcook script: `DataSource`, `Dataflow`, `Model`. 5 | */ 6 | export enum ScriptType { 7 | /** 8 | * The `DataSource` script collects the original data 9 | * and offers an API object for the following flow. 10 | */ 11 | DataSource, 12 | /** 13 | * The `Dataflow` script processes the original data from the `DataSource` script. 14 | */ 15 | Dataflow, 16 | /** 17 | * The `Model` script accesses the data from the `DataSource` and `Dataflow` scripts, 18 | * and optionally trains the model. 19 | */ 20 | Model 21 | } 22 | 23 | /** 24 | * The Pipcook script structure. It describes a script and tells `Costa` how to run it. 25 | */ 26 | export interface PipcookScript { 27 | /** 28 | * The script name. 29 | */ 30 | name: string; 31 | /** 32 | * The script path in the file system. 33 | */ 34 | path: string; 35 | /** 36 | * Script type. 37 | */ 38 | type: ScriptType; 39 | /** 40 | * The script query data. 41 | */ 42 | query: ParsedUrlQuery; 43 | } 44 | 45 | /** 46 | * Type of package in the framework. 47 | */ 48 | export enum PackageType { 49 | /** 50 | * Python package which can be used through `boa.import`. 51 | */ 52 | Python = 'python', 53 | /** 54 | * JS module which can be used through `import`. 55 | */ 56 | JS = 'js' 57 | } 58 | 59 | /** 60 | * The package structure of a Pipcook framework: this struct describes 61 | * a framework bundle, the platform it was built for, and where its 62 | * Python and JS packages live inside the directory, so that scripts 63 | * can import them at runtime. 64 | */ 65 | export interface PipcookFramework { 66 | /** 67 | * The location of the framework. 68 | */ 69 | path: string; 70 | /** 71 | * The framework name. 72 | */ 73 | name: string; 74 | /** 75 | * Description of the framework. 76 | */ 77 | desc: string | null; 78 | /** 79 | * Framework version. 80 | */ 81 | version: string; 82 | /** 83 | * Arch of the current machine, should be one of 'x86', 'x64'. 84 | */ 85 | arch: string | null; 86 | /** 87 | * OS type, it should be one of 'darwin', 'win32', 'linux'. 88 | */ 89 | platform: string | null; 90 | /** 91 | * Node version, it should be a semver string: see https://www.npmjs.com/package/semver. 92 | */ 93 | nodeVersion: string | null; 94 | /** 95 | * The N-API version that the framework depends on. 96 | */ 97 | napiVersion: number | null; 98 | /** 99 | * Python runtime version; the Python packages run on Boa. 100 | */ 101 | pythonVersion: string | null; 102 | /** 103 | * Python site-packages relative path in the directory, 'site-packages' by default. 104 | */ 105 | pythonPackagePath: string | null; 106 | /** 107 | * The node modules relative path in the directory, 'node_modules' by default. 108 | */ 109 | jsPackagePath: string | null; 110 | } 111 | 112 | /** 113 | * The struct which defines the scripts in a pipeline: 114 | * datasource: the script used to fetch the data, 115 | * dataflow: the scripts used to process the data, 116 | * model: the script that defines, trains and evaluates the model. 117 | */ 118 | export interface ScriptConfig { 119 | datasource: PipcookScript | null; 120 | dataflow: Array<PipcookScript> | null; 121 | model: PipcookScript; 122 | } 123 | 124 | /** 125 | * Artifact configuration: `processor` is the name and version of the artifact plugin, 126 | * like `pipcook-ali-oss-uploader@0.0.1`. The other fields are the options which will be 127 | * passed into the plugin. 
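 * For example, the zip artifact used by the pipelines under test/pipelines is configured as `{ "processor": "pipcook-artifact-zip@0.0.2", "target": "./my-model.zip" }` (the target path here is illustrative).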
128 | */ 129 | export interface Artifact { 130 | processor: string; 131 | [k: string]: any; 132 | } 133 | 134 | /** 135 | * Pipeline type 136 | */ 137 | export enum PipelineType { 138 | ObjectDetection = 'ObjectDetection', 139 | TextClassification = 'TextClassification', 140 | ImageClassification = 'ImageClassification' 141 | } 142 | 143 | /** 144 | * pipeline configuration structure 145 | */ 146 | export interface PipelineMeta { 147 | /** 148 | * pipeline version, '2.0' by default 149 | */ 150 | specVersion: string; 151 | /** 152 | * data source script url or sql 153 | */ 154 | datasource: string; 155 | /** 156 | * data process scripts, set to null if not used 157 | */ 158 | dataflow: Array<string> | null; 159 | /** 160 | * model script url 161 | */ 162 | model: string; 163 | /** 164 | * artifact plugins and their options 165 | */ 166 | artifact: Array<Artifact>; 167 | /** 168 | * pipeline options 169 | */ 170 | options: Record<string, any>; 171 | /** 172 | * Pipeline type, which could be one of `PipelineType` or null. 173 | * It should be specified if predict is needed. 174 | */ 175 | type?: PipelineType; 176 | } 177 | -------------------------------------------------------------------------------- /packages/costa/src/utils.ts: -------------------------------------------------------------------------------- 1 | import { FrameworkModule } from '@pipcook/core'; 2 | 3 | /** 4 | * Import a module from a path. 5 | * @param path the path to import from 6 | */ 7 | export const importFrom = (path: string): Promise<FrameworkModule> => { 8 | return import(path); 9 | }; 10 | 
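/* Illustrative call (assumes the framework ships a node_modules directory; `join` and `frameworkDir` are placeholders): const tfjs = await importFrom(join(frameworkDir, 'node_modules', '@tensorflow/tfjs')); */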
"https://pc-github.oss-us-west-1.aliyuncs.com/model/mobilenet_tfjs/web_model/model.json" 19 | } 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /test/pipelines/image-classification-resnet.json: -------------------------------------------------------------------------------- 1 | { 2 | "specVersion": "2.0", 3 | "type": "ImageClassification", 4 | "datasource": "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/image-classification/build/datasource.js?url=https://pc-github.oss-us-west-1.aliyuncs.com/dataset/image-classification-test.zip", 5 | "dataflow": [ 6 | "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/image-classification/build/dataflow.js?size=224&size=224" 7 | ], 8 | "artifact": [{ 9 | "processor": "pipcook-artifact-zip@0.0.2", 10 | "target": "./image-classification-resnet-model.zip" 11 | }], 12 | "model": "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/image-classification/build/model.js", 13 | "options": { 14 | "framework": "tfjs@3.8", 15 | "train": { 16 | "epochs": 1, 17 | "validationRequired": true, 18 | "modelUrl": "https://pc-github.oss-us-west-1.aliyuncs.com/model/resnet50_tfjs/model.json" 19 | } 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /test/pipelines/object-detection-yolo.json: -------------------------------------------------------------------------------- 1 | { 2 | "specVersion": "2.0", 3 | "type": "ObjectDetection", 4 | "datasource": "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/object-detection-yolo/build/datasource.js?url=https://pc-github.oss-us-west-1.aliyuncs.com/dataset/object-detection-yolo-min.zip", 5 | "dataflow": [ 6 | "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/object-detection-yolo/build/dataflow.js?size=416&size=416" 7 | ], 8 | "model": "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/object-detection-yolo/build/model.js", 9 | "artifact": [{ 10 | "processor": "pipcook-artifact-zip@0.0.2", 11 | "target": "./object-detection-yolo-model.zip" 12 | }], 13 | "options": { 14 | "framework": "tfjs@3.8", 15 | "train": { 16 | "epochs": 1, 17 | "batchSize": 1, 18 | "validationRequired": true 19 | } 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /test/pipelines/text-classification-bayes.json: -------------------------------------------------------------------------------- 1 | { 2 | "specVersion": "2.0", 3 | "type": "TextClassification", 4 | "datasource": "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/text-classification-bayes/build/datasource.js?url=http://pc-github.oss-us-west-1.aliyuncs.com/dataset/text-classification.zip", 5 | "model": "https://cdn.jsdelivr.net/gh/imgcook/pipcook-script@9d210de/scripts/text-classification-bayes/build/model.js", 6 | "artifact": [{ 7 | "processor": "pipcook-artifact-zip@0.0.2", 8 | "target": "./text-classification-bayes-model.zip" 9 | }], 10 | "options": { 11 | "framework": "tfjs@3.8-nlp", 12 | "backend": "@tensorflow/tfjs-backend-cpu" 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /tools/benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # get flag argument 4 | for i in $@ 5 | do 6 | case $i in 7 | -u=*|--upload=*) 8 | UPLOAD=${i#*=} 9 | shift 10 | ;; 11 | esac 12 | done 13 | 14 | t1=$(date +%s) 15 | npm install 16 | 
14 | t1=$(date +%s) 15 | npm install 16 | t2=$(date +%s) 17 | install_time=$((t2-t1)) 18 | 19 | t1=$(date +%s) 20 | npm run build 21 | t2=$(date +%s) 22 | build_time=$((t2-t1)) 23 | 24 | t1=$(date +%s) 25 | npm run test 26 | t2=$(date +%s) 27 | test_time=$((t2-t1)) 28 | 29 | t1=$(date +%s) 30 | ./packages/cli/dist/bin/pipcook train ./example/pipelines/image-classification-mobilenet.json 31 | t2=$(date +%s) 32 | mobilenet_time=$((t2-t1)) 33 | 34 | time_stamp=$(date +%s) 35 | 36 | if [ -z "${UPLOAD}" ] 37 | then 38 | echo "{\"install_time\":${install_time}, \"build_time\":$build_time, \"test_time\":$test_time, \ 39 | \"mobilenet_time\":$mobilenet_time, \"timestamp\": $time_stamp }" | jq 40 | else 41 | git clone https://github.com/imgcook/pipcook-benchmark.git 42 | cd pipcook-benchmark 43 | 44 | echo $(cat data.json | 45 | jq --arg commit_id $CIRCLE_SHA1 \ 46 | --arg install_time $install_time \ 47 | --arg build_time $build_time \ 48 | --arg test_time $test_time \ 49 | --arg time_stamp $time_stamp \ 50 | --arg mobilenet_time $mobilenet_time \ 51 | '. + [{commitId: $commit_id, install_time: $install_time, build_time: $build_time, test_time: $test_time, mobilenet_time: $mobilenet_time, timestamp: $time_stamp}]') > data.json 52 | 53 | git config user.email ${EMAIL} 54 | git config user.name ${USERNAME} 55 | git add data.json 56 | git commit --allow-empty -am "update data" 57 | git push -q https://${TOKEN}@github.com/imgcook/pipcook-benchmark.git 58 | fi 59 | -------------------------------------------------------------------------------- /tools/coverage.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # clear old data 4 | rm -rf .nyc_output coverage && \ 5 | # generate coverage file 6 | npx lerna run cov && \ 7 | # merge coverage directory into package root 8 | find packages -type d -name .nyc_output -exec cp -r {} ./ \; 9 | -------------------------------------------------------------------------------- /tools/mkdoc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | function mkdoc() { 4 | npx typedoc \ 5 | --name "$3" \ 6 | --inputFiles "packages/$2/src" \ 7 | --out "$1" \ 8 | --theme default \ 9 | --tsconfig "packages/$2/tsconfig.json" \ 10 | --readme none \ 11 | --mode file 12 | } 13 | 14 | mkdoc docs/typedoc/script core "Pipcook Interfaces" 15 | mkdoc docs/typedoc/runtime costa "Costa Interfaces" 16 | -------------------------------------------------------------------------------- /tools/run_pipeline.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | 3 | ./packages/cli/dist/bin/pipcook run "./example/pipelines/$1.json" 4 | 5 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "resolveJsonModule": true, 4 | "composite": true, 5 | "outDir": "./dist", 6 | "rootDir": ".", 7 | "declaration": true, 8 | "noImplicitAny": true, 9 | "incremental": true, 10 | "module": "commonjs", 11 | "target": "es6", 12 | "lib": [ "es6" ], 13 | "skipLibCheck": true, 14 | "sourceMap": true, 15 | "esModuleInterop": false 16 | }, 17 | "exclude": [ 18 | "node_modules", 19 | "dist" 20 | ] 21 | } --------------------------------------------------------------------------------