├── .c8rc.json ├── .github ├── dependabot.yml ├── release-drafter.yml └── workflows │ ├── ci.yml │ ├── codeql.yml │ ├── post-dependabot-prs.yml │ ├── release-drafter.yml │ └── update-license.yml ├── .gitignore ├── .mocharc.json ├── .yarnrc.yml ├── LICENSE.txt ├── README.md ├── biome.jsonc ├── lib ├── AbstractTokenizer.ts ├── BufferTokenizer.ts ├── FileTokenizer.ts ├── ReadStreamTokenizer.ts ├── core.ts ├── index.ts ├── tsconfig.json └── types.ts ├── package.json ├── test ├── resources │ ├── id3v1.mp3 │ ├── test1.dat │ ├── test2.dat │ └── test3.dat ├── test.ts ├── tsconfig.json └── util.ts ├── tsconfig.json └── yarn.lock /.c8rc.json: -------------------------------------------------------------------------------- 1 | { 2 | "reporter": [ 3 | "lcov", 4 | "text" 5 | ], 6 | "include": ["lib/**"], 7 | "exclude": [".yarn/**"] 8 | } 9 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | 4 | # ECMAScript Module (ESM) 5 | - package-ecosystem: npm 6 | directory: "/" 7 | schedule: 8 | interval: weekly 9 | time: "06:00" 10 | open-pull-requests-limit: 30 11 | versioning-strategy: increase 12 | target-branch: "master" 13 | labels: 14 | - dependencies 15 | groups: 16 | remark: 17 | dependency-type: "development" 18 | patterns: 19 | - "remark*" 20 | types: 21 | dependency-type: "development" 22 | patterns: 23 | - "@types/*" 24 | -------------------------------------------------------------------------------- /.github/release-drafter.yml: -------------------------------------------------------------------------------- 1 | # Release Drafter template 2 | # Ref: https://github.com/marketplace/actions/release-drafter 3 | 4 | name-template: 'v$RESOLVED_VERSION' 5 | tag-template: 'v$RESOLVED_VERSION' 6 | categories: 7 | - title: 💥 API Changes 8 | labels: 9 | - API change 10 | - title: 🚀 Enhancements 11 | labels: 12 | - enhancement 13 | - title: 🎨 Improvements 14 | labels: 15 | - improvement 16 | - title: 🐛 Bug Fixes 17 | labels: 18 | - bug 19 | - title: 🔧 Under the hood 20 | labels: 21 | - debt 22 | - title: ⬆️ Dependencies 23 | labels: 24 | - dependencies 25 | - title: 📝 Documentation 26 | labels: 27 | - documentation 28 | exclude-labels: 29 | - 'DevOps' 30 | - dev-dependencies 31 | change-template: '- $TITLE @$AUTHOR (#$NUMBER)' 32 | change-title-escapes: '\<*_&' # You can add # and @ to disable mentions, and add ` to disable code blocks. 
33 | version-resolver: 34 | major: 35 | labels: 36 | - 'major' 37 | minor: 38 | labels: 39 | - 'minor' 40 | patch: 41 | labels: 42 | - 'patch' 43 | default: patch 44 | template: | 45 | ## Changes 46 | 47 | $CHANGES 48 | 49 | ## 📦 NPM release 50 | NPM release: [$REPOSITORY@$RESOLVED_VERSION](https://www.npmjs.com/package/$REPOSITORY/v/$RESOLVED_VERSION) 51 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | on: 3 | pull_request: 4 | branches: [ "master" ] 5 | push: 6 | 7 | jobs: 8 | 9 | build: 10 | name: "Build module" 11 | runs-on: ubuntu-latest 12 | 13 | steps: 14 | 15 | - uses: actions/checkout@v4 16 | 17 | - uses: actions/setup-node@v4 18 | with: 19 | node-version: 20.x 20 | 21 | - name: Enable Corepack 22 | run: corepack enable 23 | 24 | - name: Install dependencies 25 | run: yarn install 26 | 27 | - name: Lint TypeScript 28 | run: yarn run lint-ts 29 | 30 | - name: Lint Markdown 31 | run: yarn run lint-md 32 | 33 | - name: Build 34 | run: yarn run build 35 | 36 | - name: Upload build 37 | uses: actions/upload-artifact@v4 38 | with: 39 | name: build 40 | path: | 41 | lib/**/*.js 42 | lib/**/*.js.map 43 | lib/**/*.d.ts 44 | test/**/*.js 45 | test/**/*.js.map 46 | 47 | test-nodejs: 48 | name: "Test with Node.js (V8)" 49 | runs-on: ubuntu-latest 50 | needs: build 51 | 52 | env: 53 | YARN_IGNORE_NODE: 1 54 | 55 | strategy: 56 | matrix: 57 | node-version: [18.x, 20.x, 22.x] 58 | 59 | steps: 60 | 61 | - name: 'Checkout the repository' 62 | uses: actions/checkout@v4 63 | 64 | - name: Setup Node.js ${{ matrix.node-version }} 65 | uses: actions/setup-node@v4 66 | with: 67 | node-version: ${{ matrix.node-version }} 68 | 69 | - name: Enable Corepack 70 | run: corepack enable 71 | 72 | - name: Install dependencies 73 | run: yarn install 74 | 75 | - name: Download build 76 | uses: actions/download-artifact@v4 77 | with: 78 | name: build 79 | 80 | - name: Test with Node.js ${{ matrix.node-version }} 81 | run: yarn run test-coverage 82 | 83 | - name: Coveralls Parallel 84 | uses: coverallsapp/github-action@v2 85 | with: 86 | github-token: ${{ secrets.github_token }} 87 | flag-name: run-node-${{ matrix.test_number }} 88 | parallel: true 89 | 90 | test-bun: 91 | name: "Test with Bun (JavaScriptCore)" 92 | runs-on: ubuntu-latest 93 | needs: build 94 | 95 | strategy: 96 | matrix: 97 | bun-version: [1.2] 98 | 99 | env: 100 | YARN_IGNORE_NODE: 1 101 | 102 | steps: 103 | 104 | - name: 'Checkout the repository' 105 | uses: actions/checkout@v4 106 | 107 | - name: Setup Bun ${{ matrix.bun-version }} 108 | uses: oven-sh/setup-bun@v2 109 | with: 110 | bun-version: ${{ matrix.bun-version }} 111 | 112 | - name: Enable Corepack 113 | run: corepack enable 114 | 115 | - name: Install dependencies 116 | run: yarn install 117 | 118 | - name: Download build 119 | uses: actions/download-artifact@v4 120 | with: 121 | name: build 122 | 123 | - name: Unit tests with Bun ${{ matrix.bun-version }} 124 | run: bun run bun:test 125 | 126 | finish: 127 | needs: 128 | - test-nodejs 129 | - test-bun 130 | runs-on: ubuntu-latest 131 | steps: 132 | - name: Coveralls Finished 133 | uses: coverallsapp/github-action@v2 134 | with: 135 | github-token: ${{ secrets.github_token }} 136 | parallel-finished: true 137 | -------------------------------------------------------------------------------- /.github/workflows/codeql.yml: 
-------------------------------------------------------------------------------- 1 | name: "CodeQL" 2 | 3 | on: 4 | push: 5 | branches: [ "master" ] 6 | pull_request: 7 | branches: [ "master" ] 8 | schedule: 9 | - cron: "32 12 * * 6" 10 | 11 | jobs: 12 | analyze: 13 | name: Analyze 14 | runs-on: ubuntu-latest 15 | permissions: 16 | actions: read 17 | contents: read 18 | security-events: write 19 | 20 | strategy: 21 | fail-fast: false 22 | matrix: 23 | language: [ javascript ] 24 | 25 | steps: 26 | - name: Checkout 27 | uses: actions/checkout@v4 28 | 29 | - name: Initialize CodeQL 30 | uses: github/codeql-action/init@v3 31 | with: 32 | languages: ${{ matrix.language }} 33 | queries: +security-and-quality 34 | 35 | - name: Autobuild 36 | uses: github/codeql-action/autobuild@v3 37 | 38 | - name: Perform CodeQL Analysis 39 | uses: github/codeql-action/analyze@v3 40 | with: 41 | category: "/language:${{ matrix.language }}" 42 | -------------------------------------------------------------------------------- /.github/workflows/post-dependabot-prs.yml: -------------------------------------------------------------------------------- 1 | name: Dependabot Pull Request 2 | on: pull_request_target 3 | jobs: 4 | build: 5 | runs-on: ubuntu-latest 6 | if: ${{ github.event.pull_request.user.login == 'dependabot[bot]' }} 7 | steps: 8 | - name: Fetch Dependabot metadata 9 | id: dependabot-metadata 10 | uses: dependabot/fetch-metadata@v2 11 | with: 12 | github-token: "${{ secrets.GITHUB_TOKEN }}" 13 | - name: Add dev-dependencies label 14 | uses: actions-ecosystem/action-add-labels@v1 15 | if: ${{ steps.dependabot-metadata.outputs.dependency-type == 'direct:development' }} 16 | with: 17 | labels: dev-dependencies 18 | - name: Remove dependencies label 19 | uses: actions-ecosystem/action-remove-labels@v1 20 | if: ${{ steps.dependabot-metadata.outputs.dependency-type == 'direct:development' }} 21 | with: 22 | labels: dependencies 23 | -------------------------------------------------------------------------------- /.github/workflows/release-drafter.yml: -------------------------------------------------------------------------------- 1 | name: Release Drafter 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | pull_request: 8 | types: [opened, reopened, synchronize] 9 | 10 | permissions: 11 | contents: read 12 | 13 | jobs: 14 | update_release_draft: 15 | permissions: 16 | contents: write 17 | pull-requests: write 18 | runs-on: ubuntu-latest 19 | steps: 20 | - uses: release-drafter/release-drafter@v6 21 | env: 22 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 23 | -------------------------------------------------------------------------------- /.github/workflows/update-license.yml: -------------------------------------------------------------------------------- 1 | name: Update License Year 2 | 3 | on: 4 | schedule: 5 | - cron: "0 0 1 1 *" # Runs on January 1st every year 6 | workflow_dispatch: # Allows manual triggering 7 | 8 | jobs: 9 | update-license: 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - name: Checkout repository 14 | uses: actions/checkout@v3 15 | with: 16 | token: ${{ secrets.GITHUB_TOKEN }} 17 | 18 | - name: Update LICENSE year 19 | run: | 20 | CURRENT_YEAR=$(date +"%Y") 21 | sed -E -i "s/(Copyright © )[0-9]{4}/\1$CURRENT_YEAR/" LICENSE.txt 22 | 23 | - name: Commit and push changes 24 | run: | 25 | CURRENT_YEAR=$(date +"%Y") 26 | git config --global user.name "Borewit" 27 | git config --global user.email "Borewit@users.noreply.github.com" 28 | git diff --quiet LICENSE.txt || (git add 
LICENSE.txt && git commit -m "Update license year to $CURRENT_YEAR" && git push) 29 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Node module 2 | node_modules 3 | 4 | #IntelliJ IDEA: 5 | .idea 6 | *.iml 7 | 8 | # Yarn: 9 | .pnp.* 10 | .yarn/* 11 | 12 | # Project 13 | lib/**/*.js 14 | lib/**/*.js.map 15 | lib/**/*.d.ts 16 | test/**/*.js 17 | test/**/*.js.map 18 | test/**/*.d.ts 19 | test/resources/tmp.dat 20 | coverage 21 | -------------------------------------------------------------------------------- /.mocharc.json: -------------------------------------------------------------------------------- 1 | { 2 | "extension": ["ts", "tsx"], 3 | "watch-files": ["lib/**/*.ts", "test/**/*.ts"], 4 | "spec": ["test/*.ts"], 5 | "loader": ["ts-node/esm"], 6 | "extensions": ["ts", "tsx"] 7 | } 8 | -------------------------------------------------------------------------------- /.yarnrc.yml: -------------------------------------------------------------------------------- 1 | nodeLinker: node-modules 2 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright © 2025 Borewit 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | [![Node.js CI](https://github.com/Borewit/strtok3/actions/workflows/ci.yml/badge.svg)](https://github.com/Borewit/strtok3/actions/workflows/ci.yml)
 2 | [![CodeQL](https://github.com/Borewit/strtok3/actions/workflows/codeql.yml/badge.svg?branch=master)](https://github.com/Borewit/strtok3/actions/workflows/codeql.yml)
 3 | [![NPM version](https://badge.fury.io/js/strtok3.svg)](https://npmjs.org/package/strtok3)
 4 | [![npm downloads](http://img.shields.io/npm/dm/strtok3.svg)](https://npmcharts.com/compare/strtok3,token-types?start=1200&interval=30)
 5 | [![DeepScan grade](https://deepscan.io/api/teams/5165/projects/8526/branches/103329/badge/grade.svg)](https://deepscan.io/dashboard#view=project&tid=5165&pid=8526&bid=103329)
 6 | [![Known Vulnerabilities](https://snyk.io/test/github/Borewit/strtok3/badge.svg?targetFile=package.json)](https://snyk.io/test/github/Borewit/strtok3?targetFile=package.json)
 7 | [![Codacy Badge](https://api.codacy.com/project/badge/Grade/59dd6795e61949fb97066ca52e6097ef)](https://www.codacy.com/app/Borewit/strtok3?utm_source=github.com&utm_medium=referral&utm_content=Borewit/strtok3&utm_campaign=Badge_Grade)
 8 | # strtok3
 9 | 
10 | A promise-based streaming [*tokenizer*](#tokenizer-object) for [Node.js](http://nodejs.org) and browsers.
11 | 
12 | The `strtok3` module provides several methods for creating a [*tokenizer*](#tokenizer-object) from various input sources.
13 | Designed for:
14 | * Seamless support in streaming environments.
15 | * Efficient decoding of binary data, strings, and numbers.
16 | * Reading [predefined](https://github.com/Borewit/token-types) or custom tokens.
17 | * Offering [*tokenizers*](#tokenizer-object) for reading from [files](#fromfile-function), [streams](#fromstream-function) or [Uint8Arrays](#frombuffer-function).
18 | 
19 | ### Features
20 | `strtok3` can read from:
21 | * Files, using a file path as input.
22 | * Node.js [streams](https://nodejs.org/api/stream.html).
23 | * [Buffer](https://nodejs.org/api/buffer.html) or [Uint8Array](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Uint8Array).
24 | * HTTP chunked transfer provided by [@tokenizer/http](https://github.com/Borewit/tokenizer-http).
25 | * [Amazon S3](https://aws.amazon.com/s3) chunks with [@tokenizer/s3](https://github.com/Borewit/tokenizer-s3).
26 | 
27 | ## Installation
28 | 
29 | ```sh
30 | npm install strtok3
31 | ```
32 | 
33 | ### Compatibility
34 | 
35 | Starting with version 7, the module has migrated from [CommonJS](https://en.wikipedia.org/wiki/CommonJS) to [pure ECMAScript Module (ESM)](https://gist.github.com/sindresorhus/a39789f98801d908bbc7ff3ecc99d99c).
36 | The distributed JavaScript codebase is compliant with the [ECMAScript 2020 (11th Edition)](https://en.wikipedia.org/wiki/ECMAScript_version_history#11th_Edition_%E2%80%93_ECMAScript_2020) standard.
37 | 
38 | Requires a modern browser, a Node.js (V8) ≥ 18 engine, or Bun (JavaScriptCore) ≥ 1.2.
39 | 
40 | For TypeScript CommonJS backward compatibility, you can use [load-esm](https://github.com/Borewit/load-esm).
41 | 
42 | > [!NOTE]
43 | > This module requires a [Node.js ≥ 18](https://nodejs.org/en/about/previous-releases) engine.
44 | > It can also be used in a browser environment when bundled with a module bundler.
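As an illustration of the CommonJS interop mentioned above, the sketch below loads the ESM-only `strtok3` from a CommonJS TypeScript module. The `loadEsm` named export and its generic signature are assumptions based on the [load-esm](https://github.com/Borewit/load-esm) project; a plain dynamic `import('strtok3')` works as well when the compiler does not transpile `import()` into `require()`.

```ts
// CommonJS consumer (sketch): load the ESM-only strtok3 via a dynamic import
import { loadEsm } from 'load-esm'; // assumed named export of load-esm

export async function readFirstByte(path: string): Promise<number> {
  // Dynamically import the ESM module without tsc rewriting it to require()
  const strtok3 = await loadEsm<typeof import('strtok3')>('strtok3');
  const tokenizer = await strtok3.fromFile(path);
  try {
    const buffer = new Uint8Array(1);
    await tokenizer.readBuffer(buffer); // read a single byte
    return buffer[0];
  } finally {
    await tokenizer.close();
  }
}
```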
45 | 
46 | ## Support the Project
47 | If you find this project useful and would like to support its development, consider sponsoring or contributing:
48 | 
49 | - [Become a sponsor to Borewit](https://github.com/sponsors/Borewit)
50 | 
51 | - Buy me a coffee:
52 | 
53 |   Buy me A coffee
54 | 
55 | ## API Documentation
56 | 
57 | ### strtok3 methods
58 | 
59 | Use one of the following methods to instantiate an [*abstract tokenizer*](#tokenizer-object):
60 | - [fromFile](#fromfile-function)*
61 | - [fromStream](#fromstream-function)*
62 | - [fromWebStream](#fromwebstream-function)
63 | - [fromBuffer](#frombuffer-function)
64 | 
65 | > **_NOTE:_** * `fromFile` and `fromStream` are only available when importing this module with Node.js.
66 | 
67 | All methods return a [`Tokenizer`](#tokenizer-object), either directly or via a promise.
68 | 
69 | #### `fromFile` function
70 | 
71 | Creates a [*tokenizer*](#tokenizer-object) from a local file.
72 | 
73 | ```ts
74 | function fromFile(sourceFilePath: string): Promise<FileTokenizer>
75 | ```
76 | 
77 | | Parameter      | Type     | Description                |
78 | |----------------|----------|----------------------------|
79 | | sourceFilePath | `string` | Path to file to read from  |
80 | 
81 | > [!NOTE]
82 | > - Only available for Node.js engines
83 | > - `fromFile` automatically embeds [file-information](#file-information)
84 | 
85 | Returns, via a promise, a [*tokenizer*](#tokenizer-object) which can be used to parse a file.
86 | 
87 | ```js
88 | import * as strtok3 from 'strtok3';
89 | import * as Token from 'token-types';
90 | 
91 | (async () => {
92 | 
93 |   const tokenizer = await strtok3.fromFile("somefile.bin");
94 |   try {
95 |     const myNumber = await tokenizer.readToken(Token.UINT8);
96 |     console.log(`My number: ${myNumber}`);
97 |   } finally {
98 |     await tokenizer.close(); // Close the file
99 |   }
100 | })();
101 | ```
102 | 
103 | #### `fromStream` function
104 | 
105 | Creates a [*tokenizer*](#tokenizer-object) from a Node.js [readable stream](https://nodejs.org/api/stream.html#stream_class_stream_readable).
106 | 
107 | ```ts
108 | function fromStream(stream: Readable, options?: ITokenizerOptions): Promise<ReadStreamTokenizer>
109 | ```
110 | 
111 | | Parameter | Optional | Type                                                                         | Description                                           |
112 | |-----------|----------|------------------------------------------------------------------------------|-------------------------------------------------------|
113 | | stream    | no       | [Readable](https://nodejs.org/api/stream.html#stream_class_stream_readable) | Stream to read from                                   |
114 | | options   | yes      | `ITokenizerOptions`                                                          | Tokenizer options, including [IFileInfo](#IFileInfo)  |
115 | 
116 | Returns a Promise providing a [*tokenizer*](#tokenizer-object).
117 | 
118 | > [!NOTE]
119 | > - Only available for Node.js engines
120 | 
121 | #### `fromWebStream` function
122 | 
123 | Creates a [*tokenizer*](#tokenizer-object) from a [WHATWG ReadableStream](https://nodejs.org/api/webstreams.html#web-streams-api).
124 | 
125 | ```ts
126 | function fromWebStream(webStream: AnyWebByteStream, options?: ITokenizerOptions): ReadStreamTokenizer
127 | ```
128 | 
129 | | Parameter | Optional | Type                                                                      | Description                                           |
130 | |-----------|----------|----------------------------------------------------------------------------|------------------------------------------------------|
131 | | webStream | no       | [ReadableStream](https://nodejs.org/api/webstreams.html#web-streams-api) | WHATWG ReadableStream to read from                    |
132 | | options   | yes      | `ITokenizerOptions`                                                       | Tokenizer options, including [IFileInfo](#IFileInfo)  |
133 | 
134 | Returns a [*tokenizer*](#tokenizer-object).
135 | 
136 | ```js
137 | import * as strtok3 from 'strtok3';
138 | import * as Token from 'token-types';
139 | 
140 | const tokenizer = strtok3.fromWebStream(readableStream);
141 | tokenizer.readToken(Token.UINT8).then(myUint8Number => {
142 |   console.log(`My number: ${myUint8Number}`);
143 | });
144 | 
145 | ```
146 | 
147 | #### `fromBuffer()` function
148 | 
149 | Creates a tokenizer from memory ([Uint8Array](https://nodejs.org/api/buffer.html)).
150 | 
151 | ```ts
152 | function fromBuffer(uint8Array: Uint8Array, options?: ITokenizerOptions): BufferTokenizer
153 | ```
154 | 
155 | | Parameter  | Optional | Type                                              | Description                                           |
156 | |------------|----------|----------------------------------------------------|------------------------------------------------------|
157 | | uint8Array | no       | [Uint8Array](https://nodejs.org/api/buffer.html)  | Uint8Array or Buffer to read from                     |
158 | | options    | yes      | `ITokenizerOptions`                               | Tokenizer options, including [IFileInfo](#IFileInfo)  |
159 | 
160 | Returns a [*tokenizer*](#tokenizer-object).
161 | 
162 | ```js
163 | import * as strtok3 from 'strtok3';
164 | import * as Token from 'token-types';
165 | 
166 | const tokenizer = strtok3.fromBuffer(buffer);
167 | 
168 | tokenizer.readToken(Token.UINT8).then(myUint8Number => {
169 |   console.log(`My number: ${myUint8Number}`);
170 | });
171 | ```
172 | ### `Tokenizer` object
173 | The *tokenizer* is an abstraction of a [stream](https://nodejs.org/api/stream.html), file or [Uint8Array](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Uint8Array), allowing _reading_ or _peeking_ from the stream.
174 | The same abstraction can also be mapped onto chunked reads, as done in [@tokenizer/http](https://github.com/Borewit/tokenizer-http).
175 | 
176 | #### Key Features:
177 | 
178 | - Supports skipping ahead in the stream using `tokenizer.ignore()`.
179 | - Offers `peek` methods to preview data without advancing the read pointer.
180 | - Maintains the read position via `tokenizer.position`.
181 | 
182 | #### Tokenizer functions
183 | 
184 | _Read_ methods advance the stream pointer, while _peek_ methods do not.
185 | 
186 | There are two kinds of functions:
187 | 1. *read* methods: used to read a *token* or [Buffer](https://nodejs.org/api/buffer.html) from the [*tokenizer*](#tokenizer-object). The position of the *tokenizer-stream* will advance by the size of the token.
188 | 2. *peek* methods: same as the *read* methods, but they will *not* advance the pointer, allowing you to read (peek) ahead.
189 | 
190 | #### `readBuffer` function
191 | 
192 | Reads data from the _tokenizer_ into the provided buffer (`Uint8Array`).
193 | `readBuffer(buffer, options?)`
194 | 
195 | ```ts
196 | readBuffer(buffer: Uint8Array, options?: IReadChunkOptions): Promise<number>;
197 | ```
198 | 
199 | | Parameter | Type                                                       | Description                              |
200 | |-----------|------------------------------------------------------------|------------------------------------------|
201 | | buffer    | [Buffer](https://nodejs.org/api/buffer.html) \| Uint8Array | Target buffer to write the data read to  |
202 | | options   | [IReadChunkOptions](#ireadchunkoptions)                    | Read behaviour options                   |
203 | 
204 | Returns a promise with the number of bytes read.
205 | The number of bytes read may be less than requested if the *mayBeLess* flag was set.
206 | 
207 | #### `peekBuffer` function
208 | 
209 | Peek (read ahead), from the [*tokenizer*](#tokenizer-object), into the buffer without advancing the stream pointer.
210 | 
211 | ```ts
212 | peekBuffer(uint8Array: Uint8Array, options?: IReadChunkOptions): Promise<number>;
213 | ```
214 | 
215 | | Parameter  | Type                                                       | Description                                         |
216 | |------------|------------------------------------------------------------|-----------------------------------------------------|
217 | | uint8Array | [Buffer](https://nodejs.org/api/buffer.html) \| Uint8Array | Target buffer to write the data read (peeked) to.   |
218 | | options    | [IReadChunkOptions](#ireadchunkoptions)                    | Read behaviour options.                             |
219 | 
220 | Returns a `Promise<number>` with the number of bytes peeked. The number of bytes peeked may be less than requested if the *mayBeLess* flag was set.
221 | 
222 | #### `readToken` function
223 | 
224 | Read a *token* from the tokenizer-stream.
225 | 
226 | ```ts
227 | readToken<T>(token: IGetToken<T>, position: number = this.position): Promise<T>
228 | ```
229 | 
230 | | Parameter | Type                    | Description                                                                                                            |
231 | |-----------|-------------------------|------------------------------------------------------------------------------------------------------------------------|
232 | | token     | [IGetToken](#IGetToken) | Token to read from the tokenizer-stream.                                                                               |
233 | | position? | number                  | Offset where to begin reading within the file. If position is null, data will be read from the current file position. |
234 | 
235 | Returns a promise with the token value read from the *tokenizer-stream*.
236 | 
237 | #### `peekToken` function
238 | 
239 | Peek a *token* from the [*tokenizer*](#tokenizer-object).
240 | 
241 | ```ts
242 | peekToken<T>(token: IGetToken<T>, position: number = this.position): Promise<T>
243 | ```
244 | 
245 | | Parameter | Type                    | Description                                                                                                            |
246 | |-----------|-------------------------|------------------------------------------------------------------------------------------------------------------------|
247 | | token     | [IGetToken](#IGetToken) | Token to peek from the tokenizer-stream.                                                                               |
248 | | position? | number                  | Offset where to begin reading within the file. If position is null, data will be read from the current file position. |
249 | 
250 | Returns a promise with the token value peeked from the [*tokenizer*](#tokenizer-object).
251 | 
252 | #### `readNumber` function
253 | 
254 | Read a numeric [*token*](#token-object) from the [*tokenizer*](#tokenizer-object).
255 | 
256 | ```ts
257 | readNumber(token: IToken<number>): Promise<number>
258 | ```
259 | 
260 | | Parameter | Type                    | Description                                       |
261 | |-----------|-------------------------|----------------------------------------------------|
262 | | token     | [IGetToken](#IGetToken) | Numeric token to read from the tokenizer-stream.   |
263 | 
264 | Returns a promise with the decoded numeric value from the *tokenizer-stream*.
265 | 
266 | #### `ignore` function
267 | 
268 | Advances the offset pointer by the number of bytes provided.
269 | 
270 | ```ts
271 | ignore(length: number): Promise<number>
272 | ```
273 | 
274 | | Parameter | Type   | Description                                                       |
275 | |-----------|--------|--------------------------------------------------------------------|
276 | | length    | number | Number of bytes to ignore. Will advance the `tokenizer.position`   |
277 | 
278 | Returns a promise with the number of bytes actually ignored.
279 | 
280 | #### `close` function
281 | Clean up resources, such as closing a file pointer if applicable.
282 | 
283 | #### `Tokenizer` attributes
284 | 
285 | - `fileInfo`
286 | 
287 |   Optional attribute describing the file information, see [IFileInfo](#IFileInfo).
288 | 
289 | - `position`
290 | 
291 |   Pointer to the current position in the [*tokenizer*](#tokenizer-object) stream.
292 |   If a *position* is provided to a _read_ or _peek_ method, it should be equal to or greater than this value.
293 | 
294 | ### `IReadChunkOptions` interface
295 | 
296 | Each attribute is optional:
297 | 
298 | | Attribute | Type    | Description                                                                                                                                                                                                                              |
299 | |-----------|---------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
300 | | length    | number  | Requested number of bytes to read.                                                                                                                                                                                                      |
301 | | position  | number  | Position where to begin reading from the file. If position is null, data will be read from the [current file position](#attribute-tokenizerposition). Position may not be less than [tokenizer.position](#attribute-tokenizerposition) |
302 | | mayBeLess | boolean | If set, no EOF error will be thrown if fewer bytes than requested could be read.                                                                                                                                                        |
303 | 
304 | Example usage:
305 | ```js
306 | tokenizer.peekBuffer(buffer, {mayBeLess: true});
307 | ```
308 | 
309 | ### `IFileInfo` interface
310 | 
311 | Provides optional metadata about the file being tokenized.
312 | 
313 | | Attribute | Type   | Description                                                                                        |
314 | |-----------|--------|-----------------------------------------------------------------------------------------------------|
315 | | size      | number | File size in bytes                                                                                 |
316 | | mimeType  | string | [MIME-type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types) of file.  |
317 | | path      | string | File path                                                                                          |
318 | | url       | string | File URL                                                                                           |
319 | 
320 | ### `Token` object
321 | 
322 | The *token* is basically a description of what to read from the [*tokenizer-stream*](#tokenizer-object).
323 | A basic set of *token types* can be found here: [*token-types*](https://github.com/Borewit/token-types).
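Custom tokens can be defined as well. As a minimal, hypothetical sketch (implementing the `IGetToken` interface shown below), a 24-bit big-endian unsigned integer token could look like this:

```ts
import type { IGetToken } from 'strtok3';

// Hypothetical custom token: 24-bit big-endian unsigned integer
const UINT24_BE: IGetToken<number> = {
  len: 3, // number of bytes read from the tokenizer-stream
  get(buf: Uint8Array, off: number): number {
    // Combine three bytes, most significant byte first
    return (buf[off] << 16) | (buf[off + 1] << 8) | buf[off + 2];
  }
};

// Usage sketch: const value = await tokenizer.readToken(UINT24_BE);
```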
324 | 
325 | A token is something which implements the following interface:
326 | ```ts
327 | export interface IGetToken<T> {
328 | 
329 |   /**
330 |    * Length in bytes of encoded value
331 |    */
332 |   len: number;
333 | 
334 |   /**
335 |    * Decode value from buffer at offset
336 |    * @param buf Buffer to read the decoded value from
337 |    * @param off Decode offset
338 |    */
339 |   get(buf: Uint8Array, off: number): T;
340 | }
341 | ```
342 | The *tokenizer* reads `token.len` bytes from the *tokenizer-stream* into a Buffer.
343 | `token.get` is then called with that Buffer; it is responsible for converting the buffer into the desired output type.
344 | 
345 | ### Working with Web-API readable stream
346 | To convert a [Web-API readable stream](https://developer.mozilla.org/en-US/docs/Web/API/ReadableStreamDefaultReader) into a [Node.js readable stream](https://nodejs.org/api/stream.html#stream_readable_streams), you can use [readable-web-to-node-stream](https://github.com/Borewit/readable-web-to-node-stream).
347 | 
348 | ```js
349 | import { fromStream } from 'strtok3';
350 | import { ReadableWebToNodeStream } from 'readable-web-to-node-stream';
351 | 
352 | (async () => {
353 | 
354 |   const response = await fetch(url);
355 |   const readableWebStream = response.body; // Web-API readable stream
356 |   const nodeStream = new ReadableWebToNodeStream(readableWebStream); // convert to a Node.js readable stream
357 | 
358 |   const tokenizer = await fromStream(nodeStream); // And we now have a tokenizer in a web environment
359 | })();
360 | ```
361 | 
362 | ## Dependencies
363 | 
364 | The diagram below illustrates the primary dependencies of `strtok3`:
365 | 
366 | ```mermaid
367 | graph TD;
368 |   S(strtok3)-->P(peek-readable)
369 |   S(strtok3)-->TO("@tokenizer/token")
370 | ```
371 | 
372 | - [peek-readable](https://github.com/Borewit/peek-readable): Manages reading operations with peeking capabilities, allowing data to be previewed without advancing the read pointer.
373 | - [@tokenizer/token](https://github.com/Borewit/tokenizer-token): Provides token definitions and utilities used by `strtok3` for interpreting binary data.
374 | 
375 | ## Licence
376 | 
377 | This project is licensed under the [MIT License](LICENSE.txt). Feel free to use, modify, and distribute as needed.
378 | -------------------------------------------------------------------------------- /biome.jsonc: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://biomejs.dev/schemas/1.9.3/schema.json", 3 | "organizeImports": { 4 | "enabled": false 5 | }, 6 | "formatter": { 7 | "enabled": false 8 | }, 9 | "linter": { 10 | "enabled": true, 11 | "rules": { 12 | "correctness": { 13 | "noUnusedImports": "error", 14 | "noNodejsModules": "error" 15 | }, 16 | "recommended": true, 17 | "complexity": { 18 | "noForEach": "off" 19 | }, 20 | "suspicious": { 21 | "noEmptyBlockStatements": "error", 22 | "useErrorMessage": "error" 23 | }, 24 | "nursery":{ 25 | "noRestrictedImports": { 26 | "level": "error", 27 | "options": { 28 | "paths": { 29 | "node:buffer": "Use Uint8Array instead of Buffer" 30 | } 31 | } 32 | }}, 33 | "style":{ 34 | "useConsistentBuiltinInstantiation": "error", 35 | "useThrowNewError": "error", 36 | "useThrowOnlyError": "error" 37 | } 38 | } 39 | }, 40 | "files": { 41 | "ignoreUnknown": true, 42 | "ignore": [ 43 | "./coverage", 44 | "./yarn", 45 | "./.pnp.*", 46 | "./lib/**/*.d.ts", 47 | "./lib/**/*.js", 48 | "./test/**/*.d.ts", 49 | "./test/**/*.js" 50 | ] 51 | }, 52 | "overrides": [ 53 | { 54 | "include": ["./test/**/*", "./lib/index.ts", "./lib/FileTokenizer.ts"], 55 | "linter": { 56 | "rules": { 57 | "correctness": { 58 | "noNodejsModules": "off" 59 | } 60 | } 61 | } 62 | } 63 | ] 64 | } 65 | -------------------------------------------------------------------------------- /lib/AbstractTokenizer.ts: -------------------------------------------------------------------------------- 1 | import type { ITokenizer, IFileInfo, IReadChunkOptions, OnClose, ITokenizerOptions } from './types.js'; 2 | import type { IGetToken, IToken } from '@tokenizer/token'; 3 | import { EndOfStreamError } from 'peek-readable'; 4 | 5 | interface INormalizedReadChunkOptions extends IReadChunkOptions { 6 | length: number; 7 | position: number; 8 | mayBeLess?: boolean; 9 | } 10 | 11 | /** 12 | * Core tokenizer 13 | */ 14 | export abstract class AbstractTokenizer implements ITokenizer { 15 | 16 | private onClose?: OnClose; 17 | private numBuffer = new Uint8Array(8); 18 | 19 | public abstract fileInfo: IFileInfo; 20 | 21 | /** 22 | * Tokenizer-stream position 23 | */ 24 | public position = 0; 25 | 26 | 27 | /** 28 | * Constructor 29 | * @param options Tokenizer options 30 | * @protected 31 | */ 32 | protected constructor(options?: ITokenizerOptions) { 33 | this.onClose = options?.onClose; 34 | if (options?.abortSignal) { 35 | options.abortSignal.addEventListener('abort', () => { 36 | this.abort(); 37 | }) 38 | } 39 | } 40 | 41 | abstract supportsRandomAccess(): boolean; 42 | 43 | /** 44 | * Read buffer from tokenizer 45 | * @param buffer - Target buffer to fill with data read from the tokenizer-stream 46 | * @param options - Additional read options 47 | * @returns Promise with number of bytes read 48 | */ 49 | public abstract readBuffer(buffer: Uint8Array, options?: IReadChunkOptions): Promise; 50 | 51 | /** 52 | * Peek (read ahead) buffer from tokenizer 53 | * @param uint8Array - Target buffer to fill with data peeked from the tokenizer-stream 54 | * @param options - Peek behaviour options 55 | * @returns Promise with number of bytes read 56 | */ 57 | public abstract peekBuffer(uint8Array: Uint8Array, options?: IReadChunkOptions): Promise; 58 | 59 | /** 60 | * Read a token from the tokenizer-stream 61 | * @param token - The token to read 62 | * @param position - If 
provided, the desired position in the tokenizer-stream 63 | * @returns Promise with token data 64 | */ 65 | public async readToken(token: IGetToken, position: number = this.position): Promise { 66 | const uint8Array = new Uint8Array(token.len); 67 | const len = await this.readBuffer(uint8Array, {position}); 68 | if (len < token.len) 69 | throw new EndOfStreamError(); 70 | return token.get(uint8Array, 0); 71 | } 72 | 73 | /** 74 | * Peek a token from the tokenizer-stream. 75 | * @param token - Token to peek from the tokenizer-stream. 76 | * @param position - Offset where to begin reading within the file. If position is null, data will be read from the current file position. 77 | * @returns Promise with token data 78 | */ 79 | public async peekToken(token: IGetToken, position: number = this.position): Promise { 80 | const uint8Array = new Uint8Array(token.len); 81 | const len = await this.peekBuffer(uint8Array, {position}); 82 | if (len < token.len) 83 | throw new EndOfStreamError(); 84 | return token.get(uint8Array, 0); 85 | } 86 | 87 | /** 88 | * Read a numeric token from the stream 89 | * @param token - Numeric token 90 | * @returns Promise with number 91 | */ 92 | public async readNumber(token: IToken): Promise { 93 | const len = await this.readBuffer(this.numBuffer, {length: token.len}); 94 | if (len < token.len) 95 | throw new EndOfStreamError(); 96 | return token.get(this.numBuffer, 0); 97 | } 98 | 99 | /** 100 | * Read a numeric token from the stream 101 | * @param token - Numeric token 102 | * @returns Promise with number 103 | */ 104 | public async peekNumber(token: IToken): Promise { 105 | const len = await this.peekBuffer(this.numBuffer, {length: token.len}); 106 | if (len < token.len) 107 | throw new EndOfStreamError(); 108 | return token.get(this.numBuffer, 0); 109 | } 110 | 111 | /** 112 | * Ignore number of bytes, advances the pointer in under tokenizer-stream. 
113 | * @param length - Number of bytes to ignore 114 | * @return resolves the number of bytes ignored, equals length if this available, otherwise the number of bytes available 115 | */ 116 | public async ignore(length: number): Promise { 117 | if (this.fileInfo.size !== undefined) { 118 | const bytesLeft = this.fileInfo.size - this.position; 119 | if (length > bytesLeft) { 120 | this.position += bytesLeft; 121 | return bytesLeft; 122 | } 123 | } 124 | this.position += length; 125 | return length; 126 | } 127 | 128 | public async close(): Promise { 129 | await this.abort(); 130 | await this.onClose?.(); 131 | } 132 | 133 | protected normalizeOptions(uint8Array: Uint8Array, options?: IReadChunkOptions): INormalizedReadChunkOptions { 134 | 135 | if (!this.supportsRandomAccess() && options && options.position !== undefined && options.position < this.position) { 136 | throw new Error('`options.position` must be equal or greater than `tokenizer.position`'); 137 | } 138 | 139 | return { 140 | ...{ 141 | mayBeLess: false, 142 | offset: 0, 143 | length: uint8Array.length, 144 | position: this.position 145 | }, ...options 146 | }; 147 | } 148 | 149 | public abort(): Promise { 150 | return Promise.resolve(); // Ignore abort signal 151 | } 152 | } 153 | -------------------------------------------------------------------------------- /lib/BufferTokenizer.ts: -------------------------------------------------------------------------------- 1 | import type {ITokenizerOptions, IReadChunkOptions, IRandomAccessFileInfo, IRandomAccessTokenizer} from './types.js'; 2 | import { EndOfStreamError } from 'peek-readable'; 3 | import { AbstractTokenizer } from './AbstractTokenizer.js'; 4 | 5 | export class BufferTokenizer extends AbstractTokenizer implements IRandomAccessTokenizer { 6 | 7 | public fileInfo: IRandomAccessFileInfo; 8 | 9 | /** 10 | * Construct BufferTokenizer 11 | * @param uint8Array - Uint8Array to tokenize 12 | * @param options Tokenizer options 13 | */ 14 | constructor(private uint8Array: Uint8Array, options?: ITokenizerOptions) { 15 | super(options); 16 | this.fileInfo = {...options?.fileInfo ?? 
{}, ...{size: uint8Array.length}}; 17 | } 18 | 19 | /** 20 | * Read buffer from tokenizer 21 | * @param uint8Array - Uint8Array to tokenize 22 | * @param options - Read behaviour options 23 | * @returns {Promise} 24 | */ 25 | public async readBuffer(uint8Array: Uint8Array, options?: IReadChunkOptions): Promise { 26 | 27 | if (options?.position) { 28 | this.position = options.position; 29 | } 30 | 31 | const bytesRead = await this.peekBuffer(uint8Array, options); 32 | this.position += bytesRead; 33 | return bytesRead; 34 | } 35 | 36 | /** 37 | * Peek (read ahead) buffer from tokenizer 38 | * @param uint8Array 39 | * @param options - Read behaviour options 40 | * @returns {Promise} 41 | */ 42 | public async peekBuffer(uint8Array: Uint8Array, options?: IReadChunkOptions): Promise { 43 | 44 | const normOptions = this.normalizeOptions(uint8Array, options); 45 | 46 | const bytes2read = Math.min(this.uint8Array.length - normOptions.position, normOptions.length); 47 | if ((!normOptions.mayBeLess) && bytes2read < normOptions.length) { 48 | throw new EndOfStreamError(); 49 | } 50 | uint8Array.set(this.uint8Array.subarray(normOptions.position, normOptions.position + bytes2read)); 51 | return bytes2read; 52 | } 53 | 54 | public close(): Promise { 55 | return super.close(); 56 | } 57 | 58 | supportsRandomAccess(): boolean { 59 | return true; 60 | } 61 | 62 | setPosition(position: number): void { 63 | this.position = position; 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /lib/FileTokenizer.ts: -------------------------------------------------------------------------------- 1 | import { AbstractTokenizer } from './AbstractTokenizer.js'; 2 | import { EndOfStreamError } from 'peek-readable'; 3 | import type {IRandomAccessTokenizer, IRandomAccessFileInfo, IReadChunkOptions, ITokenizerOptions} from './types.js'; 4 | import { type FileHandle, open as fsOpen } from 'node:fs/promises'; 5 | 6 | interface IFileTokenizerOptions extends ITokenizerOptions { 7 | /** 8 | * Pass additional file information to the tokenizer 9 | */ 10 | fileInfo: IRandomAccessFileInfo; 11 | } 12 | 13 | export class FileTokenizer extends AbstractTokenizer implements IRandomAccessTokenizer { 14 | 15 | public fileInfo: IRandomAccessFileInfo; 16 | 17 | /** 18 | * Create tokenizer from provided file path 19 | * @param sourceFilePath File path 20 | */ 21 | static async fromFile(sourceFilePath: string): Promise { 22 | const fileHandle = await fsOpen(sourceFilePath, 'r'); 23 | const stat = await fileHandle.stat(); 24 | return new FileTokenizer(fileHandle, {fileInfo: {path: sourceFilePath, size: stat.size}}); 25 | } 26 | 27 | protected constructor(private fileHandle: FileHandle, options: IFileTokenizerOptions) { 28 | super(options); 29 | this.fileInfo = options.fileInfo; 30 | } 31 | 32 | /** 33 | * Read buffer from file 34 | * @param uint8Array - Uint8Array to write result to 35 | * @param options - Read behaviour options 36 | * @returns Promise number of bytes read 37 | */ 38 | public async readBuffer(uint8Array: Uint8Array, options?: IReadChunkOptions): Promise { 39 | const normOptions = this.normalizeOptions(uint8Array, options); 40 | this.position = normOptions.position; 41 | if (normOptions.length === 0) return 0; 42 | const res = await this.fileHandle.read(uint8Array, 0, normOptions.length, normOptions.position); 43 | this.position += res.bytesRead; 44 | if (res.bytesRead < normOptions.length && (!options || !options.mayBeLess)) { 45 | throw new EndOfStreamError(); 46 | } 47 | return 
res.bytesRead; 48 | } 49 | 50 | /** 51 | * Peek buffer from file 52 | * @param uint8Array - Uint8Array (or Buffer) to write data to 53 | * @param options - Read behaviour options 54 | * @returns Promise number of bytes read 55 | */ 56 | public async peekBuffer(uint8Array: Uint8Array, options?: IReadChunkOptions): Promise { 57 | 58 | const normOptions = this.normalizeOptions(uint8Array, options); 59 | 60 | const res = await this.fileHandle.read(uint8Array, 0, normOptions.length, normOptions.position); 61 | if ((!normOptions.mayBeLess) && res.bytesRead < normOptions.length) { 62 | throw new EndOfStreamError(); 63 | } 64 | return res.bytesRead; 65 | } 66 | 67 | public async close(): Promise { 68 | await this.fileHandle.close(); 69 | return super.close(); 70 | } 71 | 72 | setPosition(position: number): void { 73 | this.position = position; 74 | } 75 | 76 | supportsRandomAccess(): boolean { 77 | return true; 78 | } 79 | } 80 | 81 | 82 | -------------------------------------------------------------------------------- /lib/ReadStreamTokenizer.ts: -------------------------------------------------------------------------------- 1 | import { AbstractTokenizer } from './AbstractTokenizer.js'; 2 | import { EndOfStreamError, type IStreamReader } from 'peek-readable'; 3 | import type {IFileInfo, IReadChunkOptions, ITokenizerOptions} from './types.js'; 4 | 5 | const maxBufferSize = 256000; 6 | 7 | export class ReadStreamTokenizer extends AbstractTokenizer { 8 | 9 | public fileInfo: IFileInfo; 10 | 11 | /** 12 | * Constructor 13 | * @param streamReader stream-reader to read from 14 | * @param options Tokenizer options 15 | */ 16 | public constructor(private streamReader: IStreamReader, options?: ITokenizerOptions) { 17 | super(options); 18 | this.fileInfo = options?.fileInfo ?? 
{}; 19 | } 20 | 21 | /** 22 | * Read buffer from tokenizer 23 | * @param uint8Array - Target Uint8Array to fill with data read from the tokenizer-stream 24 | * @param options - Read behaviour options 25 | * @returns Promise with number of bytes read 26 | */ 27 | public async readBuffer(uint8Array: Uint8Array, options?: IReadChunkOptions): Promise { 28 | const normOptions = this.normalizeOptions(uint8Array, options); 29 | const skipBytes = normOptions.position - this.position; 30 | if (skipBytes > 0) { 31 | await this.ignore(skipBytes); 32 | return this.readBuffer(uint8Array, options); 33 | } 34 | if (skipBytes < 0) { 35 | throw new Error('`options.position` must be equal or greater than `tokenizer.position`'); 36 | } 37 | if (normOptions.length === 0) { 38 | return 0; 39 | } 40 | const bytesRead = await this.streamReader.read(uint8Array.subarray(0, normOptions.length), normOptions.mayBeLess); 41 | this.position += bytesRead; 42 | if ((!options || !options.mayBeLess) && bytesRead < normOptions.length) { 43 | throw new EndOfStreamError(); 44 | } 45 | return bytesRead; 46 | } 47 | 48 | /** 49 | * Peek (read ahead) buffer from tokenizer 50 | * @param uint8Array - Uint8Array (or Buffer) to write data to 51 | * @param options - Read behaviour options 52 | * @returns Promise with number of bytes peeked 53 | */ 54 | public async peekBuffer(uint8Array: Uint8Array, options?: IReadChunkOptions): Promise { 55 | 56 | const normOptions = this.normalizeOptions(uint8Array, options); 57 | let bytesRead = 0; 58 | 59 | if (normOptions.position) { 60 | const skipBytes = normOptions.position - this.position; 61 | if (skipBytes > 0) { 62 | const skipBuffer = new Uint8Array(normOptions.length + skipBytes); 63 | bytesRead = await this.peekBuffer(skipBuffer, {mayBeLess: normOptions.mayBeLess}); 64 | uint8Array.set(skipBuffer.subarray(skipBytes)); 65 | return bytesRead - skipBytes; 66 | } 67 | if (skipBytes < 0) { 68 | throw new Error('Cannot peek from a negative offset in a stream'); 69 | } 70 | } 71 | 72 | if (normOptions.length > 0) { 73 | try { 74 | bytesRead = await this.streamReader.peek(uint8Array.subarray(0, normOptions.length), normOptions.mayBeLess); 75 | } catch (err) { 76 | if (options?.mayBeLess && err instanceof EndOfStreamError) { 77 | return 0; 78 | } 79 | throw err; 80 | } 81 | if ((!normOptions.mayBeLess) && bytesRead < normOptions.length) { 82 | throw new EndOfStreamError(); 83 | } 84 | } 85 | 86 | return bytesRead; 87 | } 88 | 89 | public async ignore(length: number): Promise { 90 | // debug(`ignore ${this.position}...${this.position + length - 1}`); 91 | const bufSize = Math.min(maxBufferSize, length); 92 | const buf = new Uint8Array(bufSize); 93 | let totBytesRead = 0; 94 | while (totBytesRead < length) { 95 | const remaining = length - totBytesRead; 96 | const bytesRead = await this.readBuffer(buf, {length: Math.min(bufSize, remaining)}); 97 | if (bytesRead < 0) { 98 | return bytesRead; 99 | } 100 | totBytesRead += bytesRead; 101 | } 102 | return totBytesRead; 103 | } 104 | 105 | public abort(): Promise { 106 | return this.streamReader.abort(); 107 | } 108 | 109 | public async close(): Promise { 110 | return this.streamReader.close(); 111 | } 112 | 113 | supportsRandomAccess(): boolean { 114 | return false; 115 | } 116 | } 117 | -------------------------------------------------------------------------------- /lib/core.ts: -------------------------------------------------------------------------------- 1 | import type { Readable } from 'node:stream'; 2 | import { StreamReader, 
makeWebStreamReader, type AnyWebByteStream } from 'peek-readable'; 3 | 4 | import { ReadStreamTokenizer } from './ReadStreamTokenizer.js'; 5 | import { BufferTokenizer } from './BufferTokenizer.js'; 6 | import type { ITokenizerOptions } from './types.js'; 7 | 8 | export { EndOfStreamError, AbortError, type AnyWebByteStream } from 'peek-readable'; 9 | export type { ITokenizer, IRandomAccessTokenizer, IFileInfo, IRandomAccessFileInfo, ITokenizerOptions, IReadChunkOptions, OnClose } from './types.js'; 10 | export type { IToken, IGetToken } from '@tokenizer/token'; 11 | export { AbstractTokenizer } from './AbstractTokenizer.js'; 12 | 13 | /** 14 | * Construct ReadStreamTokenizer from given Stream. 15 | * Will set fileSize, if provided given Stream has set the .path property/ 16 | * @param stream - Read from Node.js Stream.Readable 17 | * @param options - Tokenizer options 18 | * @returns ReadStreamTokenizer 19 | */ 20 | export function fromStream(stream: Readable, options?: ITokenizerOptions): ReadStreamTokenizer { 21 | const streamReader= new StreamReader(stream); 22 | const _options: ITokenizerOptions = options ?? {}; 23 | const chainedClose = _options.onClose; 24 | _options.onClose = async () => { 25 | await streamReader.close(); 26 | if(chainedClose) { 27 | return chainedClose(); 28 | } 29 | }; 30 | return new ReadStreamTokenizer(streamReader, _options); 31 | } 32 | 33 | /** 34 | * Construct ReadStreamTokenizer from given ReadableStream (WebStream API). 35 | * Will set fileSize, if provided given Stream has set the .path property/ 36 | * @param webStream - Read from Node.js Stream.Readable (must be a byte stream) 37 | * @param options - Tokenizer options 38 | * @returns ReadStreamTokenizer 39 | */ 40 | export function fromWebStream(webStream: AnyWebByteStream, options?: ITokenizerOptions): ReadStreamTokenizer { 41 | const webStreamReader= makeWebStreamReader(webStream); 42 | const _options: ITokenizerOptions = options ?? {}; 43 | const chainedClose = _options.onClose; 44 | _options.onClose = async () => { 45 | await webStreamReader.close(); 46 | if(chainedClose) { 47 | return chainedClose(); 48 | } 49 | }; 50 | return new ReadStreamTokenizer(webStreamReader, _options); 51 | } 52 | 53 | /** 54 | * Construct ReadStreamTokenizer from given Buffer. 55 | * @param uint8Array - Uint8Array to tokenize 56 | * @param options - Tokenizer options 57 | * @returns BufferTokenizer 58 | */ 59 | export function fromBuffer(uint8Array: Uint8Array, options?: ITokenizerOptions): BufferTokenizer { 60 | return new BufferTokenizer(uint8Array, options); 61 | } 62 | -------------------------------------------------------------------------------- /lib/index.ts: -------------------------------------------------------------------------------- 1 | import type { Readable } from 'node:stream'; 2 | import type { ReadStreamTokenizer } from './ReadStreamTokenizer.js'; 3 | import { stat as fsStat } from 'node:fs/promises'; 4 | import { type ITokenizerOptions, fromStream as coreFromStream } from './core.js'; 5 | import {FileTokenizer} from "./FileTokenizer.js"; 6 | 7 | export { FileTokenizer } from './FileTokenizer.js'; 8 | export * from './core.js'; 9 | export type { IToken, IGetToken } from '@tokenizer/token'; 10 | 11 | interface StreamWithFile extends Readable { 12 | /** 13 | * Informal property set by `node:fs.createReadStream` 14 | */ 15 | path?: string; 16 | } 17 | 18 | /** 19 | * Construct ReadStreamTokenizer from given Stream. 20 | * Will set fileSize, if provided given Stream has set the .path property. 
21 | * @param stream - Node.js Stream.Readable 22 | * @param options - Pass additional file information to the tokenizer 23 | * @returns Tokenizer 24 | */ 25 | export async function fromStream(stream: Readable, options?: ITokenizerOptions): Promise { 26 | const rst = coreFromStream(stream, options); 27 | if ((stream as StreamWithFile).path) { 28 | const stat = await fsStat((stream as StreamWithFile).path as string); 29 | rst.fileInfo.path = (stream as StreamWithFile).path; 30 | rst.fileInfo.size = stat.size; 31 | } 32 | return rst; 33 | } 34 | 35 | export const fromFile = FileTokenizer.fromFile; 36 | -------------------------------------------------------------------------------- /lib/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "../tsconfig.json", 3 | "compilerOptions": { 4 | "declaration": true 5 | } 6 | } 7 | 8 | -------------------------------------------------------------------------------- /lib/types.ts: -------------------------------------------------------------------------------- 1 | import type { IGetToken } from '@tokenizer/token'; 2 | 3 | export interface IFileInfo { 4 | /** 5 | * File size in bytes 6 | */ 7 | size?: number; 8 | /** 9 | * MIME-type of file 10 | */ 11 | mimeType?: string; 12 | 13 | /** 14 | * File path 15 | */ 16 | path?: string; 17 | 18 | /** 19 | * File URL 20 | */ 21 | url?: string; 22 | } 23 | 24 | export interface IRandomAccessFileInfo extends IFileInfo { 25 | /** 26 | * File size in bytes 27 | */ 28 | size: number; 29 | } 30 | 31 | export interface IReadChunkOptions { 32 | 33 | /** 34 | * Number of bytes to read. 35 | */ 36 | length?: number; 37 | 38 | /** 39 | * Position where to begin reading from the file. 40 | * Default it is `tokenizer.position`. 41 | * Position may not be less than `tokenizer.position`, unless `supportsRandomAccess()` returns `true`. 42 | */ 43 | position?: number; 44 | 45 | /** 46 | * If set, will not throw an EOF error if not all off the requested data could be read 47 | */ 48 | mayBeLess?: boolean; 49 | } 50 | 51 | export interface IRandomAccessTokenizer extends ITokenizer { 52 | 53 | /** 54 | * Provide access to information of the underlying information stream or file. 55 | */ 56 | fileInfo: IRandomAccessFileInfo; 57 | 58 | /** 59 | * Change the position (offset) of the tokenizer 60 | * @param position New position 61 | */ 62 | setPosition(position: number): void; 63 | } 64 | 65 | /** 66 | * The tokenizer allows us to read or peek from the tokenizer-stream. 67 | * The tokenizer-stream is an abstraction of a stream, file or Buffer. 68 | */ 69 | export interface ITokenizer { 70 | 71 | /** 72 | * Provide access to information of the underlying information stream or file. 
73 | */ 74 | readonly fileInfo: IFileInfo; 75 | 76 | /** 77 | * Offset in bytes (= number of bytes read) since beginning of file or stream 78 | */ 79 | readonly position: number; 80 | 81 | /** 82 | * Peek (read ahead) buffer from tokenizer 83 | * @param buffer - Target buffer to fill with data peek from the tokenizer-stream 84 | * @param options - Read behaviour options 85 | * @returns Promise with number of bytes read 86 | */ 87 | peekBuffer(buffer: Uint8Array, options?: IReadChunkOptions): Promise; 88 | 89 | /** 90 | * Peek (read ahead) buffer from tokenizer 91 | * @param buffer - Target buffer to fill with data peeked from the tokenizer-stream 92 | * @param options - Additional read options 93 | * @returns Promise with number of bytes read 94 | */ 95 | readBuffer(buffer: Uint8Array, options?: IReadChunkOptions): Promise; 96 | 97 | /** 98 | * Peek a token from the tokenizer-stream. 99 | * @param token - Token to peek from the tokenizer-stream. 100 | * @param position - Offset where to begin reading within the file. If position is null, data will be read from the current file position. 101 | * @param maybeless - If set, will not throw an EOF error if the less then the requested length could be read. 102 | */ 103 | peekToken(token: IGetToken, position?: number | null, maybeless?: boolean): Promise; 104 | 105 | /** 106 | * Read a token from the tokenizer-stream. 107 | * @param token - Token to peek from the tokenizer-stream. 108 | * @param position - Offset where to begin reading within the file. If position is null, data will be read from the current file position. 109 | */ 110 | readToken(token: IGetToken, position?: number): Promise; 111 | 112 | /** 113 | * Peek a numeric token from the stream 114 | * @param token - Numeric token 115 | * @returns Promise with number 116 | */ 117 | peekNumber(token: IGetToken): Promise; 118 | 119 | /** 120 | * Read a numeric token from the stream 121 | * @param token - Numeric token 122 | * @returns Promise with number 123 | */ 124 | readNumber(token: IGetToken): Promise; 125 | 126 | /** 127 | * Ignore given number of bytes 128 | * @param length - Number of bytes ignored 129 | */ 130 | ignore(length: number): Promise; 131 | 132 | /** 133 | * Clean up resources. 134 | * It does not close the stream for StreamReader, but is does close the file-descriptor. 
135 | */ 136 | close(): Promise; 137 | 138 | /** 139 | * Abort pending asynchronous operations 140 | */ 141 | abort(): Promise; 142 | 143 | /** 144 | * Returns true when the underlying file supports random access 145 | */ 146 | supportsRandomAccess(): boolean; 147 | } 148 | 149 | export type OnClose = () => Promise; 150 | 151 | export interface ITokenizerOptions { 152 | /** 153 | * Pass additional file information to the tokenizer 154 | */ 155 | fileInfo?: IFileInfo; 156 | 157 | /** 158 | * On tokenizer close handler 159 | */ 160 | onClose?: OnClose; 161 | 162 | /** 163 | * Pass `AbortSignal` which can stop active async operations 164 | * Ref: https://developer.mozilla.org/en-US/docs/Web/API/AbortSignal 165 | */ 166 | abortSignal?: AbortSignal; 167 | } 168 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "strtok3", 3 | "version": "10.2.2", 4 | "description": "A promise based streaming tokenizer", 5 | "author": { 6 | "name": "Borewit", 7 | "url": "https://github.com/Borewit" 8 | }, 9 | "funding": { 10 | "type": "github", 11 | "url": "https://github.com/sponsors/Borewit" 12 | }, 13 | "scripts": { 14 | "clean": "del-cli 'lib/**/*.js' 'lib/**/*.js.map' 'lib/**/*.d.ts' 'test/**/*.js' 'test/**/*.js.map'", 15 | "compile-src": "tsc -p lib", 16 | "compile-test": "tsc -p test", 17 | "compile": "yarn run compile-src && yarn run compile-test", 18 | "build": "yarn run clean && yarn run compile", 19 | "eslint": "eslint lib test", 20 | "lint-md": "remark -u preset-lint-recommended .", 21 | "lint-ts": "biome check", 22 | "lint": "yarn run lint-md && yarn run lint-ts", 23 | "fix": "yarn run biome lint --write", 24 | "test": "mocha", 25 | "bun:test": "bun run --bun test", 26 | "test-coverage": "c8 yarn run test", 27 | "send-codacy": "c8 report --reporter=text-lcov | codacy-coverage", 28 | "start": "yarn run compile && yarn run lint && yarn run cover-test" 29 | }, 30 | "engines": { 31 | "node": ">=18" 32 | }, 33 | "repository": { 34 | "type": "git", 35 | "url": "https://github.com/Borewit/strtok3.git" 36 | }, 37 | "license": "MIT", 38 | "type": "module", 39 | "exports": { 40 | ".": { 41 | "node": "./lib/index.js", 42 | "default": "./lib/core.js" 43 | }, 44 | "./core": "./lib/core.js" 45 | }, 46 | "types": "lib/index.d.ts", 47 | "files": [ 48 | "lib/**/*.js", 49 | "lib/**/*.d.ts" 50 | ], 51 | "bugs": { 52 | "url": "https://github.com/Borewit/strtok3/issues" 53 | }, 54 | "dependencies": { 55 | "@tokenizer/token": "^0.3.0", 56 | "peek-readable": "^7.0.0" 57 | }, 58 | "devDependencies": { 59 | "@biomejs/biome": "^1.9.4", 60 | "@types/chai": "^5.2.2", 61 | "@types/chai-as-promised": "^8.0.2", 62 | "@types/debug": "^4.1.12", 63 | "@types/mocha": "^10.0.10", 64 | "@types/node": "^22.15.19", 65 | "c8": "^10.1.3", 66 | "chai": "^5.2.0", 67 | "chai-as-promised": "^8.0.1", 68 | "del-cli": "^6.0.0", 69 | "mocha": "^11.5.0", 70 | "node-readable-to-web-readable-stream": "^0.4.2", 71 | "remark-cli": "^12.0.1", 72 | "remark-preset-lint-recommended": "^7.0.1", 73 | "token-types": "^6.0.0", 74 | "ts-node": "^10.9.2", 75 | "typescript": "^5.8.3", 76 | "uint8array-extras": "^1.4.0" 77 | }, 78 | "keywords": [ 79 | "tokenizer", 80 | "reader", 81 | "token", 82 | "async", 83 | "promise", 84 | "parser", 85 | "decoder", 86 | "binary", 87 | "endian", 88 | "uint", 89 | "stream", 90 | "streaming" 91 | ], 92 | "packageManager": "yarn@4.9.1" 93 | } 94 | 
-------------------------------------------------------------------------------- /test/resources/id3v1.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Borewit/strtok3/5e7c191bd1930140438dd48fd837515c449365a3/test/resources/id3v1.mp3 -------------------------------------------------------------------------------- /test/resources/test1.dat: -------------------------------------------------------------------------------- 1 |  -------------------------------------------------------------------------------- /test/resources/test2.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Borewit/strtok3/5e7c191bd1930140438dd48fd837515c449365a3/test/resources/test2.dat -------------------------------------------------------------------------------- /test/resources/test3.dat: -------------------------------------------------------------------------------- 1 |  -------------------------------------------------------------------------------- /test/test.ts: -------------------------------------------------------------------------------- 1 | import { PassThrough } from 'node:stream'; 2 | import * as fs from 'node:fs/promises'; 3 | import { createReadStream } from 'node:fs'; 4 | import { dirname } from 'node:path'; 5 | import { fileURLToPath } from 'node:url'; 6 | 7 | import * as Token from 'token-types'; 8 | import { assert, expect, use } from 'chai'; 9 | import chaiAsPromised from 'chai-as-promised'; 10 | import { 11 | fromBuffer, 12 | fromFile, 13 | fromStream, 14 | fromWebStream, 15 | type ITokenizer, 16 | type IRandomAccessTokenizer 17 | } from '../lib/index.js'; 18 | import Path from 'node:path'; 19 | import { EndOfStreamError } from 'peek-readable'; 20 | 21 | import mocha from 'mocha'; 22 | import { stringToUint8Array } from 'uint8array-extras'; 23 | 24 | import { DelayedStream, makeByteReadableStreamFromFile } from './util.js'; 25 | import process from 'node:process'; 26 | 27 | use(chaiAsPromised); 28 | 29 | const __dirname = dirname(fileURLToPath(import.meta.url)); 30 | 31 | const {describe, it} = mocha; 32 | 33 | interface ITokenizerTest { 34 | name: string; 35 | loadTokenizer: (testFile: string, delay?: number, abortSignal?: AbortSignal) => Promise<ITokenizer>; 36 | hasFileInfo: boolean; 37 | abortable: boolean; 38 | randomRead: boolean; 39 | } 40 | 41 | function getResourcePath(testFile: string) { 42 | return Path.join(__dirname, 'resources', testFile); 43 | } 44 | 45 | async function getTokenizerWithData(testData: string, test: ITokenizerTest, delay?: number, abortSignal?: AbortSignal): Promise<ITokenizer> { 46 | const testPath = getResourcePath('tmp.dat'); 47 | await fs.writeFile(testPath, testData, {encoding: 'latin1'}); 48 | return test.loadTokenizer('tmp.dat', delay, abortSignal); 49 | } 50 | 51 | describe('Matrix tests', () => { 52 | 53 | const tokenizerTests: ITokenizerTest[] = [ 54 | { 55 | name: 'fromStream()', 56 | loadTokenizer: async (testFile, delay, abortSignal?: AbortSignal) => { 57 | const stream = createReadStream(getResourcePath(testFile)); 58 | const delayedStream = new DelayedStream(stream, delay); 59 | return fromStream(delayedStream, {abortSignal}); 60 | }, 61 | hasFileInfo: true, 62 | abortable: true, 63 | randomRead: false 64 | }, { 65 | name: 'fromWebStream()', 66 | loadTokenizer: async (testFile, delay, abortSignal?: AbortSignal) => { 67 | const fileStream = makeByteReadableStreamFromFile(Path.join(__dirname, 'resources', testFile), delay); 68 | return 
fromWebStream(fileStream, {abortSignal}); 69 | }, 70 | hasFileInfo: false, 71 | abortable: true, 72 | randomRead: false 73 | }, { 74 | name: 'fromFile()', 75 | loadTokenizer: async testFile => { 76 | return fromFile(Path.join(__dirname, 'resources', testFile)); 77 | }, 78 | hasFileInfo: true, 79 | abortable: false, 80 | randomRead: true 81 | }, { 82 | name: 'fromBuffer()', 83 | loadTokenizer: async testFile => { 84 | const data = await fs.readFile(Path.join(__dirname, 'resources', testFile)); 85 | return fromBuffer(data); 86 | }, 87 | hasFileInfo: true, 88 | abortable: false, 89 | randomRead: true 90 | } 91 | ]; 92 | 93 | tokenizerTests 94 | // .filter((x, n) => n === 1) 95 | .forEach(tokenizerType => { 96 | describe(tokenizerType.name, () => { 97 | 98 | describe('tokenizer read options', () => { 99 | 100 | it('option.offset', async () => { 101 | const buf = new Uint8Array(7); 102 | const rst = await getTokenizerWithData('\x01\x02\x03\x04\x05\x06', tokenizerType); 103 | assert.strictEqual(await rst.readBuffer(buf.subarray(1), {length: 6}), 6); 104 | await rst.close(); 105 | }); 106 | 107 | it('option.length', async () => { 108 | const buf = new Uint8Array(7); 109 | const rst = await getTokenizerWithData('\x01\x02\x03\x04\x05\x06', tokenizerType); 110 | assert.strictEqual(await rst.readBuffer(buf, {length: 2}), 2); 111 | await rst.close(); 112 | }); 113 | 114 | it('default length', async () => { 115 | const buf = new Uint8Array(6); 116 | const rst = await getTokenizerWithData('\x01\x02\x03\x04\x05\x06', tokenizerType); 117 | assert.strictEqual(await rst.readBuffer(buf.subarray(1)), 5, 'default length = buffer.length - option.offset'); 118 | await rst.close(); 119 | }); 120 | 121 | it('option.maybeLess = true', async () => { 122 | const buffer = new Uint8Array(4); 123 | const rst = await getTokenizerWithData('\x89\x54\x40', tokenizerType); 124 | const len = await rst.readBuffer(buffer, {mayBeLess: true}); 125 | assert.strictEqual(len, 3, 'should return 3 because no more bytes are available'); 126 | await rst.close(); 127 | }); 128 | 129 | it('option.position', async () => { 130 | const buffer = new Uint8Array(5); 131 | const rst = await getTokenizerWithData('\x01\x02\x03\x04\x05\x06', tokenizerType); 132 | const len = await rst.readBuffer(buffer, {position: 1}); 133 | assert.strictEqual(len, 5, 'return value'); 134 | assert.deepEqual(buffer, Uint8Array.from([0x02, 0x03, 0x04, 0x05, 0x06])); 135 | await rst.close(); 136 | }); 137 | 138 | }); 139 | 140 | describe('tokenizer peek options', () => { 141 | 142 | it('option.offset', async () => { 143 | const buf = new Uint8Array(7); 144 | const rst = await getTokenizerWithData('\x01\x02\x03\x04\x05\x06', tokenizerType); 145 | assert.strictEqual(await rst.peekBuffer(buf.subarray(1), {length: 6}), 6); 146 | await rst.close(); 147 | }); 148 | 149 | it('option.length', async () => { 150 | const buf = new Uint8Array(7); 151 | const rst = await getTokenizerWithData('\x01\x02\x03\x04\x05\x06', tokenizerType); 152 | assert.strictEqual(await rst.peekBuffer(buf, {length: 2}), 2); 153 | await rst.close(); 154 | }); 155 | 156 | it('default length', async () => { 157 | const buf = new Uint8Array(6); 158 | const rst = await getTokenizerWithData('\x01\x02\x03\x04\x05\x06', tokenizerType); 159 | assert.strictEqual(await rst.peekBuffer(buf.subarray(1)), 5, 'default length = buffer.length - option.offset'); 160 | await rst.close(); 161 | }); 162 | 163 | it('option.maybeLess = true', async () => { 164 | const buffer = new Uint8Array(4); 165 | const rst = await 
getTokenizerWithData('\x89\x54\x40', tokenizerType); 166 | const len = await rst.peekBuffer(buffer, {mayBeLess: true}); 167 | assert.strictEqual(len, 3, 'should return 3 because no more bytes are available'); 168 | await rst.close(); 169 | }); 170 | 171 | it('option.position', async () => { 172 | const buffer = new Uint8Array(5); 173 | const rst = await getTokenizerWithData('\x01\x02\x03\x04\x05\x06', tokenizerType); 174 | const len = await rst.peekBuffer(buffer, {position: 1}); 175 | assert.strictEqual(len, 5, 'return value'); 176 | assert.deepEqual(buffer, Uint8Array.from([0x02, 0x03, 0x04, 0x05, 0x06])); 177 | await rst.close(); 178 | }); 179 | 180 | }); 181 | 182 | it('should decode buffer', async () => { 183 | 184 | const rst = await getTokenizerWithData('\x05peter', tokenizerType); 185 | // should decode UINT8 from chunk 186 | assert.strictEqual(rst.position, 0); 187 | let value: string | number = await rst.readToken(Token.UINT8); 188 | assert.strictEqual(typeof value, 'number'); 189 | assert.strictEqual(value, 5, '0x05 == 5'); 190 | // should decode string from chunk 191 | assert.strictEqual(rst.position, 1); 192 | value = await rst.readToken(new Token.StringType(5, 'utf-8')); 193 | assert.strictEqual(typeof value, 'string'); 194 | assert.strictEqual(value, 'peter'); 195 | assert.strictEqual(rst.position, 6); 196 | // should should reject at the end of the stream 197 | try { 198 | await rst.readToken(Token.UINT8); 199 | assert.fail('Should reject due to end-of-stream'); 200 | } catch (err) { 201 | assert.instanceOf(err, EndOfStreamError); 202 | } finally { 203 | await rst.close(); 204 | } 205 | }); 206 | 207 | it('should be able to read from an absolute offset', async () => { 208 | 209 | const rst = await getTokenizerWithData('\x05peter', tokenizerType); 210 | // should decode UINT8 from chunk 211 | assert.strictEqual(rst.position, 0); 212 | const value: string | number = await rst.readToken(new Token.StringType(5, 'utf-8'), 1); 213 | assert.strictEqual(typeof value, 'string'); 214 | assert.strictEqual(value, 'peter'); 215 | assert.strictEqual(rst.position, 6); 216 | 217 | try { 218 | await rst.readToken(Token.UINT8); 219 | assert.fail('Should reject due to end-of-stream'); 220 | } catch (err) { 221 | assert.instanceOf(err, EndOfStreamError); 222 | } finally { 223 | await rst.close(); 224 | } 225 | 226 | }); 227 | 228 | it('should pick length from buffer, if length is not explicit defined', async () => { 229 | 230 | const rst = await getTokenizerWithData('\x05peter', tokenizerType); 231 | 232 | const buf = new Uint8Array(4); 233 | 234 | // should decode UINT8 from chunk 235 | assert.strictEqual(rst.position, 0); 236 | const bufferLength = await rst.readBuffer(buf); 237 | assert.strictEqual(bufferLength, buf.length); 238 | assert.strictEqual(rst.position, buf.length); 239 | await rst.close(); 240 | }); 241 | 242 | it('should contain fileSize if constructed from file-read-stream', async () => { 243 | if (tokenizerType.hasFileInfo) { 244 | const rst = await tokenizerType.loadTokenizer('test1.dat'); 245 | assert.strictEqual(rst.fileInfo.size, 16, ' ReadStreamTokenizer.fileSize.size'); 246 | await rst.close(); 247 | } 248 | }); 249 | 250 | describe('Parsing binary numbers', () => { 251 | 252 | it('should encode signed 8-bit integer (INT8)', () => { 253 | 254 | const b = new Uint8Array(1); 255 | 256 | Token.INT8.put(b, 0, 0x00); 257 | assert.deepEqual(b, Uint8Array.from([0x00])); 258 | 259 | Token.INT8.put(b, 0, 0x22); 260 | assert.deepEqual(b, Uint8Array.from([0x22])); 261 | 262 | 
Token.INT8.put(b, 0, -0x22); 263 | assert.deepEqual(b, Uint8Array.from([0xde])); 264 | }); 265 | 266 | it('should decode signed 8-bit integer (INT8)', async () => { 267 | 268 | const rst = await getTokenizerWithData('\x00\x7f\x80\xff\x81', tokenizerType); 269 | 270 | let value: number = await rst.readToken(Token.INT8); 271 | assert.strictEqual(typeof value, 'number'); 272 | assert.strictEqual(value, 0, 'INT8 #1 == 0'); 273 | value = await rst.readToken(Token.INT8); 274 | assert.strictEqual(typeof value, 'number'); 275 | assert.strictEqual(value, 127, 'INT8 #2 == 127'); 276 | value = await rst.readToken(Token.INT8); 277 | assert.strictEqual(typeof value, 'number'); 278 | assert.strictEqual(value, -128, 'INT8 #3 == -128'); 279 | value = await rst.readToken(Token.INT8); 280 | assert.strictEqual(typeof value, 'number'); 281 | assert.strictEqual(value, -1, 'INT8 #4 == -1'); 282 | value = await rst.readToken(Token.INT8); 283 | assert.strictEqual(typeof value, 'number'); 284 | assert.strictEqual(value, -127, 'INT8 #5 == -127'); 285 | 286 | await rst.close(); 287 | 288 | }); 289 | 290 | it('should encode signed 16-bit big-endian integer (INT16_BE)', () => { 291 | 292 | const b = new Uint8Array(2); 293 | 294 | Token.INT16_BE.put(b, 0, 0x00); 295 | assert.deepEqual(b, Uint8Array.from([0x00, 0x00])); 296 | 297 | Token.INT16_BE.put(b, 0, 0x0f0b); 298 | assert.deepEqual(b, Uint8Array.from([0x0f, 0x0b])); 299 | 300 | Token.INT16_BE.put(b, 0, -0x0f0b); 301 | assert.deepEqual(b, Uint8Array.from([0xf0, 0xf5])); 302 | }); 303 | 304 | it('should decode signed 16-bit big-endian integer (INT16_BE)', async () => { 305 | 306 | const rst = await getTokenizerWithData('\x0a\x1a\x00\x00\xff\xff\x80\x00', tokenizerType); 307 | 308 | let value: number = await rst.readToken(Token.INT16_BE); 309 | assert.strictEqual(typeof value, 'number'); 310 | assert.strictEqual(value, 2586, 'INT16_BE#1'); 311 | value = await rst.readToken(Token.INT16_BE); 312 | assert.strictEqual(typeof value, 'number'); 313 | assert.strictEqual(value, 0, 'INT16_BE#2'); 314 | value = await rst.readToken(Token.INT16_BE); 315 | assert.strictEqual(typeof value, 'number'); 316 | assert.strictEqual(value, -1, 'INT16_BE#3'); 317 | value = await rst.readToken(Token.INT16_BE); 318 | assert.strictEqual(typeof value, 'number'); 319 | assert.strictEqual(value, -32768, 'INT16_BE#4'); 320 | 321 | await rst.close(); 322 | }); 323 | 324 | it('should encode signed 24-bit big-endian integer (INT24_BE)', async () => { 325 | 326 | const b = new Uint8Array(3); 327 | 328 | Token.INT24_BE.put(b, 0, 0x00); 329 | assert.deepEqual(b, Uint8Array.from([0x00, 0x00, 0x00])); 330 | 331 | Token.INT24_BE.put(b, 0, 0x0f0ba0); 332 | assert.deepEqual(b, Uint8Array.from([0x0f, 0x0b, 0xa0])); 333 | 334 | Token.INT24_BE.put(b, 0, -0x0f0bcc); 335 | assert.deepEqual(b, Uint8Array.from([0xf0, 0xf4, 0x34])); 336 | }); 337 | 338 | it('should decode signed 24-bit big-endian integer (INT24_BE)', async () => { 339 | 340 | const rst = await getTokenizerWithData('\x00\x00\x00\xff\xff\xff\x10\x00\xff\x80\x00\x00', tokenizerType); 341 | 342 | let value: number = await rst.readToken(Token.INT24_BE); 343 | assert.strictEqual(typeof value, 'number'); 344 | assert.strictEqual(value, 0, 'INT24_BE#1'); 345 | value = await rst.readToken(Token.INT24_BE); 346 | assert.strictEqual(typeof value, 'number'); 347 | assert.strictEqual(value, -1, 'INT24_BE#2'); 348 | value = await rst.readToken(Token.INT24_BE); 349 | assert.strictEqual(typeof value, 'number'); 350 | assert.strictEqual(value, 1048831, 
'INT24_BE#3'); 351 | value = await rst.readToken(Token.INT24_BE); 352 | assert.strictEqual(typeof value, 'number'); 353 | assert.strictEqual(value, -8388608, 'INT24_BE#4'); 354 | await rst.close(); 355 | }); 356 | 357 | // ToDo: test decoding: INT24_LE 358 | 359 | it('should encode signed 32-bit big-endian integer (INT32_BE)', () => { 360 | 361 | const b = new Uint8Array(4); 362 | 363 | Token.INT32_BE.put(b, 0, 0x00); 364 | assert.deepEqual(b, Uint8Array.from([0x00, 0x00, 0x00, 0x00])); 365 | 366 | Token.INT32_BE.put(b, 0, 0x0f0bcca0); 367 | assert.deepEqual(b, Uint8Array.from([0x0f, 0x0b, 0xcc, 0xa0])); 368 | 369 | Token.INT32_BE.put(b, 0, -0x0f0bcca0); 370 | assert.deepEqual(b, Uint8Array.from([0xf0, 0xf4, 0x33, 0x60])); 371 | }); 372 | 373 | it('should decode signed 32-bit big-endian integer (INT32_BE)', async () => { 374 | 375 | const rst = await getTokenizerWithData('\x00\x00\x00\x00\xff\xff\xff\xff\x00\x10\x00\xff\x80\x00\x00\x00', tokenizerType); 376 | 377 | let value: number = await rst.readToken(Token.INT32_BE); 378 | assert.strictEqual(typeof value, 'number'); 379 | assert.strictEqual(value, 0, 'INT32_BE #1'); 380 | value = await rst.readToken(Token.INT32_BE); 381 | assert.strictEqual(typeof value, 'number'); 382 | assert.strictEqual(value, -1, 'INT32_BE #2'); 383 | value = await rst.readToken(Token.INT32_BE); 384 | assert.strictEqual(typeof value, 'number'); 385 | assert.strictEqual(value, 1048831, 'INT32_BE #3'); 386 | value = await rst.readToken(Token.INT32_BE); 387 | assert.strictEqual(typeof value, 'number'); 388 | assert.strictEqual(value, -2147483648, 'INT32_BE #4'); 389 | await rst.close(); 390 | }); 391 | 392 | it('should encode signed 8-bit big-endian integer (INT8)', () => { 393 | 394 | const b = new Uint8Array(1); 395 | 396 | Token.UINT8.put(b, 0, 0x00); 397 | assert.deepEqual(b, Uint8Array.from([0x00])); 398 | 399 | Token.UINT8.put(b, 0, 0xff); 400 | assert.deepEqual(b, Uint8Array.from([0xff])); 401 | }); 402 | 403 | it('should decode unsigned 8-bit integer (UINT8)', async () => { 404 | 405 | const rst = await getTokenizerWithData('\x00\x1a\xff', tokenizerType); 406 | 407 | let value: number = await rst.readToken(Token.UINT8); 408 | assert.strictEqual(typeof value, 'number'); 409 | assert.strictEqual(value, 0, 'UINT8 #1'); 410 | value = await rst.readToken(Token.UINT8); 411 | assert.strictEqual(typeof value, 'number'); 412 | assert.strictEqual(value, 26, 'UINT8 #2'); 413 | value = await rst.readToken(Token.UINT8); 414 | assert.strictEqual(typeof value, 'number'); 415 | assert.strictEqual(value, 255, 'UINT8 #3'); 416 | await rst.close(); 417 | }); 418 | 419 | it('should encode unsigned 16-bit big-endian integer (UINT16_LE)', () => { 420 | 421 | const b = new Uint8Array(4); 422 | 423 | Token.UINT16_LE.put(b, 0, 0x00); 424 | Token.UINT16_LE.put(b, 2, 0xffaa); 425 | assert.deepEqual(b, Uint8Array.from([0x00, 0x00, 0xaa, 0xff])); 426 | }); 427 | 428 | it('should encode unsigned 16-bit little-endian integer (UINT16_BE)', () => { 429 | const b = new Uint8Array(4); 430 | Token.UINT16_BE.put(b, 0, 0xf); 431 | Token.UINT16_BE.put(b, 2, 0xffaa); 432 | assert.deepEqual(b, Uint8Array.from([0x00, 0x0f, 0xff, 0xaa])); 433 | }); 434 | 435 | it('should encode unsigned 16-bit mixed little/big-endian integers', () => { 436 | const b = new Uint8Array(4); 437 | Token.UINT16_BE.put(b, 0, 0xffaa); 438 | Token.UINT16_LE.put(b, 2, 0xffaa); 439 | assert.deepEqual(b, Uint8Array.from([0xff, 0xaa, 0xaa, 0xff])); 440 | }); 441 | 442 | it('should decode unsigned mixed 16-bit big/little-endian 
integer', async () => { 443 | 444 | const rst = await getTokenizerWithData('\x1a\x00\x1a\x00\x1a\x00\x1a\x00', tokenizerType); 445 | 446 | let value: number = await rst.readToken(Token.UINT16_LE); 447 | assert.strictEqual(typeof value, 'number'); 448 | assert.strictEqual(value, 0x001a, 'UINT16_LE #1'); 449 | value = await rst.readToken(Token.UINT16_BE); 450 | assert.strictEqual(typeof value, 'number'); 451 | assert.strictEqual(value, 0x1a00, 'UINT16_BE #2'); 452 | value = await rst.readToken(Token.UINT16_LE); 453 | assert.strictEqual(typeof value, 'number'); 454 | assert.strictEqual(value, 0x001a, 'UINT16_BE #3'); 455 | value = await rst.readToken(Token.UINT16_BE); 456 | assert.strictEqual(typeof value, 'number'); 457 | assert.strictEqual(value, 0x1a00, 'UINT16_LE #4'); 458 | 459 | await rst.close(); 460 | }); 461 | 462 | it('should encode unsigned 24-bit little-endian integer (UINT24_LE)', () => { 463 | 464 | const b = new Uint8Array(3); 465 | 466 | Token.UINT24_LE.put(b, 0, 0x00); 467 | assert.deepEqual(b, Uint8Array.from([0x00, 0x000, 0x00])); 468 | 469 | Token.UINT24_LE.put(b, 0, 0xff); 470 | assert.deepEqual(b, Uint8Array.from([0xff, 0x00, 0x00])); 471 | 472 | Token.UINT24_LE.put(b, 0, 0xaabbcc); 473 | assert.deepEqual(b, Uint8Array.from([0xcc, 0xbb, 0xaa])); 474 | }); 475 | 476 | it('should encode unsigned 24-bit big-endian integer (UINT24_BE)', () => { 477 | 478 | const b = new Uint8Array(3); 479 | 480 | Token.UINT24_BE.put(b, 0, 0x00); 481 | assert.deepEqual(b, Uint8Array.from([0x00, 0x00, 0x00])); 482 | 483 | Token.UINT24_BE.put(b, 0, 0xff); 484 | assert.deepEqual(b, Uint8Array.from([0x00, 0x00, 0xff])); 485 | 486 | Token.UINT24_BE.put(b, 0, 0xaabbcc); 487 | assert.deepEqual(b, Uint8Array.from([0xaa, 0xbb, 0xcc])); 488 | }); 489 | 490 | it('should decode signed 24-bit big/little-endian integer (UINT24_LE/INT24_BE)', async () => { 491 | 492 | const rst = await getTokenizerWithData('\x1a\x1a\x00\x1a\x1a\x00\x1a\x1a\x00\x1a\x1a\x00', tokenizerType); 493 | 494 | let value: number = await rst.readToken(Token.UINT24_LE); 495 | assert.strictEqual(typeof value, 'number'); 496 | assert.strictEqual(value, 0x001a1a, 'INT24_LE#1'); 497 | value = await rst.readToken(Token.UINT24_BE); 498 | assert.strictEqual(typeof value, 'number'); 499 | assert.strictEqual(value, 0x1a1a00, 'INT24_BE#2'); 500 | value = await rst.readToken(Token.UINT24_LE); 501 | assert.strictEqual(typeof value, 'number'); 502 | assert.strictEqual(value, 0x001a1a, 'INT24_LE#3'); 503 | value = await rst.readToken(Token.UINT24_BE); 504 | assert.strictEqual(typeof value, 'number'); 505 | assert.strictEqual(value, 0x1a1a00, 'INT24_BE#4'); 506 | 507 | await rst.close(); 508 | }); 509 | 510 | it('should encode unsigned 32-bit little-endian integer (UINT32_LE)', () => { 511 | 512 | const b = new Uint8Array(4); 513 | 514 | Token.UINT32_LE.put(b, 0, 0x00); 515 | assert.deepEqual(b, Uint8Array.from([0x00, 0x00, 0x00, 0x00])); 516 | 517 | Token.UINT32_LE.put(b, 0, 0xff); 518 | assert.deepEqual(b, Uint8Array.from([0xff, 0x00, 0x00, 0x00])); 519 | 520 | Token.UINT32_LE.put(b, 0, 0xaabbccdd); 521 | assert.deepEqual(b, Uint8Array.from([0xdd, 0xcc, 0xbb, 0xaa])); 522 | }); 523 | 524 | it('should encode unsigned 32-bit big-endian integer (INT32_BE)', () => { 525 | 526 | const b = new Uint8Array(4); 527 | 528 | Token.UINT32_BE.put(b, 0, 0x00); 529 | assert.deepEqual(b, Uint8Array.from([0x00, 0x00, 0x00, 0x00])); 530 | 531 | Token.UINT32_BE.put(b, 0, 0xff); 532 | assert.deepEqual(b, Uint8Array.from([0x00, 0x00, 0x00, 0xff])); 533 | 534 | 
Token.UINT32_BE.put(b, 0, 0xaabbccdd); 535 | assert.deepEqual(b, Uint8Array.from([0xaa, 0xbb, 0xcc, 0xdd])); 536 | }); 537 | 538 | it('should decode unsigned 32-bit little/big-endian integer (UINT32_LE/UINT32_BE)', async () => { 539 | 540 | const rst = await getTokenizerWithData('\x1a\x00\x1a\x00\x1a\x00\x1a\x00\x1a\x00\x1a\x00\x1a\x00\x1a\x00', tokenizerType); 541 | 542 | let value: number = await rst.readToken(Token.UINT32_LE); 543 | assert.strictEqual(typeof value, 'number'); 544 | assert.strictEqual(value, 0x001a001a, 'UINT24_LE #1'); 545 | value = await rst.readToken(Token.UINT32_BE); 546 | assert.strictEqual(typeof value, 'number'); 547 | assert.strictEqual(value, 0x1a001a00, 'UINT32_BE #2'); 548 | value = await rst.readToken(Token.UINT32_LE); 549 | assert.strictEqual(typeof value, 'number'); 550 | assert.strictEqual(value, 0x001a001a, 'UINT32_LE #3'); 551 | value = await rst.readToken(Token.UINT32_BE); 552 | assert.strictEqual(typeof value, 'number'); 553 | assert.strictEqual(value, 0x1a001a00, 'UINT32_BE #4'); 554 | 555 | await rst.close(); 556 | }); 557 | 558 | }); 559 | 560 | it('Transparency', async function() { 561 | 562 | this.timeout(5000); 563 | 564 | const size = 10 * 1024; 565 | const buf = new Uint8Array(size); 566 | 567 | for (let i = 0; i < size; ++i) { 568 | buf[i] = i % 255; 569 | } 570 | 571 | const testFile = 'test2.dat'; 572 | const pathTestFile = Path.join(__dirname, 'resources', testFile); 573 | await fs.writeFile(pathTestFile, buf); 574 | 575 | const rst = await tokenizerType.loadTokenizer(testFile); 576 | let expected = 0; 577 | 578 | try { 579 | let v: number; 580 | do { 581 | v = await rst.readNumber(Token.UINT8); 582 | assert.strictEqual(v, expected % 255, `offset=${expected}`); 583 | ++expected; 584 | } while (v > 0); 585 | } catch (err) { 586 | assert.instanceOf(err, EndOfStreamError); 587 | assert.strictEqual(expected, size, 'total number of parsed bytes'); 588 | } 589 | 590 | await rst.close(); 591 | }); 592 | 593 | it('Handle peek token', async () => { 594 | 595 | async function peekOnData(tokenizer: ITokenizer): Promise { 596 | assert.strictEqual(tokenizer.position, 0); 597 | 598 | let value = await tokenizer.peekToken(Token.UINT32_LE); 599 | assert.strictEqual(typeof value, 'number'); 600 | assert.strictEqual(value, 0x001a001a, 'UINT24_LE #1'); 601 | assert.strictEqual(tokenizer.position, 0); 602 | 603 | value = await tokenizer.peekToken(Token.UINT32_LE); 604 | assert.strictEqual(typeof value, 'number'); 605 | assert.strictEqual(value, 0x001a001a, 'UINT24_LE sequential peek #2'); 606 | assert.strictEqual(tokenizer.position, 0); 607 | value = await tokenizer.readToken(Token.UINT32_LE); 608 | 609 | assert.strictEqual(typeof value, 'number'); 610 | assert.strictEqual(value, 0x001a001a, 'UINT24_LE #3'); 611 | assert.strictEqual(tokenizer.position, 4); 612 | value = await tokenizer.readToken(Token.UINT32_BE); 613 | assert.strictEqual(typeof value, 'number'); 614 | assert.strictEqual(value, 0x1a001a00, 'UINT32_BE #4'); 615 | assert.strictEqual(tokenizer.position, 8); 616 | value = await tokenizer.readToken(Token.UINT32_LE); 617 | 618 | assert.strictEqual(typeof value, 'number'); 619 | assert.strictEqual(value, 0x001a001a, 'UINT32_LE #5'); 620 | assert.strictEqual(tokenizer.position, 12); 621 | value = await tokenizer.readToken(Token.UINT32_BE); 622 | 623 | assert.strictEqual(typeof value, 'number'); 624 | assert.strictEqual(value, 0x1a001a00, 'UINT32_BE #6'); 625 | assert.strictEqual(tokenizer.position, 16); 626 | 627 | } 628 | 629 | const rst = await 
tokenizerType.loadTokenizer('test1.dat'); 630 | 631 | if (rst.supportsRandomAccess()) { 632 | assert.strictEqual(rst.fileInfo.size, 16, 'check file size property'); 633 | } 634 | await peekOnData(rst); 635 | await rst.close(); 636 | }); 637 | 638 | it('Overlapping peeks', async () => { 639 | 640 | const rst = await getTokenizerWithData('\x01\x02\x03\x04\x05', tokenizerType); 641 | const peekBuffer = new Uint8Array(3); 642 | const readBuffer = new Uint8Array(1); 643 | 644 | assert.strictEqual(0, rst.position); 645 | let len = await rst.peekBuffer(peekBuffer, {length: 3}); // Peek #1 646 | assert.strictEqual(3, len); 647 | assert.deepEqual(peekBuffer, stringToUint8Array('\x01\x02\x03'), 'Peek #1'); 648 | assert.strictEqual(rst.position, 0); 649 | len = await rst.readBuffer(readBuffer, {length: 1}); // Read #1 650 | assert.strictEqual(len, 1); 651 | assert.strictEqual(rst.position, 1); 652 | assert.deepEqual(readBuffer, stringToUint8Array('\x01'), 'Read #1'); 653 | len = await rst.peekBuffer(peekBuffer, {length: 3}); // Peek #2 654 | assert.strictEqual(len, 3); 655 | assert.strictEqual(rst.position, 1); 656 | assert.deepEqual(peekBuffer, stringToUint8Array('\x02\x03\x04'), 'Peek #2'); 657 | len = await rst.readBuffer(readBuffer, {length: 1}); // Read #2 658 | assert.strictEqual(len, 1); 659 | assert.strictEqual(rst.position, 2); 660 | assert.deepEqual(readBuffer, stringToUint8Array('\x02'), 'Read #2'); 661 | len = await rst.peekBuffer(peekBuffer, {length: 3}); // Peek #3 662 | assert.strictEqual(len, 3); 663 | assert.strictEqual(rst.position, 2); 664 | assert.deepEqual(peekBuffer, stringToUint8Array('\x03\x04\x05'), 'Peek #3'); 665 | len = await rst.readBuffer(readBuffer, {length: 1}); // Read #3 666 | assert.strictEqual(len, 1); 667 | assert.strictEqual(rst.position, 3); 668 | assert.deepEqual(readBuffer, stringToUint8Array('\x03'), 'Read #3'); 669 | len = await rst.peekBuffer(peekBuffer, {length: 2}); // Peek #4 670 | assert.strictEqual(len, 2, '3 bytes requested to peek, only 2 bytes left'); 671 | assert.strictEqual(rst.position, 3); 672 | assert.deepEqual(peekBuffer, stringToUint8Array('\x04\x05\x05'), 'Peek #4'); 673 | len = await rst.readBuffer(readBuffer, {length: 1}); // Read #4 674 | assert.strictEqual(len, 1); 675 | assert.strictEqual(rst.position, 4); 676 | assert.deepEqual(readBuffer, stringToUint8Array('\x04'), 'Read #4'); 677 | 678 | await rst.close(); 679 | }); 680 | 681 | it('should be able to read at position ahead', async () => { 682 | 683 | const rst = await getTokenizerWithData('\x05peter', tokenizerType); 684 | // should decode string from chunk 685 | assert.strictEqual(rst.position, 0); 686 | const value = await rst.readToken(new Token.StringType(5, 'utf-8'), 1); 687 | assert.strictEqual(typeof value, 'string'); 688 | assert.strictEqual(value, 'peter'); 689 | assert.strictEqual(rst.position, 6); 690 | // should should reject at the end of the stream 691 | try { 692 | await rst.readToken(Token.UINT8); 693 | assert.fail('Should reject due to end-of-stream'); 694 | } catch (err) { 695 | assert.instanceOf(err, EndOfStreamError); 696 | } finally { 697 | await rst.close(); 698 | } 699 | }); 700 | 701 | it('should be able to peek at position ahead', async () => { 702 | 703 | const rst = await getTokenizerWithData('\x05peter', tokenizerType); 704 | // should decode string from chunk 705 | assert.strictEqual(rst.position, 0); 706 | const value = await rst.peekToken(new Token.StringType(5, 'latin1'), 1); 707 | assert.strictEqual(typeof value, 'string'); 708 | 
assert.strictEqual(value, 'peter'); 709 | assert.strictEqual(rst.position, 0); 710 | 711 | await rst.close(); 712 | }); 713 | 714 | it('number', async () => { 715 | const tokenizer = await tokenizerType.loadTokenizer('test3.dat'); 716 | assert.isDefined(tokenizer.fileInfo, 'tokenizer.fileInfo'); 717 | // @ts-ignore 718 | await tokenizer.ignore(1); 719 | const x = await tokenizer.peekNumber(Token.INT32_BE); 720 | assert.strictEqual(x, 33752069); 721 | 722 | await tokenizer.close(); 723 | }); 724 | 725 | it('should throw an Error if we reach EOF while peeking a number', async () => { 726 | const tokenizer = await tokenizerType.loadTokenizer('test3.dat'); 727 | if (tokenizerType.hasFileInfo) { 728 | assert.isDefined(tokenizer.fileInfo, 'tokenizer.fileInfo'); 729 | } 730 | // @ts-ignore 731 | await tokenizer.ignore(2); 732 | try { 733 | await tokenizer.peekNumber(Token.INT32_BE); 734 | assert.fail('Should throw Error: End-Of-File'); 735 | } catch (err) { 736 | assert.instanceOf(err, EndOfStreamError); 737 | } 738 | await tokenizer.close(); 739 | }); 740 | 741 | it('should be able to handle multiple ignores', async () => { 742 | const tokenizer = await tokenizerType.loadTokenizer('test1.dat'); 743 | let value = await tokenizer.readToken(Token.UINT32_LE); 744 | assert.strictEqual(typeof value, 'number'); 745 | assert.strictEqual(value, 0x001a001a, 'UINT24_LE #1'); 746 | await tokenizer.ignore(Token.UINT32_BE.len); 747 | await tokenizer.ignore(Token.UINT32_LE.len); 748 | value = await tokenizer.readToken(Token.UINT32_BE); 749 | assert.strictEqual(typeof value, 'number'); 750 | assert.strictEqual(value, 0x1a001a00, 'UINT32_BE #4'); 751 | await tokenizer.close(); 752 | }); 753 | 754 | it('should be able to ignore (skip)', async () => { 755 | 756 | const tokenizer = await tokenizerType.loadTokenizer('test1.dat'); 757 | assert.strictEqual(tokenizer.position, 0); 758 | await tokenizer.ignore(4); 759 | assert.strictEqual(tokenizer.position, 4); 760 | let value = await tokenizer.readToken(Token.UINT32_BE); 761 | assert.strictEqual(typeof value, 'number'); 762 | assert.strictEqual(value, 0x1a001a00, 'UINT32_BE #2'); 763 | value = await tokenizer.readToken(Token.UINT32_LE); 764 | assert.strictEqual(typeof value, 'number'); 765 | assert.strictEqual(value, 0x001a001a, 'UINT32_LE #3'); 766 | value = await tokenizer.readToken(Token.UINT32_BE); 767 | assert.strictEqual(typeof value, 'number'); 768 | assert.strictEqual(value, 0x1a001a00, 'UINT32_BE #4'); 769 | await tokenizer.close(); 770 | }); 771 | 772 | describe('End-Of-File exception behaviour', () => { 773 | 774 | it('should not throw an Error if we read exactly until the end of the file', async () => { 775 | 776 | const rst = await getTokenizerWithData('\x89\x54\x40', tokenizerType); 777 | const num = await rst.readToken(Token.UINT24_BE); 778 | assert.strictEqual(num, 9000000); 779 | await rst.close(); 780 | }); 781 | 782 | it('readBuffer()', async () => { 783 | 784 | const testFile = 'test1.dat'; 785 | 786 | const stat = await fs.stat(getResourcePath(testFile)); 787 | const tokenizer = await tokenizerType.loadTokenizer(testFile); 788 | const buf = new Uint8Array(stat.size); 789 | const bytesRead = await tokenizer.readBuffer(buf); 790 | assert.ok(typeof bytesRead === 'number', 'readBuffer promise should provide a number'); 791 | assert.strictEqual(stat.size, bytesRead); 792 | try { 793 | await tokenizer.readBuffer(buf); 794 | assert.fail('Should throw EOF'); 795 | } catch (err) { 796 | assert.instanceOf(err, EndOfStreamError); 797 | } finally { 798 | 
await tokenizer.close(); 799 | } 800 | }); 801 | 802 | it('should handle zero byte read', async () => { 803 | 804 | const rst = await getTokenizerWithData('\x00\x00\x00', tokenizerType); 805 | const uint8Array = await rst.readToken(new Token.Uint8ArrayType(0)); 806 | assert.strictEqual(uint8Array.length, 0); 807 | await rst.close(); 808 | }); 809 | 810 | it('should not throw an Error if we read exactly until the end of the file', async () => { 811 | 812 | const rst = await getTokenizerWithData('\x89\x54\x40', tokenizerType); 813 | const num = await rst.readToken(Token.UINT24_BE); 814 | assert.strictEqual(num, 9000000); 815 | await rst.close(); 816 | }); 817 | 818 | it('should be thrown if a token EOF reached in the middle of a token', async () => { 819 | 820 | const rst = await getTokenizerWithData('\x89\x54\x40', tokenizerType); 821 | try { 822 | await rst.readToken(Token.INT32_BE); 823 | assert.fail('It should throw EndOfFile Error'); 824 | } catch (err) { 825 | assert.instanceOf(err, EndOfStreamError); 826 | } finally { 827 | await rst.close(); 828 | } 829 | }); 830 | 831 | it('should throw an EOF if we read to buffer', async () => { 832 | const buffer = new Uint8Array(4); 833 | 834 | const rst = await getTokenizerWithData('\x89\x54\x40', tokenizerType); 835 | try { 836 | await rst.readBuffer(buffer); 837 | assert.fail('It should throw EndOfFile Error'); 838 | } catch (err) { 839 | assert.instanceOf(err, EndOfStreamError); 840 | } finally { 841 | await rst.close(); 842 | } 843 | }); 844 | 845 | it('should throw an EOF if we peek to buffer', async () => { 846 | 847 | const buffer = new Uint8Array(4); 848 | const rst = await getTokenizerWithData('\x89\x54\x40', tokenizerType); 849 | try { 850 | await rst.peekBuffer(buffer); 851 | assert.fail('It should throw EndOfFile Error'); 852 | } catch (err) { 853 | assert.instanceOf(err, EndOfStreamError); 854 | } finally { 855 | await rst.close(); 856 | } 857 | }); 858 | 859 | }); 860 | 861 | it('should be able to read from a file', async () => { 862 | 863 | const tokenizer = await tokenizerType.loadTokenizer('test1.dat'); 864 | if (tokenizerType.hasFileInfo) { 865 | assert.strictEqual(tokenizer.fileInfo.size, 16, 'check file size property'); 866 | } 867 | let value = await tokenizer.readToken(Token.UINT32_LE); 868 | assert.strictEqual(typeof value, 'number'); 869 | assert.strictEqual(value, 0x001a001a, 'UINT24_LE #1'); 870 | value = await tokenizer.readToken(Token.UINT32_BE); 871 | assert.strictEqual(typeof value, 'number'); 872 | assert.strictEqual(value, 0x1a001a00, 'UINT32_BE #2'); 873 | value = await tokenizer.readToken(Token.UINT32_LE); 874 | assert.strictEqual(typeof value, 'number'); 875 | assert.strictEqual(value, 0x001a001a, 'UINT32_LE #3'); 876 | value = await tokenizer.readToken(Token.UINT32_BE); 877 | assert.strictEqual(typeof value, 'number'); 878 | assert.strictEqual(value, 0x1a001a00, 'UINT32_BE #4'); 879 | await tokenizer.close(); 880 | }); 881 | 882 | it('should be able to parse the IgnoreType-token', async () => { 883 | const tokenizer = await tokenizerType.loadTokenizer('test1.dat'); 884 | await tokenizer.readToken(new Token.IgnoreType(4)); 885 | let value = await tokenizer.readToken(Token.UINT32_BE); 886 | assert.strictEqual(typeof value, 'number'); 887 | assert.strictEqual(value, 0x1a001a00, 'UINT32_BE #2'); 888 | value = await tokenizer.readToken(Token.UINT32_LE); 889 | assert.strictEqual(typeof value, 'number'); 890 | assert.strictEqual(value, 0x001a001a, 'UINT32_LE #3'); 891 | value = await 
tokenizer.readToken(Token.UINT32_BE); 892 | assert.strictEqual(typeof value, 'number'); 893 | assert.strictEqual(value, 0x1a001a00, 'UINT32_BE #4'); 894 | await tokenizer.close(); 895 | }); 896 | 897 | it('should be able to read 0 bytes from a file', async () => { 898 | const bufZero = new Uint8Array(0); 899 | const tokenizer = await tokenizerType.loadTokenizer('test1.dat'); 900 | try { 901 | await tokenizer.readBuffer(bufZero); 902 | } finally { 903 | await tokenizer.close(); 904 | } 905 | }); 906 | 907 | if (tokenizerType.abortable) { 908 | 909 | describe('Abort delayed read', () => { 910 | 911 | it('without aborting', async () => { 912 | const fileReadStream = await getTokenizerWithData('123', tokenizerType, 500); 913 | try { 914 | const promise = fileReadStream.readToken(new Token.StringType(3, 'utf-8'), 0); 915 | assert.strictEqual(await promise, '123'); 916 | } finally { 917 | await fileReadStream.close(); 918 | } 919 | }); 920 | 921 | it('abort async operation using `abort()`', async function() { 922 | if (process.versions.bun) { 923 | this.skip(); // Fails with Bun 1.2 924 | } 925 | const fileReadStream = await getTokenizerWithData('123', tokenizerType, 500); 926 | try { 927 | const promise = fileReadStream.readToken(new Token.StringType(3, 'utf-8'), 0); 928 | await fileReadStream.abort(); 929 | await expect(promise).to.be.rejectedWith(Error); 930 | } finally { 931 | await fileReadStream.close(); 932 | } 933 | }); 934 | 935 | it('abort async operation using `close()`', async function() { 936 | if (process.versions.bun) { 937 | this.skip(); // Fails with Bun 1.2 938 | } 939 | const fileReadStream = await getTokenizerWithData('123', tokenizerType, 500); 940 | const promise = fileReadStream.readToken(new Token.StringType(3, 'utf-8'), 0); 941 | await fileReadStream.close(); 942 | await expect(promise).to.be.rejectedWith(Error); 943 | }); 944 | 945 | it('abort async operation using `AbortController`', async function() { 946 | 947 | if (process.versions.bun) { 948 | this.skip(); // Fails with Bun 1.2 949 | } 950 | 951 | const abortController = new AbortController(); 952 | const fileReadStream = await getTokenizerWithData('123', tokenizerType, 500, abortController.signal); 953 | try { 954 | const promise = fileReadStream.readToken(new Token.StringType(3, 'utf-8'), 0); 955 | abortController.abort(); 956 | await expect(promise).to.be.rejectedWith(Error); 957 | } finally { 958 | await fileReadStream.close(); 959 | } 960 | }); 961 | 962 | }); 963 | } 964 | 965 | }); // End of test "Tokenizer-types" 966 | }); 967 | 968 | describe('Random-read-access', async () => { 969 | 970 | tokenizerTests 971 | .filter(tokenizerType => tokenizerType.randomRead) 972 | .forEach(tokenizerType => { 973 | describe(tokenizerType.name, () => { 974 | 975 | it('Read ID3v1 header at the end of the file', async () => { 976 | const tokenizer = await tokenizerType.loadTokenizer('id3v1.mp3') as IRandomAccessTokenizer; 977 | try { 978 | assert.isTrue(tokenizer.supportsRandomAccess(), 'Tokenizer should support random reads'); 979 | const id3HeaderSize = 128; 980 | const id3Header = new Uint8Array(id3HeaderSize); 981 | await tokenizer.readBuffer(id3Header, {position: tokenizer.fileInfo.size - id3HeaderSize}); 982 | const id3Tag = new TextDecoder('utf-8').decode(id3Header.subarray(0, 3)); 983 | assert.strictEqual(id3Tag, 'TAG'); 984 | assert.strictEqual(tokenizer.position, tokenizer.fileInfo.size, 'Tokenizer position should be at the end of the file'); 985 | tokenizer.setPosition(0); 986 | 
assert.strictEqual(tokenizer.position, 0, 'Tokenizer position should be at the beginning of the file'); 987 | } finally { 988 | await tokenizer.close(); 989 | } 990 | }); 991 | 992 | it('Be able to random read from position 0', async () => { 993 | const tokenizer = await fromFile(getResourcePath('id3v1.mp3')); 994 | try { 995 | // Advance tokenizer.position 996 | await tokenizer.ignore(20); 997 | const mpegSync = new Uint8Array(2); 998 | await tokenizer.readBuffer(mpegSync, {position: 0}); 999 | assert.strictEqual(mpegSync[0], 255, 'First sync byte'); 1000 | assert.strictEqual(mpegSync[1], 251, 'Second sync byte'); 1001 | } finally { 1002 | await tokenizer.close(); 1003 | } 1004 | 1005 | }); 1006 | }); 1007 | }); 1008 | 1009 | }); 1010 | }); 1011 | 1012 | describe('fromStream with mayBeLess flag', () => { 1013 | 1014 | it('mayBeLess=true', async () => { 1015 | // Initialize empty stream 1016 | const stream = new PassThrough(); 1017 | const tokenizer = await fromStream(stream); 1018 | try { 1019 | stream.end(); 1020 | 1021 | // Try to read 5 bytes from empty stream, with mayBeLess flag enabled 1022 | const buffer = new Uint8Array(5); 1023 | const bytesRead = await tokenizer.peekBuffer(buffer, {mayBeLess: true}); 1024 | assert.strictEqual(bytesRead, 0); 1025 | } finally { 1026 | await tokenizer.close(); 1027 | } 1028 | }); 1029 | 1030 | it('mayBeLess=false', async () => { 1031 | // Initialize empty stream 1032 | const stream = new PassThrough(); 1033 | const tokenizer = await fromStream(stream); 1034 | try { 1035 | stream.end(); 1036 | 1037 | // Try to read 5 bytes from empty stream, with mayBeLess flag enabled 1038 | const buffer = new Uint8Array(5); 1039 | await tokenizer.peekBuffer(buffer, {mayBeLess: false}); 1040 | } catch (err) { 1041 | if (err instanceof Error) { 1042 | assert.strictEqual(err.message, 'End-Of-Stream'); 1043 | } else { 1044 | assert.fail('Expected: err instanceof Error'); 1045 | } 1046 | return; 1047 | } finally { 1048 | if (tokenizer) { 1049 | await tokenizer.close(); 1050 | } 1051 | } 1052 | assert.fail('Should throw End-Of-Stream error'); 1053 | }); 1054 | 1055 | }); 1056 | 1057 | it('should determine the file size using a file stream', async () => { 1058 | const stream = createReadStream(Path.join(__dirname, 'resources', 'test1.dat')); 1059 | const tokenizer = await fromStream(stream); 1060 | try { 1061 | assert.isDefined(tokenizer.fileInfo, '`fileInfo` should be defined'); 1062 | assert.strictEqual(tokenizer.fileInfo.size, 16, 'fileInfo.size'); 1063 | } finally { 1064 | await tokenizer.close(); 1065 | } 1066 | }); 1067 | 1068 | it('should release stream after close', async () => { 1069 | 1070 | const fileStream = makeByteReadableStreamFromFile(Path.join(__dirname, 'resources', 'test1.dat'), 0); 1071 | assert.isFalse(fileStream.locked, 'stream is unlocked before initializing tokenizer'); 1072 | const webStreamTokenizer = fromWebStream(fileStream); 1073 | assert.isTrue(fileStream.locked, 'stream is locked after initializing tokenizer'); 1074 | await webStreamTokenizer.close(); 1075 | assert.isFalse(fileStream.locked, 'stream is unlocked after closing tokenizer'); 1076 | }); 1077 | -------------------------------------------------------------------------------- /test/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "../tsconfig.json" 3 | } 4 | -------------------------------------------------------------------------------- /test/util.ts: 
-------------------------------------------------------------------------------- 1 | import { createReadStream } from 'node:fs'; 2 | import { Transform, Readable } from 'node:stream'; 3 | import { makeByteReadableStreamFromNodeReadable } from 'node-readable-to-web-readable-stream'; 4 | 5 | export function makeByteReadableStreamFromFile(filename: string, delay = 0): ReadableStream { 6 | 7 | // Create a Node.js Readable stream 8 | const nodeReadable = createReadStream(filename); 9 | 10 | // Create a Transform stream to introduce delay 11 | const delayTransform = new Transform({ 12 | transform(chunk, encoding, callback) { 13 | setTimeout(() => callback(null, chunk), delay); 14 | } 15 | }); 16 | 17 | // Pipe through the delay transform 18 | const delayedNodeStream = nodeReadable.pipe(delayTransform); 19 | 20 | return makeByteReadableStreamFromNodeReadable(delayedNodeStream); 21 | } 22 | 23 | export class DelayedStream extends Readable { 24 | 25 | private buffer: (Uint8Array | null)[]; 26 | private isReading: boolean; 27 | private path: string | undefined; 28 | 29 | constructor(private sourceStream: Readable, private delay = 0) { 30 | super(); 31 | this.path = (sourceStream as unknown as {path: string}).path; 32 | this.buffer = []; 33 | this.isReading = false; 34 | 35 | this.sourceStream.on('data', (chunk) => { 36 | this.buffer.push(chunk); 37 | this.emitDelayed(); 38 | }); 39 | 40 | this.sourceStream.on('end', () => { 41 | this.buffer.push(null); // Signal the end of the stream 42 | this.emitDelayed(); 43 | }); 44 | } 45 | 46 | _read() { 47 | if (!this.isReading && this.buffer.length > 0) { 48 | this.emitDelayed(); 49 | } 50 | } 51 | 52 | emitDelayed() { 53 | if (this.isReading) return; 54 | 55 | if (this.buffer.length > 0) { 56 | this.isReading = true; 57 | const chunk = this.buffer.shift(); 58 | 59 | setTimeout(() => { 60 | this.push(chunk); 61 | this.isReading = false; 62 | 63 | if (this.buffer.length > 0) { 64 | this.emitDelayed(); 65 | } 66 | }, this.delay); 67 | } 68 | } 69 | } 70 | 71 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "inlineSources": false, 4 | "module": "node16", 5 | "moduleResolution": "node16", 6 | "target": "ES2020", 7 | "esModuleInterop": true, 8 | "strict": true, 9 | "verbatimModuleSyntax": true 10 | } 11 | } 12 | 13 | --------------------------------------------------------------------------------
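To round off the test utilities above, here is a minimal sketch of how the `DelayedStream` and `makeByteReadableStreamFromFile` helpers from `test/util.ts` are typically wired into the `fromStream` and `fromWebStream` factories exercised in `test/test.ts`. It is illustrative only and not part of the repository; the import path `'strtok3'` and the relative fixture path `'resources/test1.dat'` are assumptions for the sake of the example.

```ts
// Illustrative sketch only (not part of the repository).
// Assumes the published 'strtok3' package; 'resources/test1.dat' mirrors the fixture used in the tests above.
import { createReadStream } from 'node:fs';
import { fromStream, fromWebStream } from 'strtok3';
import * as Token from 'token-types';
import { DelayedStream, makeByteReadableStreamFromFile } from './util.js';

async function demo(): Promise<void> {
  // Node.js Readable whose chunks are emitted with a 100 ms delay.
  const delayed = new DelayedStream(createReadStream('resources/test1.dat'), 100);
  const nodeTokenizer = await fromStream(delayed);
  console.log(await nodeTokenizer.readToken(Token.UINT32_LE));
  await nodeTokenizer.close();

  // WHATWG byte stream wrapped around the same file, consumed via fromWebStream().
  const webStream = makeByteReadableStreamFromFile('resources/test1.dat', 100);
  const webTokenizer = fromWebStream(webStream);
  console.log(await webTokenizer.readToken(Token.UINT32_BE));
  await webTokenizer.close();
}

demo().catch(console.error);
```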