├── .c8rc.json
├── .github
│   ├── dependabot.yml
│   ├── release-drafter.yml
│   └── workflows
│       ├── ci.yml
│       ├── codeql.yml
│       ├── post-dependabot-prs.yml
│       ├── release-drafter.yml
│       └── update-license.yml
├── .gitignore
├── .mocharc.json
├── .yarnrc.yml
├── LICENSE.txt
├── README.md
├── biome.jsonc
├── lib
│   ├── AbstractTokenizer.ts
│   ├── BufferTokenizer.ts
│   ├── FileTokenizer.ts
│   ├── ReadStreamTokenizer.ts
│   ├── core.ts
│   ├── index.ts
│   ├── tsconfig.json
│   └── types.ts
├── package.json
├── test
│   ├── resources
│   │   ├── id3v1.mp3
│   │   ├── test1.dat
│   │   ├── test2.dat
│   │   └── test3.dat
│   ├── test.ts
│   ├── tsconfig.json
│   └── util.ts
├── tsconfig.json
└── yarn.lock
/.c8rc.json:
--------------------------------------------------------------------------------
1 | {
2 | "reporter": [
3 | "lcov",
4 | "text"
5 | ],
6 | "include": ["lib/**"],
7 | "exclude": [".yarn/**"]
8 | }
9 |
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | updates:
3 |
4 | # ECMAScript Module (ESM)
5 | - package-ecosystem: npm
6 | directory: "/"
7 | schedule:
8 | interval: weekly
9 | time: "06:00"
10 | open-pull-requests-limit: 30
11 | versioning-strategy: increase
12 | target-branch: "master"
13 | labels:
14 | - dependencies
15 | groups:
16 | remark:
17 | dependency-type: "development"
18 | patterns:
19 | - "remark*"
20 | types:
21 | dependency-type: "development"
22 | patterns:
23 | - "@types/*"
24 |
--------------------------------------------------------------------------------
/.github/release-drafter.yml:
--------------------------------------------------------------------------------
1 | # Release Drafter template
2 | # Ref: https://github.com/marketplace/actions/release-drafter
3 |
4 | name-template: 'v$RESOLVED_VERSION'
5 | tag-template: 'v$RESOLVED_VERSION'
6 | categories:
7 | - title: 💥 API Changes
8 | labels:
9 | - API change
10 | - title: 🚀 Enhancements
11 | labels:
12 | - enhancement
13 | - title: 🎨 Improvements
14 | labels:
15 | - improvement
16 | - title: 🐛 Bug Fixes
17 | labels:
18 | - bug
19 | - title: 🔧 Under the hood
20 | labels:
21 | - debt
22 | - title: ⬆️ Dependencies
23 | labels:
24 | - dependencies
25 | - title: 📝 Documentation
26 | labels:
27 | - documentation
28 | exclude-labels:
29 | - 'DevOps'
30 | - dev-dependencies
31 | change-template: '- $TITLE @$AUTHOR (#$NUMBER)'
32 | change-title-escapes: '\<*_&' # You can add # and @ to disable mentions, and add ` to disable code blocks.
33 | version-resolver:
34 | major:
35 | labels:
36 | - 'major'
37 | minor:
38 | labels:
39 | - 'minor'
40 | patch:
41 | labels:
42 | - 'patch'
43 | default: patch
44 | template: |
45 | ## Changes
46 |
47 | $CHANGES
48 |
49 | ## 📦 NPM release
50 | NPM release: [$REPOSITORY@$RESOLVED_VERSION](https://www.npmjs.com/package/$REPOSITORY/v/$RESOLVED_VERSION)
51 |
--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
1 | name: CI
2 | on:
3 | pull_request:
4 | branches: [ "master" ]
5 | push:
6 |
7 | jobs:
8 |
9 | build:
10 | name: "Build module"
11 | runs-on: ubuntu-latest
12 |
13 | steps:
14 |
15 | - uses: actions/checkout@v4
16 |
17 | - uses: actions/setup-node@v4
18 | with:
19 | node-version: 20.x
20 |
21 | - name: Enable Corepack
22 | run: corepack enable
23 |
24 | - name: Install dependencies
25 | run: yarn install
26 |
27 | - name: Lint TypeScript
28 | run: yarn run lint-ts
29 |
30 | - name: Lint Markdown
31 | run: yarn run lint-md
32 |
33 | - name: Build
34 | run: yarn run build
35 |
36 | - name: Upload build
37 | uses: actions/upload-artifact@v4
38 | with:
39 | name: build
40 | path: |
41 | lib/**/*.js
42 | lib/**/*.js.map
43 | lib/**/*.d.ts
44 | test/**/*.js
45 | test/**/*.js.map
46 |
47 | test-nodejs:
48 | name: "Test with Node.js (V8)"
49 | runs-on: ubuntu-latest
50 | needs: build
51 |
52 | env:
53 | YARN_IGNORE_NODE: 1
54 |
55 | strategy:
56 | matrix:
57 | node-version: [18.x, 20.x, 22.x]
58 |
59 | steps:
60 |
61 | - name: 'Checkout the repository'
62 | uses: actions/checkout@v4
63 |
64 | - name: Setup Node.js ${{ matrix.node-version }}
65 | uses: actions/setup-node@v4
66 | with:
67 | node-version: ${{ matrix.node-version }}
68 |
69 | - name: Enable Corepack
70 | run: corepack enable
71 |
72 | - name: Install dependencies
73 | run: yarn install
74 |
75 | - name: Download build
76 | uses: actions/download-artifact@v4
77 | with:
78 | name: build
79 |
80 | - name: Test with Node.js ${{ matrix.node-version }}
81 | run: yarn run test-coverage
82 |
83 | - name: Coveralls Parallel
84 | uses: coverallsapp/github-action@v2
85 | with:
86 | github-token: ${{ secrets.github_token }}
87 | flag-name: run-node-${{ matrix.test_number }}
88 | parallel: true
89 |
90 | test-bun:
91 | name: "Test with Bun (JavaScriptCore)"
92 | runs-on: ubuntu-latest
93 | needs: build
94 |
95 | strategy:
96 | matrix:
97 | bun-version: [1.2]
98 |
99 | env:
100 | YARN_IGNORE_NODE: 1
101 |
102 | steps:
103 |
104 | - name: 'Checkout the repository'
105 | uses: actions/checkout@v4
106 |
107 | - name: Setup Bun ${{ matrix.bun-version }}
108 | uses: oven-sh/setup-bun@v2
109 | with:
110 | bun-version: ${{ matrix.bun-version }}
111 |
112 | - name: Enable Corepack
113 | run: corepack enable
114 |
115 | - name: Install dependencies
116 | run: yarn install
117 |
118 | - name: Download build
119 | uses: actions/download-artifact@v4
120 | with:
121 | name: build
122 |
123 | - name: Unit tests with Bun ${{ matrix.bun-version }}
124 | run: bun run bun:test
125 |
126 | finish:
127 | needs:
128 | - test-nodejs
129 | - test-bun
130 | runs-on: ubuntu-latest
131 | steps:
132 | - name: Coveralls Finished
133 | uses: coverallsapp/github-action@v2
134 | with:
135 | github-token: ${{ secrets.github_token }}
136 | parallel-finished: true
137 |
--------------------------------------------------------------------------------
/.github/workflows/codeql.yml:
--------------------------------------------------------------------------------
1 | name: "CodeQL"
2 |
3 | on:
4 | push:
5 | branches: [ "master" ]
6 | pull_request:
7 | branches: [ "master" ]
8 | schedule:
9 | - cron: "32 12 * * 6"
10 |
11 | jobs:
12 | analyze:
13 | name: Analyze
14 | runs-on: ubuntu-latest
15 | permissions:
16 | actions: read
17 | contents: read
18 | security-events: write
19 |
20 | strategy:
21 | fail-fast: false
22 | matrix:
23 | language: [ javascript ]
24 |
25 | steps:
26 | - name: Checkout
27 | uses: actions/checkout@v4
28 |
29 | - name: Initialize CodeQL
30 | uses: github/codeql-action/init@v3
31 | with:
32 | languages: ${{ matrix.language }}
33 | queries: +security-and-quality
34 |
35 | - name: Autobuild
36 | uses: github/codeql-action/autobuild@v3
37 |
38 | - name: Perform CodeQL Analysis
39 | uses: github/codeql-action/analyze@v3
40 | with:
41 | category: "/language:${{ matrix.language }}"
42 |
--------------------------------------------------------------------------------
/.github/workflows/post-dependabot-prs.yml:
--------------------------------------------------------------------------------
1 | name: Dependabot Pull Request
2 | on: pull_request_target
3 | jobs:
4 | build:
5 | runs-on: ubuntu-latest
6 | if: ${{ github.event.pull_request.user.login == 'dependabot[bot]' }}
7 | steps:
8 | - name: Fetch Dependabot metadata
9 | id: dependabot-metadata
10 | uses: dependabot/fetch-metadata@v2
11 | with:
12 | github-token: "${{ secrets.GITHUB_TOKEN }}"
13 | - name: Add dev-dependencies label
14 | uses: actions-ecosystem/action-add-labels@v1
15 | if: ${{ steps.dependabot-metadata.outputs.dependency-type == 'direct:development' }}
16 | with:
17 | labels: dev-dependencies
18 | - name: Remove dependencies label
19 | uses: actions-ecosystem/action-remove-labels@v1
20 | if: ${{ steps.dependabot-metadata.outputs.dependency-type == 'direct:development' }}
21 | with:
22 | labels: dependencies
23 |
--------------------------------------------------------------------------------
/.github/workflows/release-drafter.yml:
--------------------------------------------------------------------------------
1 | name: Release Drafter
2 |
3 | on:
4 | push:
5 | branches:
6 | - master
7 | pull_request:
8 | types: [opened, reopened, synchronize]
9 |
10 | permissions:
11 | contents: read
12 |
13 | jobs:
14 | update_release_draft:
15 | permissions:
16 | contents: write
17 | pull-requests: write
18 | runs-on: ubuntu-latest
19 | steps:
20 | - uses: release-drafter/release-drafter@v6
21 | env:
22 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
23 |
--------------------------------------------------------------------------------
/.github/workflows/update-license.yml:
--------------------------------------------------------------------------------
1 | name: Update License Year
2 |
3 | on:
4 | schedule:
5 | - cron: "0 0 1 1 *" # Runs on January 1st every year
6 | workflow_dispatch: # Allows manual triggering
7 |
8 | jobs:
9 | update-license:
10 | runs-on: ubuntu-latest
11 |
12 | steps:
13 | - name: Checkout repository
14 | uses: actions/checkout@v3
15 | with:
16 | token: ${{ secrets.GITHUB_TOKEN }}
17 |
18 | - name: Update LICENSE year
19 | run: |
20 | CURRENT_YEAR=$(date +"%Y")
21 | sed -E -i "s/(Copyright © )[0-9]{4}/\1$CURRENT_YEAR/" LICENSE.txt
22 |
23 | - name: Commit and push changes
24 | run: |
25 | CURRENT_YEAR=$(date +"%Y")
26 | git config --global user.name "Borewit"
27 | git config --global user.email "Borewit@users.noreply.github.com"
28 | git diff --quiet LICENSE.txt || (git add LICENSE.txt && git commit -m "Update license year to $CURRENT_YEAR" && git push)
29 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Node module
2 | node_modules
3 |
4 | #IntelliJ IDEA:
5 | .idea
6 | *.iml
7 |
8 | # Yarn:
9 | .pnp.*
10 | .yarn/*
11 |
12 | # Project
13 | lib/**/*.js
14 | lib/**/*.js.map
15 | lib/**/*.d.ts
16 | test/**/*.js
17 | test/**/*.js.map
18 | test/**/*.d.ts
19 | test/resources/tmp.dat
20 | coverage
21 |
--------------------------------------------------------------------------------
/.mocharc.json:
--------------------------------------------------------------------------------
1 | {
2 | "extension": ["ts", "tsx"],
3 | "watch-files": ["lib/**/*.ts", "test/**/*.ts"],
4 | "spec": ["test/*.ts"],
5 | "loader": ["ts-node/esm"],
6 | "extensions": ["ts", "tsx"]
7 | }
8 |
--------------------------------------------------------------------------------
/.yarnrc.yml:
--------------------------------------------------------------------------------
1 | nodeLinker: node-modules
2 |
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright © 2025 Borewit
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [](https://github.com/Borewit/strtok3/actions/workflows/ci.yml)
2 | [](https://github.com/Borewit/strtok3/actions/workflows/codeql.yml)
3 | [](https://npmjs.org/package/strtok3)
4 | [](https://npmcharts.com/compare/strtok3,token-types?start=1200&interval=30)
5 | [](https://deepscan.io/dashboard#view=project&tid=5165&pid=8526&bid=103329)
6 | [](https://snyk.io/test/github/Borewit/strtok3?targetFile=package.json)
7 | [](https://www.codacy.com/app/Borewit/strtok3?utm_source=github.com&utm_medium=referral&utm_content=Borewit/strtok3&utm_campaign=Badge_Grade)
8 | # strtok3
9 |
10 | A promise based streaming [*tokenizer*](#tokenizer-object) for [Node.js](http://nodejs.org) and browsers.
11 |
12 | The `strtok3` module provides several methods for creating a [*tokenizer*](#tokenizer-object) from various input sources.
13 | Designed for:
14 | * Seamless support in streaming environments.
15 | * Efficient decoding of binary data, strings, and numbers.
16 | * Reading [predefined](https://github.com/Borewit/token-types) or custom tokens.
17 | * Offering [*tokenizers*](#tokenizer-object) for reading from [files](#method-strtok3fromfile), [streams](#fromstream-function) or [Uint8Arrays](#frombuffer-function).
18 |
19 | ### Features
20 | `strtok3` can read from:
21 | * Files, using a file path as input.
22 | * Node.js [streams](https://nodejs.org/api/stream.html).
23 | * [Buffer](https://nodejs.org/api/buffer.html) or [Uint8Array](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Uint8Array).
24 | * HTTP chunked transfer provided by [@tokenizer/http](https://github.com/Borewit/tokenizer-http).
25 | * [Amazon S3](https://aws.amazon.com/s3) chunks with [@tokenizer/s3](https://github.com/Borewit/tokenizer-s3).
26 |
27 | ## Installation
28 |
29 | ```sh
30 | npm install strtok3
31 | ```
32 |
33 | ### Compatibility
34 |
35 | Starting with version 7, the module has migrated from [CommonJS](https://en.wikipedia.org/wiki/CommonJS) to [pure ECMAScript Module (ESM)](https://gist.github.com/sindresorhus/a39789f98801d908bbc7ff3ecc99d99c).
36 | The distributed JavaScript codebase is compliant with the [ECMAScript 2020 (11th Edition)](https://en.wikipedia.org/wiki/ECMAScript_version_history#11th_Edition_%E2%80%93_ECMAScript_2020) standard.
37 |
38 | Requires a modern browser, Node.js (V8) ≥ 18 engine or Bun (JavaScriptCore) ≥ 1.2.
39 |
40 | For TypeScript CommonJS backward compatibility, you can use [load-esm](https://github.com/Borewit/load-esm).
41 |
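For example, a minimal sketch of loading this ESM-only module from TypeScript compiled to CommonJS, assuming the `loadEsm` helper exported by `load-esm` (the file name and surrounding function are illustrative):

```ts
import { loadEsm } from 'load-esm';

async function run(): Promise<void> {
  // Dynamically load the ESM-only strtok3 module from CommonJS.
  const strtok3 = await loadEsm<typeof import('strtok3')>('strtok3');
  const tokenizer = await strtok3.fromFile('somefile.bin');
  try {
    // ... read tokens here
  } finally {
    await tokenizer.close();
  }
}
```
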
42 | > [!NOTE]
43 | > This module requires a [Node.js ≥ 18](https://nodejs.org/en/about/previous-releases) engine.
44 | > It can also be used in a browser environment when bundled with a module bundler.
45 |
46 | ## Support the Project
47 | If you find this project useful and would like to support its development, consider sponsoring or contributing:
48 |
49 | - [Become a sponsor to Borewit](https://github.com/sponsors/Borewit)
50 |
51 | - Buy me a coffee:
52 |
53 |
54 |
55 | ## API Documentation
56 |
57 | ### strtok3 methods
58 |
59 | Use one of the methods to instantiate an [*abstract tokenizer*](#tokenizer-object):
60 | - [fromFile](#fromfile-function)*
61 | - [fromStream](#fromstream-function)*
62 | - [fromWebStream](#fromwebstream-function)
63 | - [fromBuffer](#frombuffer-function)
64 |
65 | > **_NOTE:_** * `fromFile` and `fromStream` are only available when importing this module in Node.js.
66 |
67 | All methods return a [`Tokenizer`](#tokenizer-object), either directly or via a promise.
68 |
69 | #### `fromFile` function
70 |
71 | Creates a [*tokenizer*](#tokenizer-object) from a local file.
72 |
73 | ```ts
74 | function fromFile(sourceFilePath: string): Promise<FileTokenizer>
75 | ```
76 |
77 | | Parameter | Type | Description |
78 | |----------------|----------|----------------------------|
79 | | sourceFilePath | `string` | Path to file to read from |
80 |
81 | > [!NOTE]
82 | > - Only available for Node.js engines
83 | > - `fromFile` automatically embeds [file-information](#file-information)
84 |
85 | Returns, via a promise, a [*tokenizer*](#tokenizer-object) which can be used to parse a file.
86 |
87 | ```js
88 | import * as strtok3 from 'strtok3';
89 | import * as Token from 'token-types';
90 |
91 | (async () => {
92 |
93 | const tokenizer = await strtok3.fromFile("somefile.bin");
94 | try {
95 | const myNumber = await tokenizer.readToken(Token.UINT8);
96 | console.log(`My number: ${myNumber}`);
97 | } finally {
98 | tokenizer.close(); // Close the file
99 | }
100 | })();
101 | ```
102 |
103 | #### `fromStream` function
104 |
105 | Creates a [*tokenizer*](#tokenizer-object) from a Node.js [readable stream](https://nodejs.org/api/stream.html#stream_class_stream_readable).
106 |
107 | ```ts
108 | function fromStream(stream: Readable, options?: ITokenizerOptions): Promise<ReadStreamTokenizer>
109 | ```
110 |
111 | | Parameter | Optional | Type | Description |
112 | |-----------|-----------|-------------------------|--------------------------|
113 | | stream | no | [Readable](https://nodejs.org/api/stream.html#stream_class_stream_readable) | Stream to read from |
114 | | fileInfo | yes | [IFileInfo](#IFileInfo) | Provide file information |
115 |
116 | Returns a Promise providing a [*tokenizer*](#tokenizer-object).
117 |
118 | > [!NOTE]
119 | > - Only available for Node.js engines
120 |
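A minimal usage sketch (the file name is a placeholder):

```js
import { createReadStream } from 'node:fs';
import * as strtok3 from 'strtok3';
import * as Token from 'token-types';

const tokenizer = await strtok3.fromStream(createReadStream('somefile.bin'));
try {
  const myUint8Number = await tokenizer.readToken(Token.UINT8);
  console.log(`My number: ${myUint8Number}`);
} finally {
  await tokenizer.close();
}
```
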
121 | #### `fromWebStream` function
122 |
123 | Creates [*tokenizer*](#tokenizer-object) from a [WHATWG ReadableStream](https://nodejs.org/api/webstreams.html#web-streams-api).
124 |
125 | ```ts
126 | function fromWebStream(webStream: AnyWebByteStream, options?: ITokenizerOptions): ReadStreamTokenizer
127 | ```
128 |
129 | | Parameter | Optional | Type | Description |
130 | |----------------|-----------|--------------------------------------------------------------------------|------------------------------------|
131 | | readableStream | no | [ReadableStream](https://nodejs.org/api/webstreams.html#web-streams-api) | WHATWG ReadableStream to read from |
132 | | fileInfo | yes | [IFileInfo](#IFileInfo) | Provide file information |
133 |
134 | Returns a [*tokenizer*](#tokenizer-object).
135 |
136 | ```js
137 | import * as strtok3 from 'strtok3';
138 | import * as Token from 'token-types';
139 | 
140 | const tokenizer = strtok3.fromWebStream(readableStream);
141 | 
142 | tokenizer.readToken(Token.UINT8).then(myUint8Number => {
143 |   console.log(`My number: ${myUint8Number}`);
144 | });
145 | ```
146 |
147 | #### `fromBuffer` function
148 |
149 | Create a tokenizer from memory ([Uint8Array](https://nodejs.org/api/buffer.html)).
150 |
151 | ```ts
152 | function fromBuffer(uint8Array: Uint8Array, options?: ITokenizerOptions): BufferTokenizer
153 | ```
154 |
155 | | Parameter | Optional | Type | Description |
156 | |------------|----------|--------------------------------------------------|----------------------------------------|
157 | | uint8Array | no | [Uint8Array](https://nodejs.org/api/buffer.html) | Uint8Array or Buffer to read from |
158 | | fileInfo | yes | [IFileInfo](#IFileInfo) | Provide file information |
159 |
160 | Returns a [*tokenizer*](#tokenizer-object).
161 |
162 | ```js
163 | import * as strtok3 from 'strtok3';
164 | import * as Token from 'token-types';
165 | const tokenizer = strtok3.fromBuffer(buffer);
166 |
167 | tokenizer.readToken(Token.UINT8).then(myUint8Number => {
168 | console.log(`My number: ${myUint8Number}`);
169 | });
170 | ```
171 |
172 | ### `Tokenizer` object
173 | The *tokenizer* is an abstraction of a [stream](https://nodejs.org/api/stream.html), file or [Uint8Array](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Uint8Array), allowing _reading_ or _peeking_ from the stream.
174 | It can also be backed by chunked, on-demand reads, as done in [@tokenizer/http](https://github.com/Borewit/tokenizer-http).
175 |
176 | #### Key Features:
177 |
178 | - Supports seeking within the stream using `tokenizer.ignore()`.
179 | - Offers `peek` methods to preview data without advancing the read pointer.
180 | - Maintains the read position via `tokenizer.position`.
181 |
182 | #### Tokenizer functions
183 |
184 | _Read_ methods advance the stream pointer, while _peek_ methods do not.
185 |
186 | There are two kinds of functions:
187 | 1. *read* methods: read a *token* or [Buffer](https://nodejs.org/api/buffer.html) from the [*tokenizer*](#tokenizer-object). The position of the *tokenizer-stream* advances by the size of the token.
188 | 2. *peek* methods: same as the *read* methods, but they do *not* advance the pointer, allowing you to read (peek) ahead, as illustrated below.
189 |
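A small illustration of the difference, assuming a tokenizer created with `fromBuffer` and the `UINT8` token from [token-types](https://github.com/Borewit/token-types):

```js
import * as strtok3 from 'strtok3';
import * as Token from 'token-types';

const tokenizer = strtok3.fromBuffer(new Uint8Array([0x05, 0x06]));

const peeked = await tokenizer.peekNumber(Token.UINT8); // 5, position stays at 0
const read = await tokenizer.readNumber(Token.UINT8);   // 5 again, position advances to 1
```
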
190 | #### `readBuffer` function
191 |
192 | Read data from the _tokenizer_ into the provided `buffer` (`Uint8Array`).
193 | `readBuffer(buffer, options?)`
194 |
195 | ```ts
196 | readBuffer(buffer: Uint8Array, options?: IReadChunkOptions): Promise<number>;
197 | ```
198 |
199 | | Parameter  | Type                                                         | Description                              |
200 | |------------|--------------------------------------------------------------|------------------------------------------|
201 | | buffer     | [Buffer](https://nodejs.org/api/buffer.html) \| `Uint8Array` | Target buffer to write the data read to  |
202 | | options    | [IReadChunkOptions](#ireadchunkoptions)                      | Read behaviour options                   |
203 |
204 | Returns a promise with the number of bytes read.
205 | The number of bytes read may be less than requested if the *mayBeLess* flag was set.
206 |
207 | #### `peekBuffer` function
208 |
209 | Peek (read ahead), from [*tokenizer*](#tokenizer-object), into the buffer without advancing the stream pointer.
210 |
211 | ```ts
212 | peekBuffer(uint8Array: Uint8Array, options?: IReadChunkOptions): Promise<number>;
213 | ```
214 |
215 | | Parameter  | Type                                    | Description                                        |
216 | |------------|-----------------------------------------|----------------------------------------------------|
217 | | buffer     | `Buffer` \| `Uint8Array`                | Target buffer to write the data read (peeked) to.  |
218 | | options    | [IReadChunkOptions](#ireadchunkoptions) | Read behaviour options.                            |
219 |
220 | Returns a `Promise<number>` with the number of bytes peeked. The number of bytes peeked may be less than requested if the *mayBeLess* flag was set.
221 |
222 | #### `readToken` function
223 |
224 | Read a *token* from the tokenizer-stream.
225 |
226 | ```ts
227 | readToken<Value>(token: IGetToken<Value>, position: number = this.position): Promise<Value>
228 | ```
229 |
230 | | Parameter | Type | Description |
231 | |------------|-------------------------|---------------------------------------------------------------------------------------------------------------------- |
232 | | token | [IGetToken](#IGetToken) | Token to read from the tokenizer-stream. |
233 | | position? | number | Offset where to begin reading within the file. If position is null, data will be read from the current file position. |
234 |
235 | Returns a promise with the token value read from the *tokenizer-stream*.
236 |
237 | #### `peekToken` function
238 |
239 | Peek a *token* from the [*tokenizer*](#tokenizer-object).
240 |
241 | ```ts
242 | peekToken<Value>(token: IGetToken<Value>, position: number = this.position): Promise<Value>
243 | ```
244 |
245 | | Parameter | Type | Description |
246 | |------------|----------------------------|-------------------------------------------------------------------------------------------------------------------------|
247 | | token | [IGetToken](#IGetToken) | Token to read from the tokenizer-stream. |
248 | | position? | number | Offset where to begin reading within the file. If position is null, data will be read from the current file position. |
249 |
250 | Return a promise with the token value peeked from the [*tokenizer*](#tokenizer-object).
251 |
252 | #### `readNumber` function
253 |
254 | Read a numeric [*token*](#token) from the [*tokenizer*](#tokenizer-object).
255 |
256 | ```ts
257 | readNumber(token: IToken<number>): Promise<number>
258 | ```
259 |
260 | | Parameter | Type | Description |
261 | |------------|---------------------------------|----------------------------------------------------|
262 | | token | [IGetToken](#IGetToken) | Numeric token to read from the tokenizer-stream. |
263 |
264 | Returns a promise with the decoded numeric value from the *tokenizer-stream*.
265 |
266 | #### `ignore` function
267 |
268 | Advance the offset pointer by the given number of bytes.
269 |
270 | ```ts
271 | ignore(length: number): Promise<number>
272 | ```
273 |
274 | | Parameter | Type | Description |
275 | |------------|--------|----------------------------------------------------------------------|
276 | | length     | number | Number of bytes to ignore. Will advance the `tokenizer.position`.     |
277 |
278 | Returns a promise with the number of bytes actually ignored.
279 |
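For example, a sketch of skipping a fixed-size header before reading the byte that follows (file name and sizes are placeholders):

```js
import * as strtok3 from 'strtok3';
import * as Token from 'token-types';

const tokenizer = await strtok3.fromFile('somefile.bin');
await tokenizer.ignore(128); // Skip a 128-byte header
const firstDataByte = await tokenizer.readToken(Token.UINT8); // Read the byte following the header
await tokenizer.close();
```
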
280 | #### `close` function
281 | Clean up resources, such as closing a file pointer if applicable.
282 |
283 | #### `Tokenizer` attributes
284 |
285 | - `fileInfo`
286 |
287 | Optional attribute describing the file information, see [IFileInfo](#IFileInfo)
288 |
289 | - `position`
290 |
291 | Pointer to the current position in the [*tokenizer*](#tokenizer-object) stream.
292 | If a *position* is provided to a _read_ or _peek_ method, it should be equal to or greater than this value.
293 |
294 | ### `IReadChunkOptions` interface
295 |
296 | Each attribute is optional:
297 |
298 | | Attribute | Type | Description |
299 | |-----------|---------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
300 | | length | number | Requested number of bytes to read. |
301 | | position  | number  | Position where to peek from the file. If position is null, data will be read from the [current file position](#attribute-tokenizerposition). Position may not be less than [tokenizer.position](#attribute-tokenizerposition). |
302 | | mayBeLess | boolean | If set, no EOF error is thrown if fewer bytes than requested could be read.                                                                                                                                                    |
303 |
304 | Example usage:
305 | ```js
306 | tokenizer.peekBuffer(buffer, {mayBeLess: true});
307 | ```
308 |
309 | ### `IFileInfo` interface
310 |
311 | Provides optional metadata about the file being tokenized.
312 |
313 | | Attribute | Type | Description |
314 | |-----------|---------|---------------------------------------------------------------------------------------------------|
315 | | size | number | File size in bytes |
316 | | mimeType  | string  | [MIME-type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types) of file.  |
317 | | path      | string  | File path                                                                                           |
318 | | url       | string  | File URL                                                                                            |
319 |
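For example, file information known up front, such as values derived from HTTP response headers, can be passed when creating a tokenizer; the URL and values below are placeholders:

```js
import * as strtok3 from 'strtok3';

const response = await fetch('https://example.com/audio.mp3'); // placeholder URL
const tokenizer = strtok3.fromWebStream(response.body, {
  fileInfo: {
    size: 1024,            // e.g. derived from the Content-Length response header
    mimeType: 'audio/mpeg' // e.g. derived from the Content-Type response header
  }
});
```
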
320 | ### `Token` object
321 |
322 | The *token* is basically a description of what to read from the [*tokenizer-stream*](#tokenizer-object).
323 | A basic set of *token types* can be found here: [*token-types*](https://github.com/Borewit/token-types).
324 |
325 | A token is something which implements the following interface:
326 | ```ts
327 | export interface IGetToken<T> {
328 |
329 | /**
330 | * Length in bytes of encoded value
331 | */
332 | len: number;
333 |
334 | /**
335 | * Decode value from buffer at offset
336 | * @param buf Buffer to read the decoded value from
337 | * @param off Decode offset
338 | */
339 | get(buf: Uint8Array, off: number): T;
340 | }
341 | ```
342 | The *tokenizer* reads `token.len` bytes from the *tokenizer-stream* into a Buffer.
343 | `token.get` is then called with that Buffer and is responsible for converting it into the desired output type.
344 |
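As an illustration, a hypothetical custom token decoding a 24-bit big-endian unsigned integer could look like this:

```js
import * as strtok3 from 'strtok3';

// Hypothetical custom token implementing IGetToken<number>:
// a 24-bit unsigned integer, big-endian.
const UINT24_BE = {
  len: 3,
  get(buf, off) {
    return (buf[off] << 16) | (buf[off + 1] << 8) | buf[off + 2];
  }
};

const tokenizer = strtok3.fromBuffer(new Uint8Array([0x01, 0x02, 0x03]));
const value = await tokenizer.readToken(UINT24_BE); // 0x010203 = 66051
```
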
345 | ### Working with Web-API readable stream
346 | To convert a [Web-API readable stream](https://developer.mozilla.org/en-US/docs/Web/API/ReadableStreamDefaultReader) into a [Node.js readable stream](https://nodejs.org/api/stream.html#stream_readable_streams), you can use [readable-web-to-node-stream](https://github.com/Borewit/readable-web-to-node-stream).
347 |
348 | ```js
349 | import { fromStream } from 'strtok3';
350 | import { ReadableWebToNodeStream } from 'readable-web-to-node-stream';
351 |
352 | (async () => {
353 |
354 | const response = await fetch(url);
355 | const readableWebStream = response.body; // Web-API readable stream
356 |   const nodeStream = new ReadableWebToNodeStream(readableWebStream); // convert to Node.js readable stream
357 | 
358 |   const tokenizer = await fromStream(nodeStream); // Tokenizer reading from the converted Node.js stream
359 | })();
360 | ```
361 |
362 | ## Dependencies
363 |
364 | The diagram below illustrates the primary dependencies of `strtok3`:
365 |
366 | ```mermaid
367 | graph TD;
368 | S(strtok3)-->P(peek-readable)
369 | S(strtok3)-->TO("@tokenizer/token")
370 | ```
371 |
372 | - [peek-readable](https://github.com/Borewit/peek-readable): Manages reading operations with peeking capabilities, allowing data to be previewed without advancing the read pointer.
373 | - [@tokenizer/token](https://github.com/Borewit/tokenizer-token): Provides token definitions and utilities used by `strtok3` for interpreting binary data.
374 |
375 | ## Licence
376 |
377 | This project is licensed under the [MIT License](LICENSE.txt). Feel free to use, modify, and distribute as needed.
378 |
--------------------------------------------------------------------------------
/biome.jsonc:
--------------------------------------------------------------------------------
1 | {
2 | "$schema": "https://biomejs.dev/schemas/1.9.3/schema.json",
3 | "organizeImports": {
4 | "enabled": false
5 | },
6 | "formatter": {
7 | "enabled": false
8 | },
9 | "linter": {
10 | "enabled": true,
11 | "rules": {
12 | "correctness": {
13 | "noUnusedImports": "error",
14 | "noNodejsModules": "error"
15 | },
16 | "recommended": true,
17 | "complexity": {
18 | "noForEach": "off"
19 | },
20 | "suspicious": {
21 | "noEmptyBlockStatements": "error",
22 | "useErrorMessage": "error"
23 | },
24 | "nursery":{
25 | "noRestrictedImports": {
26 | "level": "error",
27 | "options": {
28 | "paths": {
29 | "node:buffer": "Use Uint8Array instead of Buffer"
30 | }
31 | }
32 | }},
33 | "style":{
34 | "useConsistentBuiltinInstantiation": "error",
35 | "useThrowNewError": "error",
36 | "useThrowOnlyError": "error"
37 | }
38 | }
39 | },
40 | "files": {
41 | "ignoreUnknown": true,
42 | "ignore": [
43 | "./coverage",
44 | "./yarn",
45 | "./.pnp.*",
46 | "./lib/**/*.d.ts",
47 | "./lib/**/*.js",
48 | "./test/**/*.d.ts",
49 | "./test/**/*.js"
50 | ]
51 | },
52 | "overrides": [
53 | {
54 | "include": ["./test/**/*", "./lib/index.ts", "./lib/FileTokenizer.ts"],
55 | "linter": {
56 | "rules": {
57 | "correctness": {
58 | "noNodejsModules": "off"
59 | }
60 | }
61 | }
62 | }
63 | ]
64 | }
65 |
--------------------------------------------------------------------------------
/lib/AbstractTokenizer.ts:
--------------------------------------------------------------------------------
1 | import type { ITokenizer, IFileInfo, IReadChunkOptions, OnClose, ITokenizerOptions } from './types.js';
2 | import type { IGetToken, IToken } from '@tokenizer/token';
3 | import { EndOfStreamError } from 'peek-readable';
4 |
5 | interface INormalizedReadChunkOptions extends IReadChunkOptions {
6 | length: number;
7 | position: number;
8 | mayBeLess?: boolean;
9 | }
10 |
11 | /**
12 | * Core tokenizer
13 | */
14 | export abstract class AbstractTokenizer implements ITokenizer {
15 |
16 | private onClose?: OnClose;
17 | private numBuffer = new Uint8Array(8);
18 |
19 | public abstract fileInfo: IFileInfo;
20 |
21 | /**
22 | * Tokenizer-stream position
23 | */
24 | public position = 0;
25 |
26 |
27 | /**
28 | * Constructor
29 | * @param options Tokenizer options
30 | * @protected
31 | */
32 | protected constructor(options?: ITokenizerOptions) {
33 | this.onClose = options?.onClose;
34 | if (options?.abortSignal) {
35 | options.abortSignal.addEventListener('abort', () => {
36 | this.abort();
37 | })
38 | }
39 | }
40 |
41 | abstract supportsRandomAccess(): boolean;
42 |
43 | /**
44 | * Read buffer from tokenizer
45 | * @param buffer - Target buffer to fill with data read from the tokenizer-stream
46 | * @param options - Additional read options
47 | * @returns Promise with number of bytes read
48 | */
49 |   public abstract readBuffer(buffer: Uint8Array, options?: IReadChunkOptions): Promise<number>;
50 |
51 | /**
52 | * Peek (read ahead) buffer from tokenizer
53 | * @param uint8Array - Target buffer to fill with data peeked from the tokenizer-stream
54 | * @param options - Peek behaviour options
55 | * @returns Promise with number of bytes read
56 | */
57 |   public abstract peekBuffer(uint8Array: Uint8Array, options?: IReadChunkOptions): Promise<number>;
58 |
59 | /**
60 | * Read a token from the tokenizer-stream
61 | * @param token - The token to read
62 | * @param position - If provided, the desired position in the tokenizer-stream
63 | * @returns Promise with token data
64 | */
65 |   public async readToken<Value>(token: IGetToken<Value>, position: number = this.position): Promise<Value> {
66 | const uint8Array = new Uint8Array(token.len);
67 | const len = await this.readBuffer(uint8Array, {position});
68 | if (len < token.len)
69 | throw new EndOfStreamError();
70 | return token.get(uint8Array, 0);
71 | }
72 |
73 | /**
74 | * Peek a token from the tokenizer-stream.
75 | * @param token - Token to peek from the tokenizer-stream.
76 | * @param position - Offset where to begin reading within the file. If position is null, data will be read from the current file position.
77 | * @returns Promise with token data
78 | */
79 |   public async peekToken<Value>(token: IGetToken<Value>, position: number = this.position): Promise<Value> {
80 | const uint8Array = new Uint8Array(token.len);
81 | const len = await this.peekBuffer(uint8Array, {position});
82 | if (len < token.len)
83 | throw new EndOfStreamError();
84 | return token.get(uint8Array, 0);
85 | }
86 |
87 | /**
88 | * Read a numeric token from the stream
89 | * @param token - Numeric token
90 | * @returns Promise with number
91 | */
92 |   public async readNumber(token: IToken<number>): Promise<number> {
93 | const len = await this.readBuffer(this.numBuffer, {length: token.len});
94 | if (len < token.len)
95 | throw new EndOfStreamError();
96 | return token.get(this.numBuffer, 0);
97 | }
98 |
99 | /**
100 |    * Peek a numeric token from the stream
101 | * @param token - Numeric token
102 | * @returns Promise with number
103 | */
104 |   public async peekNumber(token: IToken<number>): Promise<number> {
105 | const len = await this.peekBuffer(this.numBuffer, {length: token.len});
106 | if (len < token.len)
107 | throw new EndOfStreamError();
108 | return token.get(this.numBuffer, 0);
109 | }
110 |
111 | /**
112 | * Ignore number of bytes, advances the pointer in under tokenizer-stream.
113 | * @param length - Number of bytes to ignore
114 |    * @return resolves with the number of bytes ignored: equals `length` if enough bytes were available, otherwise the number of bytes that were available
115 | */
116 |   public async ignore(length: number): Promise<number> {
117 | if (this.fileInfo.size !== undefined) {
118 | const bytesLeft = this.fileInfo.size - this.position;
119 | if (length > bytesLeft) {
120 | this.position += bytesLeft;
121 | return bytesLeft;
122 | }
123 | }
124 | this.position += length;
125 | return length;
126 | }
127 |
128 |   public async close(): Promise<void> {
129 | await this.abort();
130 | await this.onClose?.();
131 | }
132 |
133 | protected normalizeOptions(uint8Array: Uint8Array, options?: IReadChunkOptions): INormalizedReadChunkOptions {
134 |
135 | if (!this.supportsRandomAccess() && options && options.position !== undefined && options.position < this.position) {
136 | throw new Error('`options.position` must be equal or greater than `tokenizer.position`');
137 | }
138 |
139 | return {
140 | ...{
141 | mayBeLess: false,
142 | offset: 0,
143 | length: uint8Array.length,
144 | position: this.position
145 | }, ...options
146 | };
147 | }
148 |
149 |   public abort(): Promise<void> {
150 | return Promise.resolve(); // Ignore abort signal
151 | }
152 | }
153 |
--------------------------------------------------------------------------------
/lib/BufferTokenizer.ts:
--------------------------------------------------------------------------------
1 | import type {ITokenizerOptions, IReadChunkOptions, IRandomAccessFileInfo, IRandomAccessTokenizer} from './types.js';
2 | import { EndOfStreamError } from 'peek-readable';
3 | import { AbstractTokenizer } from './AbstractTokenizer.js';
4 |
5 | export class BufferTokenizer extends AbstractTokenizer implements IRandomAccessTokenizer {
6 |
7 | public fileInfo: IRandomAccessFileInfo;
8 |
9 | /**
10 | * Construct BufferTokenizer
11 | * @param uint8Array - Uint8Array to tokenize
12 | * @param options Tokenizer options
13 | */
14 | constructor(private uint8Array: Uint8Array, options?: ITokenizerOptions) {
15 | super(options);
16 | this.fileInfo = {...options?.fileInfo ?? {}, ...{size: uint8Array.length}};
17 | }
18 |
19 | /**
20 | * Read buffer from tokenizer
21 | * @param uint8Array - Uint8Array to tokenize
22 | * @param options - Read behaviour options
23 |    * @returns {Promise<number>}
24 | */
25 |   public async readBuffer(uint8Array: Uint8Array, options?: IReadChunkOptions): Promise<number> {
26 |
27 | if (options?.position) {
28 | this.position = options.position;
29 | }
30 |
31 | const bytesRead = await this.peekBuffer(uint8Array, options);
32 | this.position += bytesRead;
33 | return bytesRead;
34 | }
35 |
36 | /**
37 | * Peek (read ahead) buffer from tokenizer
38 | * @param uint8Array
39 | * @param options - Read behaviour options
40 |    * @returns {Promise<number>}
41 | */
42 |   public async peekBuffer(uint8Array: Uint8Array, options?: IReadChunkOptions): Promise<number> {
43 |
44 | const normOptions = this.normalizeOptions(uint8Array, options);
45 |
46 | const bytes2read = Math.min(this.uint8Array.length - normOptions.position, normOptions.length);
47 | if ((!normOptions.mayBeLess) && bytes2read < normOptions.length) {
48 | throw new EndOfStreamError();
49 | }
50 | uint8Array.set(this.uint8Array.subarray(normOptions.position, normOptions.position + bytes2read));
51 | return bytes2read;
52 | }
53 |
54 |   public close(): Promise<void> {
55 | return super.close();
56 | }
57 |
58 | supportsRandomAccess(): boolean {
59 | return true;
60 | }
61 |
62 | setPosition(position: number): void {
63 | this.position = position;
64 | }
65 | }
66 |
--------------------------------------------------------------------------------
/lib/FileTokenizer.ts:
--------------------------------------------------------------------------------
1 | import { AbstractTokenizer } from './AbstractTokenizer.js';
2 | import { EndOfStreamError } from 'peek-readable';
3 | import type {IRandomAccessTokenizer, IRandomAccessFileInfo, IReadChunkOptions, ITokenizerOptions} from './types.js';
4 | import { type FileHandle, open as fsOpen } from 'node:fs/promises';
5 |
6 | interface IFileTokenizerOptions extends ITokenizerOptions {
7 | /**
8 | * Pass additional file information to the tokenizer
9 | */
10 | fileInfo: IRandomAccessFileInfo;
11 | }
12 |
13 | export class FileTokenizer extends AbstractTokenizer implements IRandomAccessTokenizer {
14 |
15 | public fileInfo: IRandomAccessFileInfo;
16 |
17 | /**
18 | * Create tokenizer from provided file path
19 | * @param sourceFilePath File path
20 | */
21 |   static async fromFile(sourceFilePath: string): Promise<FileTokenizer> {
22 | const fileHandle = await fsOpen(sourceFilePath, 'r');
23 | const stat = await fileHandle.stat();
24 | return new FileTokenizer(fileHandle, {fileInfo: {path: sourceFilePath, size: stat.size}});
25 | }
26 |
27 | protected constructor(private fileHandle: FileHandle, options: IFileTokenizerOptions) {
28 | super(options);
29 | this.fileInfo = options.fileInfo;
30 | }
31 |
32 | /**
33 | * Read buffer from file
34 | * @param uint8Array - Uint8Array to write result to
35 | * @param options - Read behaviour options
36 | * @returns Promise number of bytes read
37 | */
38 |   public async readBuffer(uint8Array: Uint8Array, options?: IReadChunkOptions): Promise<number> {
39 | const normOptions = this.normalizeOptions(uint8Array, options);
40 | this.position = normOptions.position;
41 | if (normOptions.length === 0) return 0;
42 | const res = await this.fileHandle.read(uint8Array, 0, normOptions.length, normOptions.position);
43 | this.position += res.bytesRead;
44 | if (res.bytesRead < normOptions.length && (!options || !options.mayBeLess)) {
45 | throw new EndOfStreamError();
46 | }
47 | return res.bytesRead;
48 | }
49 |
50 | /**
51 | * Peek buffer from file
52 | * @param uint8Array - Uint8Array (or Buffer) to write data to
53 | * @param options - Read behaviour options
54 | * @returns Promise number of bytes read
55 | */
56 |   public async peekBuffer(uint8Array: Uint8Array, options?: IReadChunkOptions): Promise<number> {
57 |
58 | const normOptions = this.normalizeOptions(uint8Array, options);
59 |
60 | const res = await this.fileHandle.read(uint8Array, 0, normOptions.length, normOptions.position);
61 | if ((!normOptions.mayBeLess) && res.bytesRead < normOptions.length) {
62 | throw new EndOfStreamError();
63 | }
64 | return res.bytesRead;
65 | }
66 |
67 |   public async close(): Promise<void> {
68 | await this.fileHandle.close();
69 | return super.close();
70 | }
71 |
72 | setPosition(position: number): void {
73 | this.position = position;
74 | }
75 |
76 | supportsRandomAccess(): boolean {
77 | return true;
78 | }
79 | }
80 |
81 |
82 |
--------------------------------------------------------------------------------
/lib/ReadStreamTokenizer.ts:
--------------------------------------------------------------------------------
1 | import { AbstractTokenizer } from './AbstractTokenizer.js';
2 | import { EndOfStreamError, type IStreamReader } from 'peek-readable';
3 | import type {IFileInfo, IReadChunkOptions, ITokenizerOptions} from './types.js';
4 |
5 | const maxBufferSize = 256000;
6 |
7 | export class ReadStreamTokenizer extends AbstractTokenizer {
8 |
9 | public fileInfo: IFileInfo;
10 |
11 | /**
12 | * Constructor
13 | * @param streamReader stream-reader to read from
14 | * @param options Tokenizer options
15 | */
16 | public constructor(private streamReader: IStreamReader, options?: ITokenizerOptions) {
17 | super(options);
18 | this.fileInfo = options?.fileInfo ?? {};
19 | }
20 |
21 | /**
22 | * Read buffer from tokenizer
23 | * @param uint8Array - Target Uint8Array to fill with data read from the tokenizer-stream
24 | * @param options - Read behaviour options
25 | * @returns Promise with number of bytes read
26 | */
27 |   public async readBuffer(uint8Array: Uint8Array, options?: IReadChunkOptions): Promise<number> {
28 | const normOptions = this.normalizeOptions(uint8Array, options);
29 | const skipBytes = normOptions.position - this.position;
30 | if (skipBytes > 0) {
31 | await this.ignore(skipBytes);
32 | return this.readBuffer(uint8Array, options);
33 | }
34 | if (skipBytes < 0) {
35 | throw new Error('`options.position` must be equal or greater than `tokenizer.position`');
36 | }
37 | if (normOptions.length === 0) {
38 | return 0;
39 | }
40 | const bytesRead = await this.streamReader.read(uint8Array.subarray(0, normOptions.length), normOptions.mayBeLess);
41 | this.position += bytesRead;
42 | if ((!options || !options.mayBeLess) && bytesRead < normOptions.length) {
43 | throw new EndOfStreamError();
44 | }
45 | return bytesRead;
46 | }
47 |
48 | /**
49 | * Peek (read ahead) buffer from tokenizer
50 | * @param uint8Array - Uint8Array (or Buffer) to write data to
51 | * @param options - Read behaviour options
52 | * @returns Promise with number of bytes peeked
53 | */
54 |   public async peekBuffer(uint8Array: Uint8Array, options?: IReadChunkOptions): Promise<number> {
55 |
56 | const normOptions = this.normalizeOptions(uint8Array, options);
57 | let bytesRead = 0;
58 |
59 | if (normOptions.position) {
60 | const skipBytes = normOptions.position - this.position;
61 | if (skipBytes > 0) {
62 | const skipBuffer = new Uint8Array(normOptions.length + skipBytes);
63 | bytesRead = await this.peekBuffer(skipBuffer, {mayBeLess: normOptions.mayBeLess});
64 | uint8Array.set(skipBuffer.subarray(skipBytes));
65 | return bytesRead - skipBytes;
66 | }
67 | if (skipBytes < 0) {
68 | throw new Error('Cannot peek from a negative offset in a stream');
69 | }
70 | }
71 |
72 | if (normOptions.length > 0) {
73 | try {
74 | bytesRead = await this.streamReader.peek(uint8Array.subarray(0, normOptions.length), normOptions.mayBeLess);
75 | } catch (err) {
76 | if (options?.mayBeLess && err instanceof EndOfStreamError) {
77 | return 0;
78 | }
79 | throw err;
80 | }
81 | if ((!normOptions.mayBeLess) && bytesRead < normOptions.length) {
82 | throw new EndOfStreamError();
83 | }
84 | }
85 |
86 | return bytesRead;
87 | }
88 |
89 |   public async ignore(length: number): Promise<number> {
90 | // debug(`ignore ${this.position}...${this.position + length - 1}`);
91 | const bufSize = Math.min(maxBufferSize, length);
92 | const buf = new Uint8Array(bufSize);
93 | let totBytesRead = 0;
94 | while (totBytesRead < length) {
95 | const remaining = length - totBytesRead;
96 | const bytesRead = await this.readBuffer(buf, {length: Math.min(bufSize, remaining)});
97 | if (bytesRead < 0) {
98 | return bytesRead;
99 | }
100 | totBytesRead += bytesRead;
101 | }
102 | return totBytesRead;
103 | }
104 |
105 |   public abort(): Promise<void> {
106 | return this.streamReader.abort();
107 | }
108 |
109 |   public async close(): Promise<void> {
110 | return this.streamReader.close();
111 | }
112 |
113 | supportsRandomAccess(): boolean {
114 | return false;
115 | }
116 | }
117 |
--------------------------------------------------------------------------------
/lib/core.ts:
--------------------------------------------------------------------------------
1 | import type { Readable } from 'node:stream';
2 | import { StreamReader, makeWebStreamReader, type AnyWebByteStream } from 'peek-readable';
3 |
4 | import { ReadStreamTokenizer } from './ReadStreamTokenizer.js';
5 | import { BufferTokenizer } from './BufferTokenizer.js';
6 | import type { ITokenizerOptions } from './types.js';
7 |
8 | export { EndOfStreamError, AbortError, type AnyWebByteStream } from 'peek-readable';
9 | export type { ITokenizer, IRandomAccessTokenizer, IFileInfo, IRandomAccessFileInfo, ITokenizerOptions, IReadChunkOptions, OnClose } from './types.js';
10 | export type { IToken, IGetToken } from '@tokenizer/token';
11 | export { AbstractTokenizer } from './AbstractTokenizer.js';
12 |
13 | /**
14 | * Construct ReadStreamTokenizer from given Stream.
15 |  * Will set fileSize, if the provided stream has set the .path property.
16 | * @param stream - Read from Node.js Stream.Readable
17 | * @param options - Tokenizer options
18 | * @returns ReadStreamTokenizer
19 | */
20 | export function fromStream(stream: Readable, options?: ITokenizerOptions): ReadStreamTokenizer {
21 | const streamReader= new StreamReader(stream);
22 | const _options: ITokenizerOptions = options ?? {};
23 | const chainedClose = _options.onClose;
24 | _options.onClose = async () => {
25 | await streamReader.close();
26 | if(chainedClose) {
27 | return chainedClose();
28 | }
29 | };
30 | return new ReadStreamTokenizer(streamReader, _options);
31 | }
32 |
33 | /**
34 | * Construct ReadStreamTokenizer from given ReadableStream (WebStream API).
35 |  * Will set fileSize, if the provided stream has set the .path property.
36 |  * @param webStream - WHATWG ReadableStream to read from (must be a byte stream)
37 | * @param options - Tokenizer options
38 | * @returns ReadStreamTokenizer
39 | */
40 | export function fromWebStream(webStream: AnyWebByteStream, options?: ITokenizerOptions): ReadStreamTokenizer {
41 | const webStreamReader= makeWebStreamReader(webStream);
42 | const _options: ITokenizerOptions = options ?? {};
43 | const chainedClose = _options.onClose;
44 | _options.onClose = async () => {
45 | await webStreamReader.close();
46 | if(chainedClose) {
47 | return chainedClose();
48 | }
49 | };
50 | return new ReadStreamTokenizer(webStreamReader, _options);
51 | }
52 |
53 | /**
54 | * Construct ReadStreamTokenizer from given Buffer.
55 | * @param uint8Array - Uint8Array to tokenize
56 | * @param options - Tokenizer options
57 | * @returns BufferTokenizer
58 | */
59 | export function fromBuffer(uint8Array: Uint8Array, options?: ITokenizerOptions): BufferTokenizer {
60 | return new BufferTokenizer(uint8Array, options);
61 | }
62 |
--------------------------------------------------------------------------------
/lib/index.ts:
--------------------------------------------------------------------------------
1 | import type { Readable } from 'node:stream';
2 | import type { ReadStreamTokenizer } from './ReadStreamTokenizer.js';
3 | import { stat as fsStat } from 'node:fs/promises';
4 | import { type ITokenizerOptions, fromStream as coreFromStream } from './core.js';
5 | import {FileTokenizer} from "./FileTokenizer.js";
6 |
7 | export { FileTokenizer } from './FileTokenizer.js';
8 | export * from './core.js';
9 | export type { IToken, IGetToken } from '@tokenizer/token';
10 |
11 | interface StreamWithFile extends Readable {
12 | /**
13 | * Informal property set by `node:fs.createReadStream`
14 | */
15 | path?: string;
16 | }
17 |
18 | /**
19 | * Construct ReadStreamTokenizer from given Stream.
20 | * Will set fileSize, if provided given Stream has set the .path property.
21 | * @param stream - Node.js Stream.Readable
22 | * @param options - Pass additional file information to the tokenizer
23 | * @returns Tokenizer
24 | */
25 | export async function fromStream(stream: Readable, options?: ITokenizerOptions): Promise<ReadStreamTokenizer> {
26 | const rst = coreFromStream(stream, options);
27 | if ((stream as StreamWithFile).path) {
28 | const stat = await fsStat((stream as StreamWithFile).path as string);
29 | rst.fileInfo.path = (stream as StreamWithFile).path;
30 | rst.fileInfo.size = stat.size;
31 | }
32 | return rst;
33 | }
34 |
35 | export const fromFile = FileTokenizer.fromFile;
36 |
--------------------------------------------------------------------------------
/lib/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "extends": "../tsconfig.json",
3 | "compilerOptions": {
4 | "declaration": true
5 | }
6 | }
7 |
8 |
--------------------------------------------------------------------------------
/lib/types.ts:
--------------------------------------------------------------------------------
1 | import type { IGetToken } from '@tokenizer/token';
2 |
3 | export interface IFileInfo {
4 | /**
5 | * File size in bytes
6 | */
7 | size?: number;
8 | /**
9 | * MIME-type of file
10 | */
11 | mimeType?: string;
12 |
13 | /**
14 | * File path
15 | */
16 | path?: string;
17 |
18 | /**
19 | * File URL
20 | */
21 | url?: string;
22 | }
23 |
24 | export interface IRandomAccessFileInfo extends IFileInfo {
25 | /**
26 | * File size in bytes
27 | */
28 | size: number;
29 | }
30 |
31 | export interface IReadChunkOptions {
32 |
33 | /**
34 | * Number of bytes to read.
35 | */
36 | length?: number;
37 |
38 | /**
39 | * Position where to begin reading from the file.
40 | * Default it is `tokenizer.position`.
41 | * Position may not be less than `tokenizer.position`, unless `supportsRandomAccess()` returns `true`.
42 | */
43 | position?: number;
44 |
45 | /**
46 |    * If set, will not throw an EOF error if not all of the requested data could be read
47 | */
48 | mayBeLess?: boolean;
49 | }
50 |
51 | export interface IRandomAccessTokenizer extends ITokenizer {
52 |
53 | /**
54 | * Provide access to information of the underlying information stream or file.
55 | */
56 | fileInfo: IRandomAccessFileInfo;
57 |
58 | /**
59 | * Change the position (offset) of the tokenizer
60 | * @param position New position
61 | */
62 | setPosition(position: number): void;
63 | }
64 |
65 | /**
66 | * The tokenizer allows us to read or peek from the tokenizer-stream.
67 | * The tokenizer-stream is an abstraction of a stream, file or Buffer.
68 | */
69 | export interface ITokenizer {
70 |
71 | /**
72 | * Provide access to information of the underlying information stream or file.
73 | */
74 | readonly fileInfo: IFileInfo;
75 |
76 | /**
77 | * Offset in bytes (= number of bytes read) since beginning of file or stream
78 | */
79 | readonly position: number;
80 |
81 | /**
82 | * Peek (read ahead) buffer from tokenizer
83 |    * @param buffer - Target buffer to fill with data peeked from the tokenizer-stream
84 | * @param options - Read behaviour options
85 | * @returns Promise with number of bytes read
86 | */
87 |   peekBuffer(buffer: Uint8Array, options?: IReadChunkOptions): Promise<number>;
88 |
89 | /**
90 |    * Read buffer from tokenizer
91 |    * @param buffer - Target buffer to fill with data read from the tokenizer-stream
92 | * @param options - Additional read options
93 | * @returns Promise with number of bytes read
94 | */
95 |   readBuffer(buffer: Uint8Array, options?: IReadChunkOptions): Promise<number>;
96 |
97 | /**
98 | * Peek a token from the tokenizer-stream.
99 | * @param token - Token to peek from the tokenizer-stream.
100 | * @param position - Offset where to begin reading within the file. If position is null, data will be read from the current file position.
101 |    * @param maybeless - If set, will not throw an EOF error if less than the requested length could be read.
102 | */
103 |   peekToken<Value>(token: IGetToken<Value>, position?: number | null, maybeless?: boolean): Promise<Value>;
104 |
105 | /**
106 | * Read a token from the tokenizer-stream.
107 |    * @param token - Token to read from the tokenizer-stream.
108 | * @param position - Offset where to begin reading within the file. If position is null, data will be read from the current file position.
109 | */
110 |   readToken<Value>(token: IGetToken<Value>, position?: number): Promise<Value>;
111 |
112 | /**
113 | * Peek a numeric token from the stream
114 | * @param token - Numeric token
115 | * @returns Promise with number
116 | */
117 |   peekNumber(token: IGetToken<number>): Promise<number>;
118 |
119 | /**
120 | * Read a numeric token from the stream
121 | * @param token - Numeric token
122 | * @returns Promise with number
123 | */
124 |   readNumber(token: IGetToken<number>): Promise<number>;
125 |
126 | /**
127 | * Ignore given number of bytes
128 |    * @param length - Number of bytes to ignore
129 |    */
130 |   ignore(length: number): Promise<number>;
131 |
132 | /**
133 | * Clean up resources.
134 |    * It does not close the stream for StreamReader, but it does close the file-descriptor.
135 | */
136 |   close(): Promise<void>;
137 |
138 | /**
139 | * Abort pending asynchronous operations
140 | */
141 |   abort(): Promise<void>;
142 |
143 | /**
144 | * Returns true when the underlying file supports random access
145 | */
146 | supportsRandomAccess(): boolean;
147 | }
148 |
149 | export type OnClose = () => Promise<void>;
150 |
151 | export interface ITokenizerOptions {
152 | /**
153 | * Pass additional file information to the tokenizer
154 | */
155 | fileInfo?: IFileInfo;
156 |
157 | /**
158 | * On tokenizer close handler
159 | */
160 | onClose?: OnClose;
161 |
162 | /**
163 | * Pass `AbortSignal` which can stop active async operations
164 | * Ref: https://developer.mozilla.org/en-US/docs/Web/API/AbortSignal
165 | */
166 | abortSignal?: AbortSignal;
167 | }
168 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "strtok3",
3 | "version": "10.2.2",
4 | "description": "A promise based streaming tokenizer",
5 | "author": {
6 | "name": "Borewit",
7 | "url": "https://github.com/Borewit"
8 | },
9 | "funding": {
10 | "type": "github",
11 | "url": "https://github.com/sponsors/Borewit"
12 | },
13 | "scripts": {
14 | "clean": "del-cli 'lib/**/*.js' 'lib/**/*.js.map' 'lib/**/*.d.ts' 'test/**/*.js' 'test/**/*.js.map'",
15 | "compile-src": "tsc -p lib",
16 | "compile-test": "tsc -p test",
17 | "compile": "yarn run compile-src && yarn run compile-test",
18 | "build": "yarn run clean && yarn run compile",
19 | "eslint": "eslint lib test",
20 | "lint-md": "remark -u preset-lint-recommended .",
21 | "lint-ts": "biome check",
22 | "lint": "yarn run lint-md && yarn run lint-ts",
23 | "fix": "yarn run biome lint --write",
24 | "test": "mocha",
25 | "bun:test": "bun run --bun test",
26 | "test-coverage": "c8 yarn run test",
27 | "send-codacy": "c8 report --reporter=text-lcov | codacy-coverage",
28 |     "start": "yarn run compile && yarn run lint && yarn run test-coverage"
29 | },
30 | "engines": {
31 | "node": ">=18"
32 | },
33 | "repository": {
34 | "type": "git",
35 | "url": "https://github.com/Borewit/strtok3.git"
36 | },
37 | "license": "MIT",
38 | "type": "module",
39 | "exports": {
40 | ".": {
41 | "node": "./lib/index.js",
42 | "default": "./lib/core.js"
43 | },
44 | "./core": "./lib/core.js"
45 | },
46 | "types": "lib/index.d.ts",
47 | "files": [
48 | "lib/**/*.js",
49 | "lib/**/*.d.ts"
50 | ],
51 | "bugs": {
52 | "url": "https://github.com/Borewit/strtok3/issues"
53 | },
54 | "dependencies": {
55 | "@tokenizer/token": "^0.3.0",
56 | "peek-readable": "^7.0.0"
57 | },
58 | "devDependencies": {
59 | "@biomejs/biome": "^1.9.4",
60 | "@types/chai": "^5.2.2",
61 | "@types/chai-as-promised": "^8.0.2",
62 | "@types/debug": "^4.1.12",
63 | "@types/mocha": "^10.0.10",
64 | "@types/node": "^22.15.19",
65 | "c8": "^10.1.3",
66 | "chai": "^5.2.0",
67 | "chai-as-promised": "^8.0.1",
68 | "del-cli": "^6.0.0",
69 | "mocha": "^11.5.0",
70 | "node-readable-to-web-readable-stream": "^0.4.2",
71 | "remark-cli": "^12.0.1",
72 | "remark-preset-lint-recommended": "^7.0.1",
73 | "token-types": "^6.0.0",
74 | "ts-node": "^10.9.2",
75 | "typescript": "^5.8.3",
76 | "uint8array-extras": "^1.4.0"
77 | },
78 | "keywords": [
79 | "tokenizer",
80 | "reader",
81 | "token",
82 | "async",
83 | "promise",
84 | "parser",
85 | "decoder",
86 | "binary",
87 | "endian",
88 | "uint",
89 | "stream",
90 | "streaming"
91 | ],
92 | "packageManager": "yarn@4.9.1"
93 | }
94 |
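A note on the `exports` map above: the bare specifier resolves to `lib/index.js` under the Node.js condition and to `lib/core.js` otherwise, while the `./core` subpath always points at `lib/core.js`. The sketch below shows how a consumer addresses the two entry points; which symbols live in `core` versus `index` is inferred from the test imports further down, not spelled out in this file.

// Node.js resolves 'strtok3' to ./lib/index.js (the "node" condition);
// other runtimes fall through to ./lib/core.js (the "default" condition).
import { fromFile, fromStream } from 'strtok3';

// './core' is the runtime-agnostic entry point; presumably it exposes the
// buffer and web-stream tokenizers, which need no Node-specific APIs.
import { fromBuffer, fromWebStream } from 'strtok3/core';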
--------------------------------------------------------------------------------
/test/resources/id3v1.mp3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Borewit/strtok3/5e7c191bd1930140438dd48fd837515c449365a3/test/resources/id3v1.mp3
--------------------------------------------------------------------------------
/test/resources/test1.dat:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/test/resources/test2.dat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Borewit/strtok3/5e7c191bd1930140438dd48fd837515c449365a3/test/resources/test2.dat
--------------------------------------------------------------------------------
/test/resources/test3.dat:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/test/test.ts:
--------------------------------------------------------------------------------
1 | import { PassThrough } from 'node:stream';
2 | import * as fs from 'node:fs/promises';
3 | import { createReadStream } from 'node:fs';
4 | import { dirname } from 'node:path';
5 | import { fileURLToPath } from 'node:url';
6 |
7 | import * as Token from 'token-types';
8 | import { assert, expect, use } from 'chai';
9 | import chaiAsPromised from 'chai-as-promised';
10 | import {
11 | fromBuffer,
12 | fromFile,
13 | fromStream,
14 | fromWebStream,
15 | type ITokenizer,
16 | type IRandomAccessTokenizer
17 | } from '../lib/index.js';
18 | import Path from 'node:path';
19 | import { EndOfStreamError } from 'peek-readable';
20 |
21 | import mocha from 'mocha';
22 | import { stringToUint8Array } from 'uint8array-extras';
23 |
24 | import { DelayedStream, makeByteReadableStreamFromFile } from './util.js';
25 | import process from 'node:process';
26 |
27 | use(chaiAsPromised);
28 |
29 | const __dirname = dirname(fileURLToPath(import.meta.url));
30 |
31 | const {describe, it} = mocha;
32 |
33 | interface ITokenizerTest {
34 | name: string;
35 |   loadTokenizer: (testFile: string, delay?: number, abortSignal?: AbortSignal) => Promise<ITokenizer>;
36 | hasFileInfo: boolean;
37 | abortable: boolean;
38 | randomRead: boolean;
39 | }
40 |
41 | function getResourcePath(testFile: string) {
42 | return Path.join(__dirname, 'resources', testFile);
43 | }
44 |
45 | async function getTokenizerWithData(testData: string, test: ITokenizerTest, delay?: number, abortSignal?: AbortSignal): Promise<ITokenizer> {
46 | const testPath = getResourcePath('tmp.dat');
47 | await fs.writeFile(testPath, testData, {encoding: 'latin1'});
48 | return test.loadTokenizer('tmp.dat', delay, abortSignal);
49 | }
50 |
51 | describe('Matrix tests', () => {
52 |
53 | const tokenizerTests: ITokenizerTest[] = [
54 | {
55 | name: 'fromStream()',
56 | loadTokenizer: async (testFile, delay, abortSignal?: AbortSignal) => {
57 | const stream = createReadStream(getResourcePath(testFile));
58 | const delayedStream = new DelayedStream(stream, delay);
59 | return fromStream(delayedStream, {abortSignal});
60 | },
61 | hasFileInfo: true,
62 | abortable: true,
63 | randomRead: false
64 | }, {
65 | name: 'fromWebStream()',
66 | loadTokenizer: async (testFile, delay, abortSignal?: AbortSignal) => {
67 | const fileStream = makeByteReadableStreamFromFile(Path.join(__dirname, 'resources', testFile), delay);
68 | return fromWebStream(fileStream, {abortSignal});
69 | },
70 | hasFileInfo: false,
71 | abortable: true,
72 | randomRead: false
73 | }, {
74 | name: 'fromFile()',
75 | loadTokenizer: async testFile => {
76 | return fromFile(Path.join(__dirname, 'resources', testFile));
77 | },
78 | hasFileInfo: true,
79 | abortable: false,
80 | randomRead: true
81 | }, {
82 | name: 'fromBuffer()',
83 | loadTokenizer: async testFile => {
84 | const data = await fs.readFile(Path.join(__dirname, 'resources', testFile));
85 | return fromBuffer(data);
86 | },
87 | hasFileInfo: true,
88 | abortable: false,
89 | randomRead: true
90 | }
91 | ];
92 |
93 | tokenizerTests
94 | // .filter((x, n) => n === 1)
95 | .forEach(tokenizerType => {
96 | describe(tokenizerType.name, () => {
97 |
98 | describe('tokenizer read options', () => {
99 |
100 | it('option.offset', async () => {
101 | const buf = new Uint8Array(7);
102 | const rst = await getTokenizerWithData('\x01\x02\x03\x04\x05\x06', tokenizerType);
103 | assert.strictEqual(await rst.readBuffer(buf.subarray(1), {length: 6}), 6);
104 | await rst.close();
105 | });
106 |
107 | it('option.length', async () => {
108 | const buf = new Uint8Array(7);
109 | const rst = await getTokenizerWithData('\x01\x02\x03\x04\x05\x06', tokenizerType);
110 | assert.strictEqual(await rst.readBuffer(buf, {length: 2}), 2);
111 | await rst.close();
112 | });
113 |
114 | it('default length', async () => {
115 | const buf = new Uint8Array(6);
116 | const rst = await getTokenizerWithData('\x01\x02\x03\x04\x05\x06', tokenizerType);
117 | assert.strictEqual(await rst.readBuffer(buf.subarray(1)), 5, 'default length = buffer.length - option.offset');
118 | await rst.close();
119 | });
120 |
121 |         it('option.mayBeLess = true', async () => {
122 | const buffer = new Uint8Array(4);
123 | const rst = await getTokenizerWithData('\x89\x54\x40', tokenizerType);
124 | const len = await rst.readBuffer(buffer, {mayBeLess: true});
125 | assert.strictEqual(len, 3, 'should return 3 because no more bytes are available');
126 | await rst.close();
127 | });
128 |
129 | it('option.position', async () => {
130 | const buffer = new Uint8Array(5);
131 | const rst = await getTokenizerWithData('\x01\x02\x03\x04\x05\x06', tokenizerType);
132 | const len = await rst.readBuffer(buffer, {position: 1});
133 | assert.strictEqual(len, 5, 'return value');
134 | assert.deepEqual(buffer, Uint8Array.from([0x02, 0x03, 0x04, 0x05, 0x06]));
135 | await rst.close();
136 | });
137 |
138 | });
139 |
140 | describe('tokenizer peek options', () => {
141 |
142 | it('option.offset', async () => {
143 | const buf = new Uint8Array(7);
144 | const rst = await getTokenizerWithData('\x01\x02\x03\x04\x05\x06', tokenizerType);
145 | assert.strictEqual(await rst.peekBuffer(buf.subarray(1), {length: 6}), 6);
146 | await rst.close();
147 | });
148 |
149 | it('option.length', async () => {
150 | const buf = new Uint8Array(7);
151 | const rst = await getTokenizerWithData('\x01\x02\x03\x04\x05\x06', tokenizerType);
152 | assert.strictEqual(await rst.peekBuffer(buf, {length: 2}), 2);
153 | await rst.close();
154 | });
155 |
156 | it('default length', async () => {
157 | const buf = new Uint8Array(6);
158 | const rst = await getTokenizerWithData('\x01\x02\x03\x04\x05\x06', tokenizerType);
159 | assert.strictEqual(await rst.peekBuffer(buf.subarray(1)), 5, 'default length = buffer.length - option.offset');
160 | await rst.close();
161 | });
162 |
163 |         it('option.mayBeLess = true', async () => {
164 | const buffer = new Uint8Array(4);
165 | const rst = await getTokenizerWithData('\x89\x54\x40', tokenizerType);
166 | const len = await rst.peekBuffer(buffer, {mayBeLess: true});
167 | assert.strictEqual(len, 3, 'should return 3 because no more bytes are available');
168 | await rst.close();
169 | });
170 |
171 | it('option.position', async () => {
172 | const buffer = new Uint8Array(5);
173 | const rst = await getTokenizerWithData('\x01\x02\x03\x04\x05\x06', tokenizerType);
174 | const len = await rst.peekBuffer(buffer, {position: 1});
175 | assert.strictEqual(len, 5, 'return value');
176 | assert.deepEqual(buffer, Uint8Array.from([0x02, 0x03, 0x04, 0x05, 0x06]));
177 | await rst.close();
178 | });
179 |
180 | });
181 |
182 | it('should decode buffer', async () => {
183 |
184 | const rst = await getTokenizerWithData('\x05peter', tokenizerType);
185 | // should decode UINT8 from chunk
186 | assert.strictEqual(rst.position, 0);
187 | let value: string | number = await rst.readToken(Token.UINT8);
188 | assert.strictEqual(typeof value, 'number');
189 | assert.strictEqual(value, 5, '0x05 == 5');
190 | // should decode string from chunk
191 | assert.strictEqual(rst.position, 1);
192 | value = await rst.readToken(new Token.StringType(5, 'utf-8'));
193 | assert.strictEqual(typeof value, 'string');
194 | assert.strictEqual(value, 'peter');
195 | assert.strictEqual(rst.position, 6);
196 |         // should reject at the end of the stream
197 | try {
198 | await rst.readToken(Token.UINT8);
199 | assert.fail('Should reject due to end-of-stream');
200 | } catch (err) {
201 | assert.instanceOf(err, EndOfStreamError);
202 | } finally {
203 | await rst.close();
204 | }
205 | });
206 |
207 | it('should be able to read from an absolute offset', async () => {
208 |
209 | const rst = await getTokenizerWithData('\x05peter', tokenizerType);
210 | // should decode UINT8 from chunk
211 | assert.strictEqual(rst.position, 0);
212 | const value: string | number = await rst.readToken(new Token.StringType(5, 'utf-8'), 1);
213 | assert.strictEqual(typeof value, 'string');
214 | assert.strictEqual(value, 'peter');
215 | assert.strictEqual(rst.position, 6);
216 |
217 | try {
218 | await rst.readToken(Token.UINT8);
219 | assert.fail('Should reject due to end-of-stream');
220 | } catch (err) {
221 | assert.instanceOf(err, EndOfStreamError);
222 | } finally {
223 | await rst.close();
224 | }
225 |
226 | });
227 |
228 |       it('should pick length from buffer, if length is not explicitly defined', async () => {
229 |
230 | const rst = await getTokenizerWithData('\x05peter', tokenizerType);
231 |
232 | const buf = new Uint8Array(4);
233 |
234 | // should decode UINT8 from chunk
235 | assert.strictEqual(rst.position, 0);
236 | const bufferLength = await rst.readBuffer(buf);
237 | assert.strictEqual(bufferLength, buf.length);
238 | assert.strictEqual(rst.position, buf.length);
239 | await rst.close();
240 | });
241 |
242 | it('should contain fileSize if constructed from file-read-stream', async () => {
243 | if (tokenizerType.hasFileInfo) {
244 | const rst = await tokenizerType.loadTokenizer('test1.dat');
245 |           assert.strictEqual(rst.fileInfo.size, 16, 'ReadStreamTokenizer.fileInfo.size');
246 | await rst.close();
247 | }
248 | });
249 |
250 | describe('Parsing binary numbers', () => {
251 |
252 | it('should encode signed 8-bit integer (INT8)', () => {
253 |
254 | const b = new Uint8Array(1);
255 |
256 | Token.INT8.put(b, 0, 0x00);
257 | assert.deepEqual(b, Uint8Array.from([0x00]));
258 |
259 | Token.INT8.put(b, 0, 0x22);
260 | assert.deepEqual(b, Uint8Array.from([0x22]));
261 |
262 | Token.INT8.put(b, 0, -0x22);
263 | assert.deepEqual(b, Uint8Array.from([0xde]));
264 | });
265 |
266 | it('should decode signed 8-bit integer (INT8)', async () => {
267 |
268 | const rst = await getTokenizerWithData('\x00\x7f\x80\xff\x81', tokenizerType);
269 |
270 | let value: number = await rst.readToken(Token.INT8);
271 | assert.strictEqual(typeof value, 'number');
272 | assert.strictEqual(value, 0, 'INT8 #1 == 0');
273 | value = await rst.readToken(Token.INT8);
274 | assert.strictEqual(typeof value, 'number');
275 | assert.strictEqual(value, 127, 'INT8 #2 == 127');
276 | value = await rst.readToken(Token.INT8);
277 | assert.strictEqual(typeof value, 'number');
278 | assert.strictEqual(value, -128, 'INT8 #3 == -128');
279 | value = await rst.readToken(Token.INT8);
280 | assert.strictEqual(typeof value, 'number');
281 | assert.strictEqual(value, -1, 'INT8 #4 == -1');
282 | value = await rst.readToken(Token.INT8);
283 | assert.strictEqual(typeof value, 'number');
284 | assert.strictEqual(value, -127, 'INT8 #5 == -127');
285 |
286 | await rst.close();
287 |
288 | });
289 |
290 | it('should encode signed 16-bit big-endian integer (INT16_BE)', () => {
291 |
292 | const b = new Uint8Array(2);
293 |
294 | Token.INT16_BE.put(b, 0, 0x00);
295 | assert.deepEqual(b, Uint8Array.from([0x00, 0x00]));
296 |
297 | Token.INT16_BE.put(b, 0, 0x0f0b);
298 | assert.deepEqual(b, Uint8Array.from([0x0f, 0x0b]));
299 |
300 | Token.INT16_BE.put(b, 0, -0x0f0b);
301 | assert.deepEqual(b, Uint8Array.from([0xf0, 0xf5]));
302 | });
303 |
304 | it('should decode signed 16-bit big-endian integer (INT16_BE)', async () => {
305 |
306 | const rst = await getTokenizerWithData('\x0a\x1a\x00\x00\xff\xff\x80\x00', tokenizerType);
307 |
308 | let value: number = await rst.readToken(Token.INT16_BE);
309 | assert.strictEqual(typeof value, 'number');
310 | assert.strictEqual(value, 2586, 'INT16_BE#1');
311 | value = await rst.readToken(Token.INT16_BE);
312 | assert.strictEqual(typeof value, 'number');
313 | assert.strictEqual(value, 0, 'INT16_BE#2');
314 | value = await rst.readToken(Token.INT16_BE);
315 | assert.strictEqual(typeof value, 'number');
316 | assert.strictEqual(value, -1, 'INT16_BE#3');
317 | value = await rst.readToken(Token.INT16_BE);
318 | assert.strictEqual(typeof value, 'number');
319 | assert.strictEqual(value, -32768, 'INT16_BE#4');
320 |
321 | await rst.close();
322 | });
323 |
324 | it('should encode signed 24-bit big-endian integer (INT24_BE)', async () => {
325 |
326 | const b = new Uint8Array(3);
327 |
328 | Token.INT24_BE.put(b, 0, 0x00);
329 | assert.deepEqual(b, Uint8Array.from([0x00, 0x00, 0x00]));
330 |
331 | Token.INT24_BE.put(b, 0, 0x0f0ba0);
332 | assert.deepEqual(b, Uint8Array.from([0x0f, 0x0b, 0xa0]));
333 |
334 | Token.INT24_BE.put(b, 0, -0x0f0bcc);
335 | assert.deepEqual(b, Uint8Array.from([0xf0, 0xf4, 0x34]));
336 | });
337 |
338 | it('should decode signed 24-bit big-endian integer (INT24_BE)', async () => {
339 |
340 | const rst = await getTokenizerWithData('\x00\x00\x00\xff\xff\xff\x10\x00\xff\x80\x00\x00', tokenizerType);
341 |
342 | let value: number = await rst.readToken(Token.INT24_BE);
343 | assert.strictEqual(typeof value, 'number');
344 | assert.strictEqual(value, 0, 'INT24_BE#1');
345 | value = await rst.readToken(Token.INT24_BE);
346 | assert.strictEqual(typeof value, 'number');
347 | assert.strictEqual(value, -1, 'INT24_BE#2');
348 | value = await rst.readToken(Token.INT24_BE);
349 | assert.strictEqual(typeof value, 'number');
350 | assert.strictEqual(value, 1048831, 'INT24_BE#3');
351 | value = await rst.readToken(Token.INT24_BE);
352 | assert.strictEqual(typeof value, 'number');
353 | assert.strictEqual(value, -8388608, 'INT24_BE#4');
354 | await rst.close();
355 | });
356 |
357 | // ToDo: test decoding: INT24_LE
358 |
359 | it('should encode signed 32-bit big-endian integer (INT32_BE)', () => {
360 |
361 | const b = new Uint8Array(4);
362 |
363 | Token.INT32_BE.put(b, 0, 0x00);
364 | assert.deepEqual(b, Uint8Array.from([0x00, 0x00, 0x00, 0x00]));
365 |
366 | Token.INT32_BE.put(b, 0, 0x0f0bcca0);
367 | assert.deepEqual(b, Uint8Array.from([0x0f, 0x0b, 0xcc, 0xa0]));
368 |
369 | Token.INT32_BE.put(b, 0, -0x0f0bcca0);
370 | assert.deepEqual(b, Uint8Array.from([0xf0, 0xf4, 0x33, 0x60]));
371 | });
372 |
373 | it('should decode signed 32-bit big-endian integer (INT32_BE)', async () => {
374 |
375 | const rst = await getTokenizerWithData('\x00\x00\x00\x00\xff\xff\xff\xff\x00\x10\x00\xff\x80\x00\x00\x00', tokenizerType);
376 |
377 | let value: number = await rst.readToken(Token.INT32_BE);
378 | assert.strictEqual(typeof value, 'number');
379 | assert.strictEqual(value, 0, 'INT32_BE #1');
380 | value = await rst.readToken(Token.INT32_BE);
381 | assert.strictEqual(typeof value, 'number');
382 | assert.strictEqual(value, -1, 'INT32_BE #2');
383 | value = await rst.readToken(Token.INT32_BE);
384 | assert.strictEqual(typeof value, 'number');
385 | assert.strictEqual(value, 1048831, 'INT32_BE #3');
386 | value = await rst.readToken(Token.INT32_BE);
387 | assert.strictEqual(typeof value, 'number');
388 | assert.strictEqual(value, -2147483648, 'INT32_BE #4');
389 | await rst.close();
390 | });
391 |
392 |         it('should encode unsigned 8-bit integer (UINT8)', () => {
393 |
394 | const b = new Uint8Array(1);
395 |
396 | Token.UINT8.put(b, 0, 0x00);
397 | assert.deepEqual(b, Uint8Array.from([0x00]));
398 |
399 | Token.UINT8.put(b, 0, 0xff);
400 | assert.deepEqual(b, Uint8Array.from([0xff]));
401 | });
402 |
403 | it('should decode unsigned 8-bit integer (UINT8)', async () => {
404 |
405 | const rst = await getTokenizerWithData('\x00\x1a\xff', tokenizerType);
406 |
407 | let value: number = await rst.readToken(Token.UINT8);
408 | assert.strictEqual(typeof value, 'number');
409 | assert.strictEqual(value, 0, 'UINT8 #1');
410 | value = await rst.readToken(Token.UINT8);
411 | assert.strictEqual(typeof value, 'number');
412 | assert.strictEqual(value, 26, 'UINT8 #2');
413 | value = await rst.readToken(Token.UINT8);
414 | assert.strictEqual(typeof value, 'number');
415 | assert.strictEqual(value, 255, 'UINT8 #3');
416 | await rst.close();
417 | });
418 |
419 |         it('should encode unsigned 16-bit little-endian integer (UINT16_LE)', () => {
420 |
421 | const b = new Uint8Array(4);
422 |
423 | Token.UINT16_LE.put(b, 0, 0x00);
424 | Token.UINT16_LE.put(b, 2, 0xffaa);
425 | assert.deepEqual(b, Uint8Array.from([0x00, 0x00, 0xaa, 0xff]));
426 | });
427 |
428 |         it('should encode unsigned 16-bit big-endian integer (UINT16_BE)', () => {
429 | const b = new Uint8Array(4);
430 | Token.UINT16_BE.put(b, 0, 0xf);
431 | Token.UINT16_BE.put(b, 2, 0xffaa);
432 | assert.deepEqual(b, Uint8Array.from([0x00, 0x0f, 0xff, 0xaa]));
433 | });
434 |
435 | it('should encode unsigned 16-bit mixed little/big-endian integers', () => {
436 | const b = new Uint8Array(4);
437 | Token.UINT16_BE.put(b, 0, 0xffaa);
438 | Token.UINT16_LE.put(b, 2, 0xffaa);
439 | assert.deepEqual(b, Uint8Array.from([0xff, 0xaa, 0xaa, 0xff]));
440 | });
441 |
442 | it('should decode unsigned mixed 16-bit big/little-endian integer', async () => {
443 |
444 | const rst = await getTokenizerWithData('\x1a\x00\x1a\x00\x1a\x00\x1a\x00', tokenizerType);
445 |
446 | let value: number = await rst.readToken(Token.UINT16_LE);
447 | assert.strictEqual(typeof value, 'number');
448 | assert.strictEqual(value, 0x001a, 'UINT16_LE #1');
449 | value = await rst.readToken(Token.UINT16_BE);
450 | assert.strictEqual(typeof value, 'number');
451 | assert.strictEqual(value, 0x1a00, 'UINT16_BE #2');
452 | value = await rst.readToken(Token.UINT16_LE);
453 | assert.strictEqual(typeof value, 'number');
454 | assert.strictEqual(value, 0x001a, 'UINT16_BE #3');
455 | value = await rst.readToken(Token.UINT16_BE);
456 | assert.strictEqual(typeof value, 'number');
457 | assert.strictEqual(value, 0x1a00, 'UINT16_LE #4');
458 |
459 | await rst.close();
460 | });
461 |
462 | it('should encode unsigned 24-bit little-endian integer (UINT24_LE)', () => {
463 |
464 | const b = new Uint8Array(3);
465 |
466 | Token.UINT24_LE.put(b, 0, 0x00);
467 |           assert.deepEqual(b, Uint8Array.from([0x00, 0x00, 0x00]));
468 |
469 | Token.UINT24_LE.put(b, 0, 0xff);
470 | assert.deepEqual(b, Uint8Array.from([0xff, 0x00, 0x00]));
471 |
472 | Token.UINT24_LE.put(b, 0, 0xaabbcc);
473 | assert.deepEqual(b, Uint8Array.from([0xcc, 0xbb, 0xaa]));
474 | });
475 |
476 | it('should encode unsigned 24-bit big-endian integer (UINT24_BE)', () => {
477 |
478 | const b = new Uint8Array(3);
479 |
480 | Token.UINT24_BE.put(b, 0, 0x00);
481 | assert.deepEqual(b, Uint8Array.from([0x00, 0x00, 0x00]));
482 |
483 | Token.UINT24_BE.put(b, 0, 0xff);
484 | assert.deepEqual(b, Uint8Array.from([0x00, 0x00, 0xff]));
485 |
486 | Token.UINT24_BE.put(b, 0, 0xaabbcc);
487 | assert.deepEqual(b, Uint8Array.from([0xaa, 0xbb, 0xcc]));
488 | });
489 |
490 | it('should decode signed 24-bit big/little-endian integer (UINT24_LE/INT24_BE)', async () => {
491 |
492 | const rst = await getTokenizerWithData('\x1a\x1a\x00\x1a\x1a\x00\x1a\x1a\x00\x1a\x1a\x00', tokenizerType);
493 |
494 | let value: number = await rst.readToken(Token.UINT24_LE);
495 | assert.strictEqual(typeof value, 'number');
496 | assert.strictEqual(value, 0x001a1a, 'INT24_LE#1');
497 | value = await rst.readToken(Token.UINT24_BE);
498 | assert.strictEqual(typeof value, 'number');
499 | assert.strictEqual(value, 0x1a1a00, 'INT24_BE#2');
500 | value = await rst.readToken(Token.UINT24_LE);
501 | assert.strictEqual(typeof value, 'number');
502 | assert.strictEqual(value, 0x001a1a, 'INT24_LE#3');
503 | value = await rst.readToken(Token.UINT24_BE);
504 | assert.strictEqual(typeof value, 'number');
505 | assert.strictEqual(value, 0x1a1a00, 'INT24_BE#4');
506 |
507 | await rst.close();
508 | });
509 |
510 | it('should encode unsigned 32-bit little-endian integer (UINT32_LE)', () => {
511 |
512 | const b = new Uint8Array(4);
513 |
514 | Token.UINT32_LE.put(b, 0, 0x00);
515 | assert.deepEqual(b, Uint8Array.from([0x00, 0x00, 0x00, 0x00]));
516 |
517 | Token.UINT32_LE.put(b, 0, 0xff);
518 | assert.deepEqual(b, Uint8Array.from([0xff, 0x00, 0x00, 0x00]));
519 |
520 | Token.UINT32_LE.put(b, 0, 0xaabbccdd);
521 | assert.deepEqual(b, Uint8Array.from([0xdd, 0xcc, 0xbb, 0xaa]));
522 | });
523 |
524 |         it('should encode unsigned 32-bit big-endian integer (UINT32_BE)', () => {
525 |
526 | const b = new Uint8Array(4);
527 |
528 | Token.UINT32_BE.put(b, 0, 0x00);
529 | assert.deepEqual(b, Uint8Array.from([0x00, 0x00, 0x00, 0x00]));
530 |
531 | Token.UINT32_BE.put(b, 0, 0xff);
532 | assert.deepEqual(b, Uint8Array.from([0x00, 0x00, 0x00, 0xff]));
533 |
534 | Token.UINT32_BE.put(b, 0, 0xaabbccdd);
535 | assert.deepEqual(b, Uint8Array.from([0xaa, 0xbb, 0xcc, 0xdd]));
536 | });
537 |
538 | it('should decode unsigned 32-bit little/big-endian integer (UINT32_LE/UINT32_BE)', async () => {
539 |
540 | const rst = await getTokenizerWithData('\x1a\x00\x1a\x00\x1a\x00\x1a\x00\x1a\x00\x1a\x00\x1a\x00\x1a\x00', tokenizerType);
541 |
542 | let value: number = await rst.readToken(Token.UINT32_LE);
543 | assert.strictEqual(typeof value, 'number');
544 | assert.strictEqual(value, 0x001a001a, 'UINT24_LE #1');
545 | value = await rst.readToken(Token.UINT32_BE);
546 | assert.strictEqual(typeof value, 'number');
547 | assert.strictEqual(value, 0x1a001a00, 'UINT32_BE #2');
548 | value = await rst.readToken(Token.UINT32_LE);
549 | assert.strictEqual(typeof value, 'number');
550 | assert.strictEqual(value, 0x001a001a, 'UINT32_LE #3');
551 | value = await rst.readToken(Token.UINT32_BE);
552 | assert.strictEqual(typeof value, 'number');
553 | assert.strictEqual(value, 0x1a001a00, 'UINT32_BE #4');
554 |
555 | await rst.close();
556 | });
557 |
558 | });
559 |
560 | it('Transparency', async function() {
561 |
562 | this.timeout(5000);
563 |
564 | const size = 10 * 1024;
565 | const buf = new Uint8Array(size);
566 |
567 | for (let i = 0; i < size; ++i) {
568 | buf[i] = i % 255;
569 | }
570 |
571 | const testFile = 'test2.dat';
572 | const pathTestFile = Path.join(__dirname, 'resources', testFile);
573 | await fs.writeFile(pathTestFile, buf);
574 |
575 | const rst = await tokenizerType.loadTokenizer(testFile);
576 | let expected = 0;
577 |
578 | try {
579 | let v: number;
580 | do {
581 | v = await rst.readNumber(Token.UINT8);
582 | assert.strictEqual(v, expected % 255, `offset=${expected}`);
583 | ++expected;
584 | } while (v > 0);
585 | } catch (err) {
586 | assert.instanceOf(err, EndOfStreamError);
587 | assert.strictEqual(expected, size, 'total number of parsed bytes');
588 | }
589 |
590 | await rst.close();
591 | });
592 |
593 | it('Handle peek token', async () => {
594 |
595 |         async function peekOnData(tokenizer: ITokenizer): Promise<void> {
596 | assert.strictEqual(tokenizer.position, 0);
597 |
598 | let value = await tokenizer.peekToken(Token.UINT32_LE);
599 | assert.strictEqual(typeof value, 'number');
600 | assert.strictEqual(value, 0x001a001a, 'UINT24_LE #1');
601 | assert.strictEqual(tokenizer.position, 0);
602 |
603 | value = await tokenizer.peekToken(Token.UINT32_LE);
604 | assert.strictEqual(typeof value, 'number');
605 | assert.strictEqual(value, 0x001a001a, 'UINT24_LE sequential peek #2');
606 | assert.strictEqual(tokenizer.position, 0);
607 | value = await tokenizer.readToken(Token.UINT32_LE);
608 |
609 | assert.strictEqual(typeof value, 'number');
610 | assert.strictEqual(value, 0x001a001a, 'UINT24_LE #3');
611 | assert.strictEqual(tokenizer.position, 4);
612 | value = await tokenizer.readToken(Token.UINT32_BE);
613 | assert.strictEqual(typeof value, 'number');
614 | assert.strictEqual(value, 0x1a001a00, 'UINT32_BE #4');
615 | assert.strictEqual(tokenizer.position, 8);
616 | value = await tokenizer.readToken(Token.UINT32_LE);
617 |
618 | assert.strictEqual(typeof value, 'number');
619 | assert.strictEqual(value, 0x001a001a, 'UINT32_LE #5');
620 | assert.strictEqual(tokenizer.position, 12);
621 | value = await tokenizer.readToken(Token.UINT32_BE);
622 |
623 | assert.strictEqual(typeof value, 'number');
624 | assert.strictEqual(value, 0x1a001a00, 'UINT32_BE #6');
625 | assert.strictEqual(tokenizer.position, 16);
626 |
627 | }
628 |
629 | const rst = await tokenizerType.loadTokenizer('test1.dat');
630 |
631 | if (rst.supportsRandomAccess()) {
632 | assert.strictEqual(rst.fileInfo.size, 16, 'check file size property');
633 | }
634 | await peekOnData(rst);
635 | await rst.close();
636 | });
637 |
638 | it('Overlapping peeks', async () => {
639 |
640 | const rst = await getTokenizerWithData('\x01\x02\x03\x04\x05', tokenizerType);
641 | const peekBuffer = new Uint8Array(3);
642 | const readBuffer = new Uint8Array(1);
643 |
644 | assert.strictEqual(0, rst.position);
645 | let len = await rst.peekBuffer(peekBuffer, {length: 3}); // Peek #1
646 | assert.strictEqual(3, len);
647 | assert.deepEqual(peekBuffer, stringToUint8Array('\x01\x02\x03'), 'Peek #1');
648 | assert.strictEqual(rst.position, 0);
649 | len = await rst.readBuffer(readBuffer, {length: 1}); // Read #1
650 | assert.strictEqual(len, 1);
651 | assert.strictEqual(rst.position, 1);
652 | assert.deepEqual(readBuffer, stringToUint8Array('\x01'), 'Read #1');
653 | len = await rst.peekBuffer(peekBuffer, {length: 3}); // Peek #2
654 | assert.strictEqual(len, 3);
655 | assert.strictEqual(rst.position, 1);
656 | assert.deepEqual(peekBuffer, stringToUint8Array('\x02\x03\x04'), 'Peek #2');
657 | len = await rst.readBuffer(readBuffer, {length: 1}); // Read #2
658 | assert.strictEqual(len, 1);
659 | assert.strictEqual(rst.position, 2);
660 | assert.deepEqual(readBuffer, stringToUint8Array('\x02'), 'Read #2');
661 | len = await rst.peekBuffer(peekBuffer, {length: 3}); // Peek #3
662 | assert.strictEqual(len, 3);
663 | assert.strictEqual(rst.position, 2);
664 | assert.deepEqual(peekBuffer, stringToUint8Array('\x03\x04\x05'), 'Peek #3');
665 | len = await rst.readBuffer(readBuffer, {length: 1}); // Read #3
666 | assert.strictEqual(len, 1);
667 | assert.strictEqual(rst.position, 3);
668 | assert.deepEqual(readBuffer, stringToUint8Array('\x03'), 'Read #3');
669 | len = await rst.peekBuffer(peekBuffer, {length: 2}); // Peek #4
670 | assert.strictEqual(len, 2, '3 bytes requested to peek, only 2 bytes left');
671 | assert.strictEqual(rst.position, 3);
672 | assert.deepEqual(peekBuffer, stringToUint8Array('\x04\x05\x05'), 'Peek #4');
673 | len = await rst.readBuffer(readBuffer, {length: 1}); // Read #4
674 | assert.strictEqual(len, 1);
675 | assert.strictEqual(rst.position, 4);
676 | assert.deepEqual(readBuffer, stringToUint8Array('\x04'), 'Read #4');
677 |
678 | await rst.close();
679 | });
680 |
681 | it('should be able to read at position ahead', async () => {
682 |
683 | const rst = await getTokenizerWithData('\x05peter', tokenizerType);
684 | // should decode string from chunk
685 | assert.strictEqual(rst.position, 0);
686 | const value = await rst.readToken(new Token.StringType(5, 'utf-8'), 1);
687 | assert.strictEqual(typeof value, 'string');
688 | assert.strictEqual(value, 'peter');
689 | assert.strictEqual(rst.position, 6);
690 |         // should reject at the end of the stream
691 | try {
692 | await rst.readToken(Token.UINT8);
693 | assert.fail('Should reject due to end-of-stream');
694 | } catch (err) {
695 | assert.instanceOf(err, EndOfStreamError);
696 | } finally {
697 | await rst.close();
698 | }
699 | });
700 |
701 | it('should be able to peek at position ahead', async () => {
702 |
703 | const rst = await getTokenizerWithData('\x05peter', tokenizerType);
704 | // should decode string from chunk
705 | assert.strictEqual(rst.position, 0);
706 | const value = await rst.peekToken(new Token.StringType(5, 'latin1'), 1);
707 | assert.strictEqual(typeof value, 'string');
708 | assert.strictEqual(value, 'peter');
709 | assert.strictEqual(rst.position, 0);
710 |
711 | await rst.close();
712 | });
713 |
714 | it('number', async () => {
715 | const tokenizer = await tokenizerType.loadTokenizer('test3.dat');
716 | assert.isDefined(tokenizer.fileInfo, 'tokenizer.fileInfo');
717 | // @ts-ignore
718 | await tokenizer.ignore(1);
719 | const x = await tokenizer.peekNumber(Token.INT32_BE);
720 | assert.strictEqual(x, 33752069);
721 |
722 | await tokenizer.close();
723 | });
724 |
725 | it('should throw an Error if we reach EOF while peeking a number', async () => {
726 | const tokenizer = await tokenizerType.loadTokenizer('test3.dat');
727 | if (tokenizerType.hasFileInfo) {
728 | assert.isDefined(tokenizer.fileInfo, 'tokenizer.fileInfo');
729 | }
730 | // @ts-ignore
731 | await tokenizer.ignore(2);
732 | try {
733 | await tokenizer.peekNumber(Token.INT32_BE);
734 | assert.fail('Should throw Error: End-Of-File');
735 | } catch (err) {
736 | assert.instanceOf(err, EndOfStreamError);
737 | }
738 | await tokenizer.close();
739 | });
740 |
741 | it('should be able to handle multiple ignores', async () => {
742 | const tokenizer = await tokenizerType.loadTokenizer('test1.dat');
743 | let value = await tokenizer.readToken(Token.UINT32_LE);
744 | assert.strictEqual(typeof value, 'number');
745 | assert.strictEqual(value, 0x001a001a, 'UINT24_LE #1');
746 | await tokenizer.ignore(Token.UINT32_BE.len);
747 | await tokenizer.ignore(Token.UINT32_LE.len);
748 | value = await tokenizer.readToken(Token.UINT32_BE);
749 | assert.strictEqual(typeof value, 'number');
750 | assert.strictEqual(value, 0x1a001a00, 'UINT32_BE #4');
751 | await tokenizer.close();
752 | });
753 |
754 | it('should be able to ignore (skip)', async () => {
755 |
756 | const tokenizer = await tokenizerType.loadTokenizer('test1.dat');
757 | assert.strictEqual(tokenizer.position, 0);
758 | await tokenizer.ignore(4);
759 | assert.strictEqual(tokenizer.position, 4);
760 | let value = await tokenizer.readToken(Token.UINT32_BE);
761 | assert.strictEqual(typeof value, 'number');
762 | assert.strictEqual(value, 0x1a001a00, 'UINT32_BE #2');
763 | value = await tokenizer.readToken(Token.UINT32_LE);
764 | assert.strictEqual(typeof value, 'number');
765 | assert.strictEqual(value, 0x001a001a, 'UINT32_LE #3');
766 | value = await tokenizer.readToken(Token.UINT32_BE);
767 | assert.strictEqual(typeof value, 'number');
768 | assert.strictEqual(value, 0x1a001a00, 'UINT32_BE #4');
769 | await tokenizer.close();
770 | });
771 |
772 | describe('End-Of-File exception behaviour', () => {
773 |
774 | it('should not throw an Error if we read exactly until the end of the file', async () => {
775 |
776 | const rst = await getTokenizerWithData('\x89\x54\x40', tokenizerType);
777 | const num = await rst.readToken(Token.UINT24_BE);
778 | assert.strictEqual(num, 9000000);
779 | await rst.close();
780 | });
781 |
782 | it('readBuffer()', async () => {
783 |
784 | const testFile = 'test1.dat';
785 |
786 | const stat = await fs.stat(getResourcePath(testFile));
787 | const tokenizer = await tokenizerType.loadTokenizer(testFile);
788 | const buf = new Uint8Array(stat.size);
789 | const bytesRead = await tokenizer.readBuffer(buf);
790 | assert.ok(typeof bytesRead === 'number', 'readBuffer promise should provide a number');
791 | assert.strictEqual(stat.size, bytesRead);
792 | try {
793 | await tokenizer.readBuffer(buf);
794 | assert.fail('Should throw EOF');
795 | } catch (err) {
796 | assert.instanceOf(err, EndOfStreamError);
797 | } finally {
798 | await tokenizer.close();
799 | }
800 | });
801 |
802 | it('should handle zero byte read', async () => {
803 |
804 | const rst = await getTokenizerWithData('\x00\x00\x00', tokenizerType);
805 | const uint8Array = await rst.readToken(new Token.Uint8ArrayType(0));
806 | assert.strictEqual(uint8Array.length, 0);
807 | await rst.close();
808 | });
809 |
810 | it('should not throw an Error if we read exactly until the end of the file', async () => {
811 |
812 | const rst = await getTokenizerWithData('\x89\x54\x40', tokenizerType);
813 | const num = await rst.readToken(Token.UINT24_BE);
814 | assert.strictEqual(num, 9000000);
815 | await rst.close();
816 | });
817 |
818 |         it('should throw an Error if EOF is reached in the middle of a token', async () => {
819 |
820 | const rst = await getTokenizerWithData('\x89\x54\x40', tokenizerType);
821 | try {
822 | await rst.readToken(Token.INT32_BE);
823 | assert.fail('It should throw EndOfFile Error');
824 | } catch (err) {
825 | assert.instanceOf(err, EndOfStreamError);
826 | } finally {
827 | await rst.close();
828 | }
829 | });
830 |
831 |         it('should throw an EOF error if we read into a buffer past the end of the file', async () => {
832 | const buffer = new Uint8Array(4);
833 |
834 | const rst = await getTokenizerWithData('\x89\x54\x40', tokenizerType);
835 | try {
836 | await rst.readBuffer(buffer);
837 | assert.fail('It should throw EndOfFile Error');
838 | } catch (err) {
839 | assert.instanceOf(err, EndOfStreamError);
840 | } finally {
841 | await rst.close();
842 | }
843 | });
844 |
845 |         it('should throw an EOF error if we peek into a buffer past the end of the file', async () => {
846 |
847 | const buffer = new Uint8Array(4);
848 | const rst = await getTokenizerWithData('\x89\x54\x40', tokenizerType);
849 | try {
850 | await rst.peekBuffer(buffer);
851 | assert.fail('It should throw EndOfFile Error');
852 | } catch (err) {
853 | assert.instanceOf(err, EndOfStreamError);
854 | } finally {
855 | await rst.close();
856 | }
857 | });
858 |
859 | });
860 |
861 | it('should be able to read from a file', async () => {
862 |
863 | const tokenizer = await tokenizerType.loadTokenizer('test1.dat');
864 | if (tokenizerType.hasFileInfo) {
865 | assert.strictEqual(tokenizer.fileInfo.size, 16, 'check file size property');
866 | }
867 | let value = await tokenizer.readToken(Token.UINT32_LE);
868 | assert.strictEqual(typeof value, 'number');
869 | assert.strictEqual(value, 0x001a001a, 'UINT24_LE #1');
870 | value = await tokenizer.readToken(Token.UINT32_BE);
871 | assert.strictEqual(typeof value, 'number');
872 | assert.strictEqual(value, 0x1a001a00, 'UINT32_BE #2');
873 | value = await tokenizer.readToken(Token.UINT32_LE);
874 | assert.strictEqual(typeof value, 'number');
875 | assert.strictEqual(value, 0x001a001a, 'UINT32_LE #3');
876 | value = await tokenizer.readToken(Token.UINT32_BE);
877 | assert.strictEqual(typeof value, 'number');
878 | assert.strictEqual(value, 0x1a001a00, 'UINT32_BE #4');
879 | await tokenizer.close();
880 | });
881 |
882 | it('should be able to parse the IgnoreType-token', async () => {
883 | const tokenizer = await tokenizerType.loadTokenizer('test1.dat');
884 | await tokenizer.readToken(new Token.IgnoreType(4));
885 | let value = await tokenizer.readToken(Token.UINT32_BE);
886 | assert.strictEqual(typeof value, 'number');
887 | assert.strictEqual(value, 0x1a001a00, 'UINT32_BE #2');
888 | value = await tokenizer.readToken(Token.UINT32_LE);
889 | assert.strictEqual(typeof value, 'number');
890 | assert.strictEqual(value, 0x001a001a, 'UINT32_LE #3');
891 | value = await tokenizer.readToken(Token.UINT32_BE);
892 | assert.strictEqual(typeof value, 'number');
893 | assert.strictEqual(value, 0x1a001a00, 'UINT32_BE #4');
894 | await tokenizer.close();
895 | });
896 |
897 | it('should be able to read 0 bytes from a file', async () => {
898 | const bufZero = new Uint8Array(0);
899 | const tokenizer = await tokenizerType.loadTokenizer('test1.dat');
900 | try {
901 | await tokenizer.readBuffer(bufZero);
902 | } finally {
903 | await tokenizer.close();
904 | }
905 | });
906 |
907 | if (tokenizerType.abortable) {
908 |
909 | describe('Abort delayed read', () => {
910 |
911 | it('without aborting', async () => {
912 | const fileReadStream = await getTokenizerWithData('123', tokenizerType, 500);
913 | try {
914 | const promise = fileReadStream.readToken(new Token.StringType(3, 'utf-8'), 0);
915 | assert.strictEqual(await promise, '123');
916 | } finally {
917 | await fileReadStream.close();
918 | }
919 | });
920 |
921 | it('abort async operation using `abort()`', async function() {
922 | if (process.versions.bun) {
923 | this.skip(); // Fails with Bun 1.2
924 | }
925 | const fileReadStream = await getTokenizerWithData('123', tokenizerType, 500);
926 | try {
927 | const promise = fileReadStream.readToken(new Token.StringType(3, 'utf-8'), 0);
928 | await fileReadStream.abort();
929 | await expect(promise).to.be.rejectedWith(Error);
930 | } finally {
931 | await fileReadStream.close();
932 | }
933 | });
934 |
935 | it('abort async operation using `close()`', async function() {
936 | if (process.versions.bun) {
937 | this.skip(); // Fails with Bun 1.2
938 | }
939 | const fileReadStream = await getTokenizerWithData('123', tokenizerType, 500);
940 | const promise = fileReadStream.readToken(new Token.StringType(3, 'utf-8'), 0);
941 | await fileReadStream.close();
942 | await expect(promise).to.be.rejectedWith(Error);
943 | });
944 |
945 | it('abort async operation using `AbortController`', async function() {
946 |
947 | if (process.versions.bun) {
948 | this.skip(); // Fails with Bun 1.2
949 | }
950 |
951 | const abortController = new AbortController();
952 | const fileReadStream = await getTokenizerWithData('123', tokenizerType, 500, abortController.signal);
953 | try {
954 | const promise = fileReadStream.readToken(new Token.StringType(3, 'utf-8'), 0);
955 | abortController.abort();
956 | await expect(promise).to.be.rejectedWith(Error);
957 | } finally {
958 | await fileReadStream.close();
959 | }
960 | });
961 |
962 | });
963 | }
964 |
965 | }); // End of test "Tokenizer-types"
966 | });
967 |
968 | describe('Random-read-access', async () => {
969 |
970 | tokenizerTests
971 | .filter(tokenizerType => tokenizerType.randomRead)
972 | .forEach(tokenizerType => {
973 | describe(tokenizerType.name, () => {
974 |
975 | it('Read ID3v1 header at the end of the file', async () => {
976 | const tokenizer = await tokenizerType.loadTokenizer('id3v1.mp3') as IRandomAccessTokenizer;
977 | try {
978 | assert.isTrue(tokenizer.supportsRandomAccess(), 'Tokenizer should support random reads');
979 | const id3HeaderSize = 128;
980 | const id3Header = new Uint8Array(id3HeaderSize);
981 | await tokenizer.readBuffer(id3Header, {position: tokenizer.fileInfo.size - id3HeaderSize});
982 | const id3Tag = new TextDecoder('utf-8').decode(id3Header.subarray(0, 3));
983 | assert.strictEqual(id3Tag, 'TAG');
984 | assert.strictEqual(tokenizer.position, tokenizer.fileInfo.size, 'Tokenizer position should be at the end of the file');
985 | tokenizer.setPosition(0);
986 | assert.strictEqual(tokenizer.position, 0, 'Tokenizer position should be at the beginning of the file');
987 | } finally {
988 | await tokenizer.close();
989 | }
990 | });
991 |
992 | it('Be able to random read from position 0', async () => {
993 | const tokenizer = await fromFile(getResourcePath('id3v1.mp3'));
994 | try {
995 | // Advance tokenizer.position
996 | await tokenizer.ignore(20);
997 | const mpegSync = new Uint8Array(2);
998 | await tokenizer.readBuffer(mpegSync, {position: 0});
999 | assert.strictEqual(mpegSync[0], 255, 'First sync byte');
1000 | assert.strictEqual(mpegSync[1], 251, 'Second sync byte');
1001 | } finally {
1002 | await tokenizer.close();
1003 | }
1004 |
1005 | });
1006 | });
1007 | });
1008 |
1009 | });
1010 | });
1011 |
1012 | describe('fromStream with mayBeLess flag', () => {
1013 |
1014 | it('mayBeLess=true', async () => {
1015 | // Initialize empty stream
1016 | const stream = new PassThrough();
1017 | const tokenizer = await fromStream(stream);
1018 | try {
1019 | stream.end();
1020 |
1021 | // Try to read 5 bytes from empty stream, with mayBeLess flag enabled
1022 | const buffer = new Uint8Array(5);
1023 | const bytesRead = await tokenizer.peekBuffer(buffer, {mayBeLess: true});
1024 | assert.strictEqual(bytesRead, 0);
1025 | } finally {
1026 | await tokenizer.close();
1027 | }
1028 | });
1029 |
1030 | it('mayBeLess=false', async () => {
1031 | // Initialize empty stream
1032 | const stream = new PassThrough();
1033 | const tokenizer = await fromStream(stream);
1034 | try {
1035 | stream.end();
1036 |
1037 |       // Try to read 5 bytes from empty stream, with mayBeLess flag disabled
1038 | const buffer = new Uint8Array(5);
1039 | await tokenizer.peekBuffer(buffer, {mayBeLess: false});
1040 | } catch (err) {
1041 | if (err instanceof Error) {
1042 | assert.strictEqual(err.message, 'End-Of-Stream');
1043 | } else {
1044 | assert.fail('Expected: err instanceof Error');
1045 | }
1046 | return;
1047 | } finally {
1048 | if (tokenizer) {
1049 | await tokenizer.close();
1050 | }
1051 | }
1052 | assert.fail('Should throw End-Of-Stream error');
1053 | });
1054 |
1055 | });
1056 |
1057 | it('should determine the file size using a file stream', async () => {
1058 | const stream = createReadStream(Path.join(__dirname, 'resources', 'test1.dat'));
1059 | const tokenizer = await fromStream(stream);
1060 | try {
1061 | assert.isDefined(tokenizer.fileInfo, '`fileInfo` should be defined');
1062 | assert.strictEqual(tokenizer.fileInfo.size, 16, 'fileInfo.size');
1063 | } finally {
1064 | await tokenizer.close();
1065 | }
1066 | });
1067 |
1068 | it('should release stream after close', async () => {
1069 |
1070 | const fileStream = makeByteReadableStreamFromFile(Path.join(__dirname, 'resources', 'test1.dat'), 0);
1071 | assert.isFalse(fileStream.locked, 'stream is unlocked before initializing tokenizer');
1072 | const webStreamTokenizer = fromWebStream(fileStream);
1073 | assert.isTrue(fileStream.locked, 'stream is locked after initializing tokenizer');
1074 | await webStreamTokenizer.close();
1075 | assert.isFalse(fileStream.locked, 'stream is unlocked after closing tokenizer');
1076 | });
1077 |
--------------------------------------------------------------------------------
/test/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "extends": "../tsconfig.json"
3 | }
4 |
--------------------------------------------------------------------------------
/test/util.ts:
--------------------------------------------------------------------------------
1 | import { createReadStream } from 'node:fs';
2 | import { Transform, Readable } from 'node:stream';
3 | import { makeByteReadableStreamFromNodeReadable } from 'node-readable-to-web-readable-stream';
4 |
5 | export function makeByteReadableStreamFromFile(filename: string, delay = 0): ReadableStream {
6 |
7 | // Create a Node.js Readable stream
8 | const nodeReadable = createReadStream(filename);
9 |
10 | // Create a Transform stream to introduce delay
11 | const delayTransform = new Transform({
12 | transform(chunk, encoding, callback) {
13 | setTimeout(() => callback(null, chunk), delay);
14 | }
15 | });
16 |
17 | // Pipe through the delay transform
18 | const delayedNodeStream = nodeReadable.pipe(delayTransform);
19 |
20 | return makeByteReadableStreamFromNodeReadable(delayedNodeStream);
21 | }
22 |
23 | export class DelayedStream extends Readable {
24 |
25 | private buffer: (Uint8Array | null)[];
26 | private isReading: boolean;
27 | private path: string | undefined;
28 |
29 | constructor(private sourceStream: Readable, private delay = 0) {
30 | super();
31 | this.path = (sourceStream as unknown as {path: string}).path;
32 | this.buffer = [];
33 | this.isReading = false;
34 |
35 | this.sourceStream.on('data', (chunk) => {
36 | this.buffer.push(chunk);
37 | this.emitDelayed();
38 | });
39 |
40 | this.sourceStream.on('end', () => {
41 | this.buffer.push(null); // Signal the end of the stream
42 | this.emitDelayed();
43 | });
44 | }
45 |
46 | _read() {
47 | if (!this.isReading && this.buffer.length > 0) {
48 | this.emitDelayed();
49 | }
50 | }
51 |
52 | emitDelayed() {
53 | if (this.isReading) return;
54 |
55 | if (this.buffer.length > 0) {
56 | this.isReading = true;
57 | const chunk = this.buffer.shift();
58 |
59 | setTimeout(() => {
60 | this.push(chunk);
61 | this.isReading = false;
62 |
63 | if (this.buffer.length > 0) {
64 | this.emitDelayed();
65 | }
66 | }, this.delay);
67 | }
68 | }
69 | }
70 |
71 |
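For context, a sketch of how the helpers above are exercised by the tests: `makeByteReadableStreamFromFile()` wraps a file in a web `ReadableStream` whose chunks arrive after the given delay, and `fromWebStream()` then tokenizes it. The relative paths below assume the sketch lives next to this util module in the test directory; the real tests build the resource path with `Path.join(__dirname, 'resources', …)`.

import { fromWebStream } from '../lib/index.js';
import { makeByteReadableStreamFromFile } from './util.js';

// Wrap the file in a delayed web stream (100 ms per chunk) and read the first byte.
const webStream = makeByteReadableStreamFromFile('test/resources/test1.dat', 100);
const tokenizer = fromWebStream(webStream);
try {
  const firstByte = new Uint8Array(1);
  await tokenizer.readBuffer(firstByte);
  console.log(firstByte[0]);
} finally {
  await tokenizer.close(); // releases the lock on the web stream
}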
--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "compilerOptions": {
3 | "inlineSources": false,
4 | "module": "node16",
5 | "moduleResolution": "node16",
6 | "target": "ES2020",
7 | "esModuleInterop": true,
8 | "strict": true,
9 | "verbatimModuleSyntax": true
10 | }
11 | }
12 |
13 |
--------------------------------------------------------------------------------