├── test ├── package.json └── data-model.test.ts ├── .prettierrc.js ├── packages ├── dom │ ├── .npmignore │ ├── tsconfig.json │ ├── package.json │ ├── src │ │ ├── range │ │ │ ├── index.ts │ │ │ ├── cartesian.ts │ │ │ └── match.ts │ │ ├── text-quote │ │ │ ├── index.ts │ │ │ ├── describe.ts │ │ │ └── match.ts │ │ ├── text-position │ │ │ ├── index.ts │ │ │ ├── match.ts │ │ │ └── describe.ts │ │ ├── index.ts │ │ ├── owner-document.ts │ │ ├── to-range.ts │ │ ├── css.ts │ │ ├── text-node-chunker.ts │ │ ├── highlight-text.ts │ │ └── normalize-range.ts │ └── test │ │ ├── css │ │ ├── match-cases.ts │ │ ├── match.test.ts │ │ └── describe.test.ts │ │ ├── text-position │ │ ├── describe.test.ts │ │ ├── match-cases.ts │ │ └── match.test.ts │ │ ├── range │ │ └── cartesian.test.ts │ │ ├── utils.ts │ │ ├── text-quote │ │ ├── describe.test.ts │ │ └── match.test.ts │ │ └── highlight-text │ │ └── highlight-text.test.ts ├── selector │ ├── .npmignore │ ├── tsconfig.json │ ├── package.json │ └── src │ │ ├── text │ │ ├── index.ts │ │ ├── describe-text-position.ts │ │ ├── match-text-position.ts │ │ ├── chunker.ts │ │ ├── code-point-seeker.ts │ │ ├── match-text-quote.ts │ │ └── describe-text-quote.ts │ │ ├── index.ts │ │ ├── refinable.ts │ │ └── types.ts └── apache-annotator │ ├── .npmignore │ ├── tsconfig.json │ ├── package.json │ └── src │ ├── dom.ts │ └── selector.ts ├── typedoc.json ├── .eslintignore ├── SECURITY.md ├── NOTICE ├── tsconfig.test.json ├── CODE_OF_CONDUCT.md ├── tsconfig.json ├── lerna.json ├── .editorconfig ├── tsconfig.base.json ├── DISCLAIMER-WIP ├── .mocharc.js ├── README.md ├── web ├── webpack.config.js ├── style.css ├── index.html └── index.js ├── Makefile ├── package.json ├── babel.config.js ├── .eslintrc.js ├── LICENSE └── LICENSES └── Apache-2.0.txt /test/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "module" 3 | } 4 | -------------------------------------------------------------------------------- 
/.prettierrc.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | singleQuote: true, 3 | trailingComma: 'all', 4 | }; 5 | -------------------------------------------------------------------------------- /packages/dom/.npmignore: -------------------------------------------------------------------------------- 1 | *.d.ts.map 2 | tsconfig.json 3 | tsconfig.tsbuildinfo 4 | /src 5 | /test 6 | -------------------------------------------------------------------------------- /packages/selector/.npmignore: -------------------------------------------------------------------------------- 1 | *.d.ts.map 2 | tsconfig.json 3 | tsconfig.tsbuildinfo 4 | /src 5 | /test 6 | -------------------------------------------------------------------------------- /packages/apache-annotator/.npmignore: -------------------------------------------------------------------------------- 1 | *.d.ts.map 2 | tsconfig.json 3 | tsconfig.tsbuildinfo 4 | /src 5 | /test 6 | -------------------------------------------------------------------------------- /typedoc.json: -------------------------------------------------------------------------------- 1 | { 2 | "disableSources": true, 3 | "entryPoints": ["packages/apache-annotator/src/"], 4 | "readme": "none" 5 | } 6 | -------------------------------------------------------------------------------- /.eslintignore: -------------------------------------------------------------------------------- 1 | !.eslintrc.js 2 | !.mocharc.js 3 | !/packages/*/src/types 4 | **/*.d.ts 5 | /coverage 6 | /docs 7 | /packages/*/lib 8 | /web/dist 9 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | Please follow the Apache Software Foundation's Security Team instructions when 2 | reporting any vulnerabilities: 3 | https://www.apache.org/security/ 4 | 
-------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Apache Annotator 2 | Copyright 2016-2022 The Apache Software Foundation 3 | 4 | This product includes software developed at The Apache Software 5 | Foundation (http://www.apache.org/). 6 | -------------------------------------------------------------------------------- /packages/selector/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "../../tsconfig.base.json", 3 | "include": ["src"], 4 | "compilerOptions": { 5 | "outDir": "lib", 6 | "rootDir": "src" 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /tsconfig.test.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "./tsconfig.base.json", 3 | "include": ["test", "packages/*/test"], 4 | "references": [ 5 | { "path": "packages/dom" }, 6 | { "path": "packages/selector" } 7 | ] 8 | } 9 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | Please be aware that this and every Apache Software Foundation project is 2 | governed by the official Apache Software Foundation 3 | [Code of Conduct](https://www.apache.org/foundation/policies/conduct.html). 
4 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "files": [], 3 | "references": [ 4 | { "path": "packages/apache-annotator" }, 5 | { "path": "packages/dom" }, 6 | { "path": "packages/selector" }, 7 | { "path": "tsconfig.test.json"} 8 | ] 9 | } 10 | -------------------------------------------------------------------------------- /lerna.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.3.0", 3 | "command": { 4 | "publish": { 5 | "preDistTag": "dev", 6 | "preid": "dev" 7 | }, 8 | "version": { 9 | "gitTagVersion": false, 10 | "push": false 11 | } 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /packages/dom/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "../../tsconfig.base.json", 3 | "include": ["src"], 4 | "compilerOptions": { 5 | "outDir": "lib", 6 | "rootDir": "src" 7 | }, 8 | "references": [ 9 | { "path": "../selector" } 10 | ] 11 | } 12 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | charset = utf-8 5 | end_of_line = lf 6 | indent_size = 2 7 | indent_style = space 8 | insert_final_newline = true 9 | max_line_length = 80 10 | trim_trailing_whitespace = true 11 | 12 | [Makefile] 13 | indent_size = 8 14 | indent_style = tab 15 | -------------------------------------------------------------------------------- /packages/apache-annotator/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "../../tsconfig.base.json", 3 | "include": ["src"], 4 | "compilerOptions": { 5 | "outDir": "lib", 6 | "rootDir": "src" 7 | }, 8 | 
"references": [ 9 | { "path": "../dom" }, 10 | { "path": "../selector" } 11 | ] 12 | } 13 | -------------------------------------------------------------------------------- /tsconfig.base.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "allowSyntheticDefaultImports": true, 4 | "composite": true, 5 | "declaration": true, 6 | "declarationMap": true, 7 | "downlevelIteration": true, 8 | "emitDeclarationOnly": true, 9 | "forceConsistentCasingInFileNames": true, 10 | "isolatedModules": true, 11 | "lib": [ 12 | "dom", 13 | "dom.iterable", 14 | "es2020" 15 | ], 16 | "module": "es2020", 17 | "moduleResolution": "node", 18 | "noPropertyAccessFromIndexSignature": true, 19 | "skipLibCheck": true, 20 | "strict": true, 21 | "target": "es2017" 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /packages/selector/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@apache-annotator/selector", 3 | "version": "0.3.0", 4 | "description": "Web Annotation selector engine.", 5 | "homepage": "https://annotator.apache.org", 6 | "repository": { 7 | "type": "git", 8 | "url": "https://github.com/apache/incubator-annotator.git", 9 | "directory": "packages/selector" 10 | }, 11 | "license": "Apache-2.0", 12 | "author": "Apache Software Foundation", 13 | "type": "module", 14 | "exports": "./lib/index.js", 15 | "main": "./lib/index.js", 16 | "dependencies": { 17 | "@babel/runtime-corejs3": "^7.13.10" 18 | }, 19 | "engines": { 20 | "node": "^14.15 || ^15.4 || >=16" 21 | }, 22 | "publishConfig": { 23 | "access": "public" 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /packages/dom/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@apache-annotator/dom", 3 | "version": "0.3.0", 4 | "description": 
"Utilities for annotation of the Document Object Model.", 5 | "homepage": "https://annotator.apache.org", 6 | "repository": { 7 | "type": "git", 8 | "url": "https://github.com/apache/incubator-annotator.git", 9 | "directory": "packages/dom" 10 | }, 11 | "license": "Apache-2.0", 12 | "author": "Apache Software Foundation", 13 | "type": "module", 14 | "exports": "./lib/index.js", 15 | "main": "./lib/index.js", 16 | "dependencies": { 17 | "@apache-annotator/selector": "^0.3.0", 18 | "@babel/runtime-corejs3": "^7.13.10", 19 | "@medv/finder": "^2.1.0" 20 | }, 21 | "engines": { 22 | "node": "^14.15 || ^15.4 || >=16" 23 | }, 24 | "publishConfig": { 25 | "access": "public" 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /packages/apache-annotator/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "apache-annotator", 3 | "version": "0.3.0", 4 | "description": "Apache Annotator provides annotation enabling code for browsers, servers, and humans.", 5 | "homepage": "https://annotator.apache.org", 6 | "repository": { 7 | "type": "git", 8 | "url": "https://github.com/apache/incubator-annotator.git", 9 | "directory": "packages/apache-annotator" 10 | }, 11 | "license": "Apache-2.0", 12 | "author": "Apache Software Foundation", 13 | "type": "module", 14 | "exports": { 15 | "./*": "./lib/*.js" 16 | }, 17 | "dependencies": { 18 | "@apache-annotator/dom": "^0.3.0", 19 | "@apache-annotator/selector": "^0.3.0", 20 | "@babel/runtime-corejs3": "^7.13.10" 21 | }, 22 | "engines": { 23 | "node": "^14.15 || ^15.4 || >=16" 24 | }, 25 | "publishConfig": { 26 | "access": "public" 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /packages/dom/src/range/index.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * @license 3 | * Licensed to the Apache Software Foundation (ASF) under one 4 | * 
or more contributor license agreements. See the NOTICE file 5 | * distributed with this work for additional information 6 | * regarding copyright ownership. The ASF licenses this file 7 | * to you under the Apache License, Version 2.0 (the 8 | * "License"); you may not use this file except in compliance 9 | * with the License. You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, 14 | * software distributed under the License is distributed on an 15 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | * KIND, either express or implied. See the License for the 17 | * specific language governing permissions and limitations 18 | * under the License. 19 | * 20 | * SPDX-FileCopyrightText: The Apache Software Foundation 21 | * SPDX-License-Identifier: Apache-2.0 22 | */ 23 | 24 | export * from './match.js'; 25 | -------------------------------------------------------------------------------- /packages/dom/src/text-quote/index.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * @license 3 | * Licensed to the Apache Software Foundation (ASF) under one 4 | * or more contributor license agreements. See the NOTICE file 5 | * distributed with this work for additional information 6 | * regarding copyright ownership. The ASF licenses this file 7 | * to you under the Apache License, Version 2.0 (the 8 | * "License"); you may not use this file except in compliance 9 | * with the License. You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, 14 | * software distributed under the License is distributed on an 15 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | * KIND, either express or implied. 
See the License for the 17 | * specific language governing permissions and limitations 18 | * under the License. 19 | * 20 | * SPDX-FileCopyrightText: The Apache Software Foundation 21 | * SPDX-License-Identifier: Apache-2.0 22 | */ 23 | 24 | export * from './describe.js'; 25 | export * from './match.js'; 26 | -------------------------------------------------------------------------------- /packages/dom/src/text-position/index.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * @license 3 | * Licensed to the Apache Software Foundation (ASF) under one 4 | * or more contributor license agreements. See the NOTICE file 5 | * distributed with this work for additional information 6 | * regarding copyright ownership. The ASF licenses this file 7 | * to you under the Apache License, Version 2.0 (the 8 | * "License"); you may not use this file except in compliance 9 | * with the License. You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, 14 | * software distributed under the License is distributed on an 15 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | * KIND, either express or implied. See the License for the 17 | * specific language governing permissions and limitations 18 | * under the License. 19 | * 20 | * SPDX-FileCopyrightText: The Apache Software Foundation 21 | * SPDX-License-Identifier: Apache-2.0 22 | */ 23 | 24 | export * from './describe.js'; 25 | export * from './match.js'; 26 | -------------------------------------------------------------------------------- /packages/dom/src/index.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * @license 3 | * Licensed to the Apache Software Foundation (ASF) under one 4 | * or more contributor license agreements. 
See the NOTICE file 5 | * distributed with this work for additional information 6 | * regarding copyright ownership. The ASF licenses this file 7 | * to you under the Apache License, Version 2.0 (the 8 | * "License"); you may not use this file except in compliance 9 | * with the License. You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, 14 | * software distributed under the License is distributed on an 15 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | * KIND, either express or implied. See the License for the 17 | * specific language governing permissions and limitations 18 | * under the License. 19 | * 20 | * SPDX-FileCopyrightText: The Apache Software Foundation 21 | * SPDX-License-Identifier: Apache-2.0 22 | */ 23 | 24 | export * from './css.js'; 25 | export * from './range/index.js'; 26 | export * from './text-quote/index.js'; 27 | export * from './text-position/index.js'; 28 | export * from './highlight-text.js'; 29 | -------------------------------------------------------------------------------- /DISCLAIMER-WIP: -------------------------------------------------------------------------------- 1 | Apache Annotator is an effort undergoing incubation at The Apache Software 2 | Foundation (ASF), sponsored by the name of Apache Incubator. Incubation is 3 | required of all newly accepted projects until a further review indicates that 4 | the infrastructure, communications, and decision making process have stabilized 5 | in a manner consistent with other successful ASF projects. While incubation 6 | status is not necessarily a reflection of the completeness or stability of the 7 | code, it does indicate that the project has yet to be fully endorsed by the ASF. 8 | 9 | Some of the incubating project’s releases may not be fully compliant with ASF 10 | policy. 
For example, releases may have incomplete or un-reviewed licensing 11 | conditions. What follows is a list of known issues the project is currently 12 | aware of (note that this list, by definition, is likely to be incomplete): 13 | 14 | If you are planning to incorporate this work into your product/project, please 15 | be aware that you will need to conduct a thorough licensing review to determine 16 | the overall implications of including this work. For the current status of this 17 | project through the Apache Incubator visit: 18 | https://incubator.apache.org/projects/annotator.html 19 | -------------------------------------------------------------------------------- /packages/selector/src/text/index.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * @license 3 | * Licensed to the Apache Software Foundation (ASF) under one 4 | * or more contributor license agreements. See the NOTICE file 5 | * distributed with this work for additional information 6 | * regarding copyright ownership. The ASF licenses this file 7 | * to you under the Apache License, Version 2.0 (the 8 | * "License"); you may not use this file except in compliance 9 | * with the License. You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, 14 | * software distributed under the License is distributed on an 15 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | * KIND, either express or implied. See the License for the 17 | * specific language governing permissions and limitations 18 | * under the License. 
19 | * 20 | * SPDX-FileCopyrightText: The Apache Software Foundation 21 | * SPDX-License-Identifier: Apache-2.0 22 | */ 23 | 24 | export * from './describe-text-quote.js'; 25 | export * from './match-text-quote.js'; 26 | export * from './describe-text-position.js'; 27 | export * from './match-text-position.js'; 28 | export * from './chunker.js'; 29 | -------------------------------------------------------------------------------- /packages/selector/src/index.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * @license 3 | * Licensed to the Apache Software Foundation (ASF) under one 4 | * or more contributor license agreements. See the NOTICE file 5 | * distributed with this work for additional information 6 | * regarding copyright ownership. The ASF licenses this file 7 | * to you under the Apache License, Version 2.0 (the 8 | * "License"); you may not use this file except in compliance 9 | * with the License. You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, 14 | * software distributed under the License is distributed on an 15 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | * KIND, either express or implied. See the License for the 17 | * specific language governing permissions and limitations 18 | * under the License. 
19 | * 20 | * SPDX-FileCopyrightText: The Apache Software Foundation 21 | * SPDX-License-Identifier: Apache-2.0 22 | */ 23 | 24 | export type { 25 | Matcher, 26 | Selector, 27 | CssSelector, 28 | RangeSelector, 29 | TextPositionSelector, 30 | TextQuoteSelector, 31 | } from './types.js'; 32 | export * from './text/index.js'; 33 | export * from './refinable.js'; 34 | -------------------------------------------------------------------------------- /.mocharc.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @license 3 | * Licensed to the Apache Software Foundation (ASF) under one 4 | * or more contributor license agreements. See the NOTICE file 5 | * distributed with this work for additional information 6 | * regarding copyright ownership. The ASF licenses this file 7 | * to you under the Apache License, Version 2.0 (the 8 | * "License"); you may not use this file except in compliance 9 | * with the License. You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, 14 | * software distributed under the License is distributed on an 15 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | * KIND, either express or implied. See the License for the 17 | * specific language governing permissions and limitations 18 | * under the License. 
19 | 20 | * SPDX-FileCopyrightText: The Apache Software Foundation 21 | * SPDX-License-Identifier: Apache-2.0 22 | */ 23 | 24 | module.exports = { 25 | extension: ['.ts'], 26 | ignore: ['node_modules'], 27 | loader: 'babel-register-esm', 28 | require: ['global-jsdom/register'], 29 | timeout: 5000, 30 | watchFiles: [ 31 | './test/**/*.ts', 32 | './packages/*/src/**/*.ts', 33 | './packages/*/test/**/*.ts', 34 | ], 35 | }; 36 | -------------------------------------------------------------------------------- /packages/apache-annotator/src/dom.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * @license 3 | * Licensed to the Apache Software Foundation (ASF) under one 4 | * or more contributor license agreements. See the NOTICE file 5 | * distributed with this work for additional information 6 | * regarding copyright ownership. The ASF licenses this file 7 | * to you under the Apache License, Version 2.0 (the 8 | * "License"); you may not use this file except in compliance 9 | * with the License. You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, 14 | * software distributed under the License is distributed on an 15 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | * KIND, either express or implied. See the License for the 17 | * specific language governing permissions and limitations 18 | * under the License. 19 | * 20 | * SPDX-FileCopyrightText: The Apache Software Foundation 21 | * SPDX-License-Identifier: Apache-2.0 22 | */ 23 | 24 | /** 25 | * This module provides functions for handling annotations in the context of an 26 | * HTML DOM; in other words, a web page. 27 | * 28 | * The module’s main functionality is *matching* (or *‘anchoring’*) a {@link https://www.w3.org/TR/2017/REC-annotation-model-20170223/#selectors 29 | * | Selector} to the DOM, i.e. 
finding which piece of a web page it refers to; 30 | * and, vice versa, *describing* a selection of the page as a Selector. 31 | * 32 | * @module 33 | */ 34 | 35 | export * from '@apache-annotator/dom'; 36 | -------------------------------------------------------------------------------- /packages/apache-annotator/src/selector.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * @license 3 | * Licensed to the Apache Software Foundation (ASF) under one 4 | * or more contributor license agreements. See the NOTICE file 5 | * distributed with this work for additional information 6 | * regarding copyright ownership. The ASF licenses this file 7 | * to you under the Apache License, Version 2.0 (the 8 | * "License"); you may not use this file except in compliance 9 | * with the License. You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, 14 | * software distributed under the License is distributed on an 15 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | * KIND, either express or implied. See the License for the 17 | * specific language governing permissions and limitations 18 | * under the License. 19 | * 20 | * SPDX-FileCopyrightText: The Apache Software Foundation 21 | * SPDX-License-Identifier: Apache-2.0 22 | */ 23 | 24 | /** 25 | * This module provides types and generic functions for handling {@link https://www.w3.org/TR/2017/REC-annotation-model-20170223/#selectors 26 | * | Selector}s. 27 | * 28 | * Annotation tool developers should not need most of the functions contained 29 | * in this module, but would instead mainly use the module made for the specific 30 | * context (document type) they are dealing with. See {@link dom}, currently the 31 | * only such module. 
32 | * 33 | * @module 34 | */ 35 | 36 | export * from '@apache-annotator/selector'; 37 | -------------------------------------------------------------------------------- /packages/dom/src/owner-document.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * @license 3 | * Licensed to the Apache Software Foundation (ASF) under one 4 | * or more contributor license agreements. See the NOTICE file 5 | * distributed with this work for additional information 6 | * regarding copyright ownership. The ASF licenses this file 7 | * to you under the Apache License, Version 2.0 (the 8 | * "License"); you may not use this file except in compliance 9 | * with the License. You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, 14 | * software distributed under the License is distributed on an 15 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | * KIND, either express or implied. See the License for the 17 | * specific language governing permissions and limitations 18 | * under the License. 19 | * 20 | * SPDX-FileCopyrightText: The Apache Software Foundation 21 | * SPDX-License-Identifier: Apache-2.0 22 | */ 23 | 24 | /** 25 | * Get the owner document of either a range or a node. For a range, the owner 26 | * document of its start container is used; a Document node is returned itself. 27 | * @param nodeOrRange the node or range for which to get the owner document. 28 | */ 29 | export function ownerDocument(nodeOrRange: Node | Range): Document { 30 | const node = isRange(nodeOrRange) ? nodeOrRange.startContainer : nodeOrRange; 31 | // node.ownerDocument is null iff node is itself a Document. 32 | return node.ownerDocument ?? 
(node as Document); 33 | } 34 | 35 | function isRange(nodeOrRange: Node | Range): nodeOrRange is Range { 36 | return 'startContainer' in nodeOrRange; 37 | } 38 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # [Apache Annotator](http://annotator.apache.org/) (incubating) [![Build Status](https://github.com/apache/incubator-annotator/actions/workflows/node.js.yml/badge.svg)](https://github.com/apache/incubator-annotator/actions/workflows/node.js.yml) 2 | 3 | Apache Annotator (incubating) provides libraries to enable annotation related 4 | software, with an initial focus on identification of textual fragments in 5 | browser environments. 6 | 7 | ## Installation, usage, API documentation 8 | 9 | See the documentation on the website: <https://annotator.apache.org/> 10 | 11 | ## How to build 12 | 13 | Building Annotator libraries requires [Node.js](https://nodejs.org/) (>= 18). 14 | All other dependencies are automatically installed as part of the build. 15 | 16 | * `npm run build` -- builds the project 17 | * `npm test` -- runs the tests 18 | * `npm run start` -- starts the demo application 19 | 20 | ## Getting Involved 21 | 22 | * Join the [mailing list](http://mail-archives.apache.org/mod_mbox/incubator-annotator-dev/). Send an email to 23 | dev-subscribe@apache-annotator.apache.org to subscribe. 24 | * Browse the [issue tracker](https://github.com/apache/incubator-annotator/issues) and file new issues if you encounter problems. 25 | * Read or contribute to the [wiki](https://github.com/apache/incubator-annotator/wiki). 26 | 27 | # License 28 | 29 | This project is available as open source under the terms of the Apache 2.0 License. 30 | For accurate information, please check individual files. 31 | 32 | # Disclaimer 33 | 34 | Apache Annotator is currently undergoing incubation at The Apache Software 35 | Foundation. 
36 | 37 | See the accompanying [DISCLAIMER](./DISCLAIMER-WIP) file for details. 38 | -------------------------------------------------------------------------------- /web/webpack.config.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @license 3 | * Licensed to the Apache Software Foundation (ASF) under one 4 | * or more contributor license agreements. See the NOTICE file 5 | * distributed with this work for additional information 6 | * regarding copyright ownership. The ASF licenses this file 7 | * to you under the Apache License, Version 2.0 (the 8 | * "License"); you may not use this file except in compliance 9 | * with the License. You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, 14 | * software distributed under the License is distributed on an 15 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | * KIND, either express or implied. See the License for the 17 | * specific language governing permissions and limitations 18 | * under the License. 
19 | * 20 | * SPDX-FileCopyrightText: The Apache Software Foundation 21 | * SPDX-License-Identifier: Apache-2.0 22 | */ 23 | 24 | const path = require('path'); 25 | 26 | module.exports = { 27 | context: __dirname, 28 | entry: ['./index.html', './index.js', './style.css'], 29 | module: { 30 | rules: [ 31 | { 32 | test: /\.[jt]s$/, 33 | exclude: /node_modules/, 34 | use: 'babel-loader', 35 | }, 36 | { 37 | exclude: /\.[jt]s$/, 38 | use: [ 39 | { 40 | loader: 'file-loader', 41 | options: { 42 | name: '[path][name].[ext]', 43 | }, 44 | }, 45 | ], 46 | }, 47 | ], 48 | }, 49 | output: { 50 | // Note this directory is imported by the annotator website 51 | path: path.resolve(__dirname, 'dist'), 52 | }, 53 | }; 54 | -------------------------------------------------------------------------------- /packages/dom/src/to-range.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * @license 3 | * Licensed to the Apache Software Foundation (ASF) under one 4 | * or more contributor license agreements. See the NOTICE file 5 | * distributed with this work for additional information 6 | * regarding copyright ownership. The ASF licenses this file 7 | * to you under the Apache License, Version 2.0 (the 8 | * "License"); you may not use this file except in compliance 9 | * with the License. You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, 14 | * software distributed under the License is distributed on an 15 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | * KIND, either express or implied. See the License for the 17 | * specific language governing permissions and limitations 18 | * under the License. 
19 | * 20 | * SPDX-FileCopyrightText: The Apache Software Foundation 21 | * SPDX-License-Identifier: Apache-2.0 22 | */ 23 | 24 | import { ownerDocument } from './owner-document.js'; 25 | 26 | /** 27 | * Returns a range that exactly selects the contents of the given node. 28 | * 29 | * This function is idempotent: If the given argument is already a range, it 30 | * simply returns that range. 31 | * 32 | * @param nodeOrRange The node/range to convert to a range if it is not already 33 | * a range. 34 | */ 35 | export function toRange(nodeOrRange: Node | Range): Range { 36 | if (isRange(nodeOrRange)) { 37 | return nodeOrRange; 38 | } else { 39 | const node = nodeOrRange; 40 | const range = ownerDocument(node).createRange(); 41 | range.selectNodeContents(node); 42 | return range; 43 | } 44 | } 45 | 46 | function isRange(nodeOrRange: Node | Range): nodeOrRange is Range { 47 | return 'startContainer' in nodeOrRange; 48 | } 49 | -------------------------------------------------------------------------------- /web/style.css: -------------------------------------------------------------------------------- 1 | /** 2 | * @license 3 | * Licensed to the Apache Software Foundation (ASF) under one 4 | * or more contributor license agreements. See the NOTICE file 5 | * distributed with this work for additional information 6 | * regarding copyright ownership. The ASF licenses this file 7 | * to you under the Apache License, Version 2.0 (the 8 | * "License"); you may not use this file except in compliance 9 | * with the License. You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, 14 | * software distributed under the License is distributed on an 15 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | * KIND, either express or implied. See the License for the 17 | * specific language governing permissions and limitations 18 | * under the License. 
19 | * 20 | * SPDX-FileCopyrightText: The Apache Software Foundation 21 | * SPDX-License-Identifier: Apache-2.0 22 | */ 23 | 24 | body { 25 | font-family: system-ui, -apple-system, Segoe UI, Roboto, Ubuntu, Cantarell, Noto Sans, sans-serif; 26 | line-height: 1.5; 27 | margin: 2rem; 28 | } 29 | 30 | h1, h2, h3, h4, h5, h6 { 31 | font-weight: 500; 32 | } 33 | 34 | h1 { font-size: 1.4em; } 35 | h2 { font-size: 1.2em; } 36 | h3 { font-size: 1.1em; } 37 | 38 | header { 39 | border-bottom: 1px solid lightgrey; 40 | font-size: 0.8em; 41 | } 42 | 43 | nav { 44 | margin: 2rem; 45 | } 46 | 47 | main > *:not(.full-width) { 48 | max-width: 40rem; 49 | margin: 2rem auto; 50 | } 51 | 52 | blockquote { 53 | font-style: italic; 54 | } 55 | 56 | li { 57 | margin-top: 1em; 58 | } 59 | 60 | mark { 61 | background-color: rgba(255, 255, 0, 0.5); 62 | outline: 1px solid rgba(255, 100, 0, 0.8); 63 | } 64 | 65 | .columns { 66 | display: flex; 67 | flex-flow: row wrap; 68 | } 69 | 70 | .column { 71 | min-width: 10rem; 72 | width: 10rem; 73 | flex-grow: 1; 74 | margin: 1rem; 75 | } 76 | -------------------------------------------------------------------------------- /packages/dom/test/css/match-cases.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * @license 3 | * Licensed to the Apache Software Foundation (ASF) under one 4 | * or more contributor license agreements. See the NOTICE file 5 | * distributed with this work for additional information 6 | * regarding copyright ownership. The ASF licenses this file 7 | * to you under the Apache License, Version 2.0 (the 8 | * "License"); you may not use this file except in compliance 9 | * with the License. 
You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, 14 | * software distributed under the License is distributed on an 15 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | * KIND, either express or implied. See the License for the 17 | * specific language governing permissions and limitations 18 | * under the License. 19 | * 20 | * SPDX-FileCopyrightText: The Apache Software Foundation 21 | * SPDX-License-Identifier: Apache-2.0 22 | */ 23 | 24 | import type { CssSelector } from '@apache-annotator/selector'; 25 | 26 | export const testCases: { 27 | [name: string]: { 28 | html: string; 29 | selector: CssSelector; 30 | scopeXPath?: string; 31 | expected: string[]; 32 | }; 33 | } = { 34 | simple: { 35 | html: 'lorem ipsum dolor amet yada yada', 36 | selector: { 37 | type: 'CssSelector', 38 | value: 'i:nth-child(2)', 39 | }, 40 | expected: ['//b/i[2]'], 41 | }, 42 | 'multiple matches': { 43 | html: 'lorem ipsum dolor amet yada yada', 44 | selector: { 45 | type: 'CssSelector', 46 | value: 'i', 47 | }, 48 | expected: ['//b/i[1]', '//b/i[2]', '//b/i[3]'], 49 | }, 50 | 'with scope': { 51 | html: 'lorem ipsum dolor amet yada yada', 52 | selector: { 53 | type: 'CssSelector', 54 | value: 'i', 55 | }, 56 | scopeXPath: '//u', 57 | expected: ['//u/i[1]', '//u/i[2]'], 58 | }, 59 | }; 60 | -------------------------------------------------------------------------------- /packages/dom/test/text-position/describe.test.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * @license 3 | * Licensed to the Apache Software Foundation (ASF) under one 4 | * or more contributor license agreements. See the NOTICE file 5 | * distributed with this work for additional information 6 | * regarding copyright ownership. 
The ASF licenses this file 7 | * to you under the Apache License, Version 2.0 (the 8 | * "License"); you may not use this file except in compliance 9 | * with the License. You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, 14 | * software distributed under the License is distributed on an 15 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | * KIND, either express or implied. See the License for the 17 | * specific language governing permissions and limitations 18 | * under the License. 19 | * 20 | * SPDX-FileCopyrightText: The Apache Software Foundation 21 | * SPDX-License-Identifier: Apache-2.0 22 | */ 23 | 24 | import { strict as assert } from 'assert'; 25 | import { describeTextPosition } from '../../src/text-position/describe.js'; 26 | import { hydrateRange } from '../utils.js'; 27 | import { testCases } from './match-cases.js'; 28 | 29 | const domParser = new DOMParser(); 30 | 31 | describe('describeTextPosition', () => { 32 | describe('inverts test cases of text position matcher', () => { 33 | for (const [name, { html, selector, expected }] of Object.entries( 34 | testCases, 35 | )) { 36 | const range = expected[0]; 37 | it(`case: '${name}'`, async () => { 38 | const doc = domParser.parseFromString(html, 'text/html'); 39 | const result = await describeTextPosition( 40 | hydrateRange(range, doc), 41 | doc, 42 | ); 43 | assert.deepEqual(result, selector); 44 | }); 45 | } 46 | }); 47 | 48 | it('works with a scope', () => { 49 | // TODO 50 | }); 51 | 52 | it('works with split text nodes', () => { 53 | // TODO 54 | }); 55 | 56 | it('works with code points split across text nodes', () => { 57 | // TODO 58 | }); 59 | }); 60 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Licensed under the
Apache License, Version 2.0 (the "License"); you may not 2 | # use this file except in compliance with the License. You may obtain a copy of 3 | # the License at 4 | # 5 | # http://www.apache.org/licenses/LICENSE-2.0 6 | # 7 | # Unless required by applicable law or agreed to in writing, software 8 | # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 9 | # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 10 | # License for the specific language governing permissions and limitations under 11 | # the License. 12 | # 13 | # SPDX-FileCopyrightText: The Apache Software Foundation 14 | # SPDX-License-Identifier: Apache-2.0 15 | 16 | # What is the prerelease version? 17 | vsn_pre = $(shell git describe --tags --always --first-parent \ 18 | | grep -Eo -- '(-rc\.[0-9]+)?$$' \ 19 | 2>/dev/null) 20 | 21 | # What is the release version? 22 | vsn_rel = $(shell git describe --tags --always --first-parent \ 23 | | grep -Eo -- '^v[0-9]+\.[0-9]+\.[0-9]+' \ 24 | | tail -c +2 \ 25 | 2>/dev/null) 26 | 27 | # What is the release tag?
28 | vsn_tag = $(shell git describe --tags --always --first-parent \ 29 | | grep -Eo -- '^v[0-9]+\.[0-9]+\.[0-9]+(-rc\.[0-9]+)?$$' \ 30 | 2>/dev/null) 31 | 32 | distdir = apache-annotator-$(vsn_rel)-incubating 33 | disttar = apache-annotator-$(vsn_rel)$(vsn_pre)-incubating.tar.gz 34 | 35 | .PHONY: all 36 | all: build 37 | 38 | .PHONY: build 39 | build: 40 | @npm install 41 | 42 | .PHONY: clean 43 | clean: 44 | @npm run clean 45 | 46 | .PHONY: check 47 | check: lint test 48 | 49 | .PHONY: lint 50 | lint: build 51 | @npm run lint 52 | 53 | .PHONY: test 54 | test: build 55 | @npm run test 56 | 57 | ifeq ($(vsn_tag),) 58 | 59 | .PHONY: dist 60 | dist: 61 | $(error No tag found for release) 62 | 63 | else 64 | 65 | .PHONY: dist 66 | dist: 67 | @rm -rf $(distdir) 68 | @git archive --output $(disttar) --prefix $(distdir)/ $(vsn_tag) 69 | @echo "Done: $(disttar)" 70 | 71 | endif 72 | 73 | .PHONY: distcheck 74 | distcheck: dist 75 | @tar xzf $(disttar) 76 | @$(MAKE) -C $(distdir) check 77 | 78 | .PHONY: distsign 79 | distsign: dist 80 | @gpg -ab $(disttar) 81 | @sha256sum $(disttar) > $(disttar).sha256 82 | @sha512sum $(disttar) > $(disttar).sha512 83 | -------------------------------------------------------------------------------- /packages/dom/test/css/match.test.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * @license 3 | * Licensed to the Apache Software Foundation (ASF) under one 4 | * or more contributor license agreements. See the NOTICE file 5 | * distributed with this work for additional information 6 | * regarding copyright ownership. The ASF licenses this file 7 | * to you under the Apache License, Version 2.0 (the 8 | * "License"); you may not use this file except in compliance 9 | * with the License.
You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, 14 | * software distributed under the License is distributed on an 15 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | * KIND, either express or implied. See the License for the 17 | * specific language governing permissions and limitations 18 | * under the License. 19 | * 20 | * SPDX-FileCopyrightText: The Apache Software Foundation 21 | * SPDX-License-Identifier: Apache-2.0 22 | */ 23 | 24 | import { strict as assert } from 'assert'; 25 | import type { CssSelector } from '@apache-annotator/selector'; 26 | import { createCssSelectorMatcher } from '../../src/css.js'; 27 | import { evaluateXPath } from '../utils.js'; 28 | import { testCases } from './match-cases.js'; 29 | 30 | const domParser = new DOMParser(); 31 | 32 | describe('CreateCssSelectorMatcher', () => { 33 | for (const [name, { html, selector, scopeXPath, expected }] of Object.entries( 34 | testCases, 35 | )) { 36 | it(`works for case: '${name}'`, async () => { 37 | const doc = domParser.parseFromString(html, 'text/html'); 38 | 39 | const scopeElement = scopeXPath ? 
evaluateXPath(doc, scopeXPath) : doc; 40 | const scope = doc.createRange(); 41 | scope.selectNodeContents(scopeElement); 42 | 43 | await testMatcher(doc, scope, selector, expected); 44 | }); 45 | } 46 | }); 47 | 48 | async function testMatcher( 49 | doc: Document, 50 | scope: Range, 51 | selector: CssSelector, 52 | expected: string[], 53 | ) { 54 | const matcher = createCssSelectorMatcher(selector); 55 | const matches = []; 56 | for await (const value of matcher(scope)) matches.push(value); 57 | assert.equal(matches.length, expected.length, 'Unexpected number of matches'); 58 | matches.forEach((match, i) => { 59 | const expectedElement = evaluateXPath(doc, expected[i]); 60 | assert.equal(match, expectedElement); 61 | }); 62 | } 63 | -------------------------------------------------------------------------------- /packages/dom/test/css/describe.test.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * @license 3 | * Licensed to the Apache Software Foundation (ASF) under one 4 | * or more contributor license agreements. See the NOTICE file 5 | * distributed with this work for additional information 6 | * regarding copyright ownership. The ASF licenses this file 7 | * to you under the Apache License, Version 2.0 (the 8 | * "License"); you may not use this file except in compliance 9 | * with the License. You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, 14 | * software distributed under the License is distributed on an 15 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | * KIND, either express or implied. See the License for the 17 | * specific language governing permissions and limitations 18 | * under the License. 
19 | * 20 | * SPDX-FileCopyrightText: The Apache Software Foundation 21 | * SPDX-License-Identifier: Apache-2.0 22 | */ 23 | 24 | import { strict as assert } from 'assert'; 25 | import { describeCss } from '../../src/css.js'; 26 | import { evaluateXPath } from '../utils.js'; 27 | import { testCases } from './match-cases.js'; 28 | 29 | const domParser = new DOMParser(); 30 | 31 | describe('describeCss', () => { 32 | describe('inverts test cases of css matcher', () => { 33 | for (const [name, { html, scopeXPath, expected }] of Object.entries( 34 | testCases, 35 | )) { 36 | for (let i = 0; i < expected.length; i++) { 37 | const elementXPath = expected[i]; 38 | it(`case: '${name}' (${i + 1}/${expected.length})`, async () => { 39 | const doc = domParser.parseFromString(html, 'text/html'); 40 | const element = evaluateXPath(doc, elementXPath) as HTMLElement; 41 | const scopeElement = scopeXPath 42 | ? (evaluateXPath(doc, scopeXPath) as HTMLElement) 43 | : undefined; 44 | const cssSelector = await describeCss(element, scopeElement); 45 | 46 | // We do not require a specific value for the selector, just 47 | // that it uniquely matches the same element again. 48 | const matchingElements = (scopeElement ?? doc).querySelectorAll( 49 | cssSelector.value, 50 | ); 51 | assert.equal( 52 | matchingElements.length, 53 | 1, 54 | 'Expected a selector with a single match', 55 | ); 56 | assert.equal(matchingElements[0], element); 57 | }); 58 | } 59 | } 60 | }); 61 | }); 62 | -------------------------------------------------------------------------------- /packages/dom/test/range/cartesian.test.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * @license 3 | * Licensed to the Apache Software Foundation (ASF) under one 4 | * or more contributor license agreements. See the NOTICE file 5 | * distributed with this work for additional information 6 | * regarding copyright ownership. 
The ASF licenses this file 7 | * to you under the Apache License, Version 2.0 (the 8 | * "License"); you may not use this file except in compliance 9 | * with the License. You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, 14 | * software distributed under the License is distributed on an 15 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | * KIND, either express or implied. See the License for the 17 | * specific language governing permissions and limitations 18 | * under the License. 19 | * 20 | * SPDX-FileCopyrightText: The Apache Software Foundation 21 | * SPDX-License-Identifier: Apache-2.0 22 | */ 23 | 24 | import { strict as assert } from 'assert'; 25 | import { cartesian } from '../../src/range/cartesian.js'; 26 | 27 | async function* gen1() { 28 | yield 1; 29 | yield Promise.resolve(2); 30 | yield 3; 31 | } 32 | 33 | async function* gen2() { 34 | yield 4; 35 | } 36 | 37 | async function* gen3() { 38 | yield 5; 39 | yield 6; 40 | } 41 | 42 | describe('cartesian', () => { 43 | it('yields the cartesian product of the yielded items', async () => { 44 | const cart = cartesian(gen1(), gen2(), gen3()); 45 | 46 | const expected = [ 47 | [1, 4, 5], 48 | [2, 4, 5], 49 | [3, 4, 5], 50 | [1, 4, 6], 51 | [2, 4, 6], 52 | [3, 4, 6], 53 | ]; 54 | 55 | const actual: number[][] = []; 56 | for await (const value of cart) { 57 | actual.push(value); 58 | } 59 | 60 | assert.deepEqual(actual, expected, 'yields the expected items'); 61 | }); 62 | 63 | it('re-raises exceptions and closes iterators', async () => { 64 | let didClose = false; 65 | const error = new Error(); 66 | 67 | async function* throws() { 68 | yield 1; 69 | throw error; 70 | } 71 | 72 | async function* works() { 73 | try { 74 | yield 2; 75 | yield 3; 76 | } finally { 77 | didClose = true; 78 | } 79 | } 80 | 81 | try { 82 | // eslint-disable-next-line 83 | const cart = 
cartesian(throws(), works()); 84 | await cart.next(); 85 | await cart.next(); 86 | } catch (e) { 87 | assert.strictEqual(error, e, 're-raises an error from an iterable'); 88 | assert.strictEqual(didClose, true, 'closes the iterators'); 89 | } 90 | }); 91 | }); 92 | -------------------------------------------------------------------------------- /packages/selector/src/text/describe-text-position.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * @license 3 | * Licensed to the Apache Software Foundation (ASF) under one 4 | * or more contributor license agreements. See the NOTICE file 5 | * distributed with this work for additional information 6 | * regarding copyright ownership. The ASF licenses this file 7 | * to you under the Apache License, Version 2.0 (the 8 | * "License"); you may not use this file except in compliance 9 | * with the License. You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, 14 | * software distributed under the License is distributed on an 15 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | * KIND, either express or implied. See the License for the 17 | * specific language governing permissions and limitations 18 | * under the License. 19 | * 20 | * SPDX-FileCopyrightText: The Apache Software Foundation 21 | * SPDX-License-Identifier: Apache-2.0 22 | */ 23 | 24 | import type { TextPositionSelector } from '../types.js'; 25 | import type { Chunk, Chunker, ChunkRange } from './chunker.js'; 26 | import { CodePointSeeker } from './code-point-seeker.js'; 27 | import { TextSeeker } from './seeker.js'; 28 | 29 | /** 30 | * Returns a {@link TextPositionSelector} that points at the target text within 31 | * the given scope. 
32 | * 33 | * This is an abstract implementation of the function’s logic, which expects a 34 | * generic {@link Chunker} to represent the text, and a {@link ChunkRange} to 35 | * represent the target. 36 | * 37 | * See {@link dom.describeTextPosition} for a wrapper around 38 | * this implementation which applies it to the text of an HTML DOM. 39 | * 40 | * @param target - The range of characters that the selector should describe 41 | * @param scope - The text, presented as a {@link Chunker}, which contains the 42 | * target range, and relative to which its position will be measured 43 | * @returns The {@link TextPositionSelector} that describes `target` relative 44 | * to `scope` 45 | * 46 | * @public 47 | */ 48 | export async function describeTextPosition>( 49 | target: ChunkRange, 50 | scope: Chunker, 51 | ): Promise { 52 | const codeUnitSeeker = new TextSeeker(scope); 53 | const codePointSeeker = new CodePointSeeker(codeUnitSeeker); 54 | 55 | codePointSeeker.seekToChunk(target.startChunk, target.startIndex); 56 | const start = codePointSeeker.position; 57 | codePointSeeker.seekToChunk(target.endChunk, target.endIndex); 58 | const end = codePointSeeker.position; 59 | return { 60 | type: 'TextPositionSelector', 61 | start, 62 | end, 63 | }; 64 | } 65 | -------------------------------------------------------------------------------- /packages/dom/src/text-position/match.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * @license 3 | * Licensed to the Apache Software Foundation (ASF) under one 4 | * or more contributor license agreements. See the NOTICE file 5 | * distributed with this work for additional information 6 | * regarding copyright ownership. The ASF licenses this file 7 | * to you under the Apache License, Version 2.0 (the 8 | * "License"); you may not use this file except in compliance 9 | * with the License. 
You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, 14 | * software distributed under the License is distributed on an 15 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | * KIND, either express or implied. See the License for the 17 | * specific language governing permissions and limitations 18 | * under the License. 19 | * 20 | * SPDX-FileCopyrightText: The Apache Software Foundation 21 | * SPDX-License-Identifier: Apache-2.0 22 | */ 23 | 24 | import type { Matcher, TextPositionSelector } from '@apache-annotator/selector'; 25 | import { textPositionSelectorMatcher as abstractTextPositionSelectorMatcher } from '@apache-annotator/selector'; 26 | import { TextNodeChunker } from '../text-node-chunker.js'; 27 | 28 | /** 29 | * Find the range of text corresponding to the given {@link 30 | * TextPositionSelector}. 31 | * 32 | * The start and end positions are measured relative to the first text character 33 | * in the given scope. 34 | * 35 | * The function is curried, taking first the selector and then the scope. 36 | * 37 | * Its end result is an (async) generator producing a single {@link https://developer.mozilla.org/en-US/docs/Web/API/Range 38 | * | Range} to represent the match (unlike e.g. a {@link TextQuoteSelector}, a 39 | * TextPositionSelector cannot have multiple matches). 40 | * 41 | * @example 42 | * ``` 43 | * const selector = { type: 'TextPositionSelector', start: 702, end: 736 }; 44 | * const scope = document.body; 45 | * const matches = createTextPositionSelectorMatcher(selector)(scope); 46 | * const match = (await matches.next()).value; 47 | * // ⇒ Range { startContainer: #text, startOffset: 64, endContainer: #text, 48 | * // endOffset: 98, … } 49 | * ``` 50 | * 51 | * @param selector - The {@link TextPositionSelector} to be anchored. 52 | * @returns A {@link Matcher} function that applies `selector` within a given 53 | * `scope`.
54 | * 55 | * @public 56 | */ 57 | export function createTextPositionSelectorMatcher( 58 | selector: TextPositionSelector, 59 | ): Matcher { 60 | const abstractMatcher = abstractTextPositionSelectorMatcher(selector); 61 | 62 | return async function* matchAll(scope) { 63 | const textChunks = new TextNodeChunker(scope); 64 | 65 | const matches = abstractMatcher(textChunks); 66 | 67 | for await (const abstractMatch of matches) { 68 | yield textChunks.chunkRangeToRange(abstractMatch); 69 | } 70 | }; 71 | } 72 | -------------------------------------------------------------------------------- /packages/dom/src/text-position/describe.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * @license 3 | * Licensed to the Apache Software Foundation (ASF) under one 4 | * or more contributor license agreements. See the NOTICE file 5 | * distributed with this work for additional information 6 | * regarding copyright ownership. The ASF licenses this file 7 | * to you under the Apache License, Version 2.0 (the 8 | * "License"); you may not use this file except in compliance 9 | * with the License. You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, 14 | * software distributed under the License is distributed on an 15 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | * KIND, either express or implied. See the License for the 17 | * specific language governing permissions and limitations 18 | * under the License. 
19 | * 20 | * SPDX-FileCopyrightText: The Apache Software Foundation 21 | * SPDX-License-Identifier: Apache-2.0 22 | */ 23 | 24 | import type { TextPositionSelector } from '@apache-annotator/selector'; 25 | import { describeTextPosition as abstractDescribeTextPosition } from '@apache-annotator/selector'; 26 | import { ownerDocument } from '../owner-document.js'; 27 | import { TextNodeChunker } from '../text-node-chunker.js'; 28 | import { toRange } from '../to-range.js'; 29 | 30 | /** 31 | * Returns a {@link TextPositionSelector} that points at the target text within 32 | * the given scope. 33 | * 34 | * When no scope is given, the position is described relative to the document 35 | * as a whole. Note this means all the characters in all Text nodes are counted 36 | * to determine the target’s position, including those in the `<head>` and 37 | * whitespace, hence even a minor modification could make the selector point to 38 | * a different text than its original target. 39 | * 40 | * @example 41 | * ``` 42 | * const target = window.getSelection().getRangeAt(0); 43 | * const selector = await describeTextPosition(target); 44 | * console.log(selector); 45 | * // { 46 | * // type: 'TextPositionSelector', 47 | * // start: 702, 48 | * // end: 736 49 | * // } 50 | * ``` 51 | * 52 | * @param range - The {@link https://developer.mozilla.org/en-US/docs/Web/API/Range 53 | * | Range} whose text content will be described. 54 | * @param scope - A Node or Range that serves as the ‘document’ relative to 55 | * whose text the position of `range` is measured. Defaults to the full 56 | * Document that contains `range`. 57 | * @returns The selector describing `range` within `scope`. 58 | * 59 | * @public 60 | */ 61 | export async function describeTextPosition( 62 | range: Range, 63 | scope?: Node | Range, 64 | ): Promise { 65 | scope = toRange(scope ??
ownerDocument(range)); 66 | 67 | const textChunks = new TextNodeChunker(scope); 68 | if (textChunks.currentChunk === null) 69 | throw new RangeError('Scope does not contain any Text nodes.'); 70 | 71 | return await abstractDescribeTextPosition( 72 | textChunks.rangeToChunkRange(range), 73 | textChunks, 74 | ); 75 | } 76 | -------------------------------------------------------------------------------- /packages/selector/src/text/match-text-position.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * @license 3 | * Licensed to the Apache Software Foundation (ASF) under one 4 | * or more contributor license agreements. See the NOTICE file 5 | * distributed with this work for additional information 6 | * regarding copyright ownership. The ASF licenses this file 7 | * to you under the Apache License, Version 2.0 (the 8 | * "License"); you may not use this file except in compliance 9 | * with the License. You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, 14 | * software distributed under the License is distributed on an 15 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | * KIND, either express or implied. See the License for the 17 | * specific language governing permissions and limitations 18 | * under the License. 19 | * 20 | * SPDX-FileCopyrightText: The Apache Software Foundation 21 | * SPDX-License-Identifier: Apache-2.0 22 | */ 23 | 24 | import type { TextPositionSelector } from '../types.js'; 25 | import type { Chunk, ChunkRange, Chunker } from './chunker.js'; 26 | import { CodePointSeeker } from './code-point-seeker.js'; 27 | import { TextSeeker } from './seeker.js'; 28 | 29 | /** 30 | * Find the range of text corresponding to the given {@link TextPositionSelector}. 
31 | * 32 | * This is an abstract implementation of the function’s logic, which expects a 33 | * generic {@link Chunker} to represent the text, and returns an (async) 34 | * generator producing a single {@link ChunkRange} to represent the match. 35 | * (unlike e.g. TextQuoteSelector, it cannot result in multiple matches). 36 | * 37 | * See {@link dom.createTextPositionSelectorMatcher} for a 38 | * wrapper around this implementation which applies it to the text of an HTML 39 | * DOM. 40 | * 41 | * The function is curried, taking first the selector and then the text. 42 | * 43 | * @example 44 | * ``` 45 | * const selector = { type: 'TextPositionSelector', start: 702, end: 736 }; 46 | * const matches = textPositionSelectorMatcher(selector)(textChunks); 47 | * const match = (await matches.next()).value; 48 | * console.log(match); 49 | * // ⇒ { startChunk: { … }, startIndex: 64, endChunk: { … }, endIndex: 98 } 50 | * ``` 51 | * 52 | * @param selector - the {@link TextPositionSelector} to be anchored 53 | * @returns a {@link Matcher} function that applies `selector` to a given text 54 | * 55 | * @public 56 | */ 57 | export function textPositionSelectorMatcher( 58 | selector: TextPositionSelector, 59 | ): >( 60 | scope: Chunker, 61 | ) => AsyncGenerator, void, void> { 62 | const { start, end } = selector; 63 | 64 | return async function* matchAll>( 65 | textChunks: Chunker, 66 | ) { 67 | const codeUnitSeeker = new TextSeeker(textChunks); 68 | const codePointSeeker = new CodePointSeeker(codeUnitSeeker); 69 | 70 | codePointSeeker.seekTo(start); 71 | const startChunk = codeUnitSeeker.currentChunk; 72 | const startIndex = codeUnitSeeker.offsetInChunk; 73 | codePointSeeker.seekTo(end); 74 | const endChunk = codeUnitSeeker.currentChunk; 75 | const endIndex = codeUnitSeeker.offsetInChunk; 76 | 77 | yield { startChunk, startIndex, endChunk, endIndex }; 78 | }; 79 | } 80 | -------------------------------------------------------------------------------- 
/packages/dom/src/text-quote/describe.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * @license 3 | * Licensed to the Apache Software Foundation (ASF) under one 4 | * or more contributor license agreements. See the NOTICE file 5 | * distributed with this work for additional information 6 | * regarding copyright ownership. The ASF licenses this file 7 | * to you under the Apache License, Version 2.0 (the 8 | * "License"); you may not use this file except in compliance 9 | * with the License. You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, 14 | * software distributed under the License is distributed on an 15 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | * KIND, either express or implied. See the License for the 17 | * specific language governing permissions and limitations 18 | * under the License. 19 | * 20 | * SPDX-FileCopyrightText: The Apache Software Foundation 21 | * SPDX-License-Identifier: Apache-2.0 22 | */ 23 | 24 | import type { 25 | TextQuoteSelector, 26 | DescribeTextQuoteOptions, 27 | } from '@apache-annotator/selector'; 28 | import { describeTextQuote as abstractDescribeTextQuote } from '@apache-annotator/selector'; 29 | import { ownerDocument } from '../owner-document.js'; 30 | import { TextNodeChunker } from '../text-node-chunker.js'; 31 | import { toRange } from '../to-range.js'; 32 | 33 | /** 34 | * Returns a {@link TextQuoteSelector} that unambiguously describes the given 35 | * range of text, within the given scope. 36 | * 37 | * The selector will contain the *exact* target quote, and in case this quote 38 | * appears multiple times in the text, sufficient context around the quote will 39 | * be included in the selector’s *prefix* and *suffix* attributes to 40 | * disambiguate. 
By default, more prefix and suffix are included than strictly 41 | * required; both in order to be robust against slight modifications, and in an 42 | * attempt to not end halfway through a word (mainly for the sake of human readability). 43 | * 44 | * @example 45 | * ``` 46 | * const target = window.getSelection().getRangeAt(0); 47 | * const selector = await describeTextQuote(target); 48 | * console.log(selector); 49 | * // { 50 | * // type: 'TextQuoteSelector', 51 | * // exact: 'ipsum', 52 | * // prefix: 'Lorem ', 53 | * // suffix: ' dolor' 54 | * // } 55 | * ``` 56 | * 57 | * @param range - The {@link https://developer.mozilla.org/en-US/docs/Web/API/Range 58 | * | Range} whose text content will be described 59 | * @param scope - A Node or Range that serves as the ‘document’ for purposes of 60 | * finding occurrences and determining prefix and suffix. Defaults to the full 61 | * Document that contains `range`. 62 | * @param options - Options to fine-tune the function’s behaviour. 63 | * @returns The selector unambiguously describing `range` within `scope`. 64 | * 65 | * @public 66 | */ 67 | export async function describeTextQuote( 68 | range: Range, 69 | scope?: Node | Range, 70 | options: DescribeTextQuoteOptions = {}, 71 | ): Promise { 72 | const scopeAsRange = toRange(scope ?? ownerDocument(range)); 73 | 74 | const chunker = new TextNodeChunker(scopeAsRange); 75 | 76 | return await abstractDescribeTextQuote( 77 | chunker.rangeToChunkRange(range), 78 | () => new TextNodeChunker(scopeAsRange), 79 | options, 80 | ); 81 | } 82 | -------------------------------------------------------------------------------- /packages/selector/src/refinable.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * @license 3 | * Licensed to the Apache Software Foundation (ASF) under one 4 | * or more contributor license agreements. See the NOTICE file 5 | * distributed with this work for additional information 6 | * regarding copyright ownership.
The ASF licenses this file 7 | * to you under the Apache License, Version 2.0 (the 8 | * "License"); you may not use this file except in compliance 9 | * with the License. You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, 14 | * software distributed under the License is distributed on an 15 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | * KIND, either express or implied. See the License for the 17 | * specific language governing permissions and limitations 18 | * under the License. 19 | * 20 | * SPDX-FileCopyrightText: The Apache Software Foundation 21 | * SPDX-License-Identifier: Apache-2.0 22 | */ 23 | 24 | import type { Matcher, Selector } from './types.js'; 25 | 26 | /** 27 | * A Refinable selector can have the `refinedBy` attribute, whose value must be 28 | * of the same type (possibly again refined, recursively). 29 | * 30 | * See {@link https://www.w3.org/TR/2017/REC-annotation-model-20170223/#refinement-of-selection 31 | * | §4.2.9 Refinement of Selection} in the Web Annotation Data Model. 32 | * 33 | * @example 34 | * Example value of type `Refinable`: 35 | * 36 | * { 37 | * type: "CssSelector", 38 | * …, 39 | * refinedBy: { 40 | * type: "TextQuoteSelector", 41 | * …, 42 | * refinedBy: { … }, // again either a CssSelector or TextQuoteSelector 43 | * } 44 | * } 45 | */ 46 | export type Refinable = T & { refinedBy?: Refinable }; 47 | 48 | /** 49 | * Wrap a matcher creation function so that it supports refinement of selection. 50 | * 51 | * See {@link https://www.w3.org/TR/2017/REC-annotation-model-20170223/#refinement-of-selection 52 | * | §4.2.9 Refinement of Selection} in the Web Annotation Data Model. 
53 | * 54 | * @param matcherCreator - The function to wrap; it will be executed both for 55 | * {@link Selector}s passed to the returned wrapper function, and for any 56 | * refining Selector those might contain (and any refinement of that, etc.). 57 | * 58 | * @public 59 | */ 60 | export function makeRefinable< 61 | TSelector extends Selector, 62 | TScope, 63 | // To enable refinement, the implementation’s Match object must be usable as a 64 | // Scope object itself. 65 | TMatch extends TScope 66 | >( 67 | matcherCreator: (selector: Refinable) => Matcher, 68 | ): (selector: Refinable) => Matcher { 69 | return function createMatcherWithRefinement( 70 | sourceSelector: Refinable, 71 | ): Matcher { 72 | const matcher = matcherCreator(sourceSelector); 73 | 74 | if (sourceSelector.refinedBy) { 75 | const refiningSelector = createMatcherWithRefinement( 76 | sourceSelector.refinedBy, 77 | ); 78 | 79 | return async function* matchAll(scope) { 80 | for await (const match of matcher(scope)) { 81 | yield* refiningSelector(match); 82 | } 83 | }; 84 | } 85 | 86 | return matcher; 87 | }; 88 | } 89 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "annotator", 3 | "private": true, 4 | "description": "Apache Annotator provides annotation enabling code for browsers, servers, and humans.", 5 | "homepage": "https://annotator.apache.org", 6 | "repository": { 7 | "type": "git", 8 | "url": "https://github.com/apache/incubator-annotator.git" 9 | }, 10 | "license": "Apache-2.0", 11 | "author": "Apache Software Foundation", 12 | "workspaces": [ 13 | "packages/*" 14 | ], 15 | "scripts": { 16 | "build": "concurrently npm:build:*", 17 | "build:js": "lerna exec --parallel -- babel -d lib -s -x .ts --env-name production --root-mode upward src", 18 | "build:misc": "lerna exec --parallel -- shx cp ../../DISCLAIMER-WIP ../../LICENSE ../../NOTICE 
../../README.md .", 19 | "build:types": "tsc --build", 20 | "clean": "tsc --build --clean && lerna exec -- shx rm -rf DISCLAIMER-WIP LICENSE NOTICE README.md coverage docs lib web/dist", 21 | "docs": "tsc --build && typedoc", 22 | "lint": "eslint .", 23 | "prepare": "is-ci || shx test -d .git || exit 0 && husky install", 24 | "prepublishOnly": "npm run build", 25 | "publish": "lerna publish", 26 | "publish:ci": "npm run publish --canary --exact --force-publish '*' --no-verify-access --yes minor", 27 | "start": "npm run web:server", 28 | "test": "cross-env BABEL_ENV=test c8 -a -r html -r text mocha packages/**/*.test.ts", 29 | "test:watch": "cross-env BABEL_ENV=test mocha -p -w packages/**/*.test.ts", 30 | "validate": "cross-env BABEL_ENV=test mocha test/**/*.test.ts", 31 | "web:build": "webpack --config=web/webpack.config.js --mode development", 32 | "web:server": "webpack-dev-server --config=web/webpack.config.js --hot --mode development" 33 | }, 34 | "devDependencies": { 35 | "@babel/cli": "^7.13.14", 36 | "@babel/core": "^7.13.14", 37 | "@babel/plugin-proposal-class-properties": "^7.13.0", 38 | "@babel/plugin-transform-runtime": "^7.13.10", 39 | "@babel/preset-env": "^7.13.12", 40 | "@babel/preset-typescript": "^7.13.0", 41 | "@types/mocha": "^10.0.2", 42 | "@types/node-fetch": "^2.6.6", 43 | "@types/resolve": "^1.20.3", 44 | "@typescript-eslint/eslint-plugin": "^6.7.3", 45 | "@typescript-eslint/parser": "^6.7.3", 46 | "ajv": "^8.12.0", 47 | "babel-loader": "^9.1.3", 48 | "babel-plugin-istanbul": "^6.1.1", 49 | "babel-plugin-module-resolver": "^5.0.0", 50 | "babel-plugin-preserve-comment-header": "^1.0.1", 51 | "babel-register-esm": "^1.2.5", 52 | "c8": "^8.0.1", 53 | "concurrently": "^8.2.1", 54 | "cross-env": "^7.0.3", 55 | "eslint": "^8.50.0", 56 | "eslint-config-prettier": "^9.0.0", 57 | "eslint-import-resolver-babel-module": "^5.3.2", 58 | "eslint-plugin-import": "^2.28.1", 59 | "eslint-plugin-node": "^11.1.0", 60 | "file-loader": "^6.2.0", 61 | 
"global-jsdom": "^9.1.0", 62 | "husky": "^8.0.3", 63 | "is-ci": "^3.0.1", 64 | "jsdom": "^22.1.0", 65 | "lerna": "^7.3.0", 66 | "lint-staged": "^14.0.1", 67 | "mocha": "^10.2.0", 68 | "node-fetch": "^3.3.2", 69 | "prettier": "^3.0.3", 70 | "resolve": "^1.22.6", 71 | "shx": "^0.3.4", 72 | "typedoc": "^0.25.1", 73 | "typescript": "^5.2.2", 74 | "web-annotation-tests": "https://github.com/w3c/web-annotation-tests", 75 | "webpack": "^5.88.2", 76 | "webpack-cli": "^5.1.4", 77 | "webpack-dev-server": "^4.15.1" 78 | }, 79 | "engines": { 80 | "node": ">=18" 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /packages/dom/src/range/cartesian.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * @license 3 | * Licensed to the Apache Software Foundation (ASF) under one 4 | * or more contributor license agreements. See the NOTICE file 5 | * distributed with this work for additional information 6 | * regarding copyright ownership. The ASF licenses this file 7 | * to you under the Apache License, Version 2.0 (the 8 | * "License"); you may not use this file except in compliance 9 | * with the License. You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, 14 | * software distributed under the License is distributed on an 15 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | * KIND, either express or implied. See the License for the 17 | * specific language governing permissions and limitations 18 | * under the License. 19 | * 20 | * SPDX-FileCopyrightText: The Apache Software Foundation 21 | * SPDX-License-Identifier: Apache-2.0 22 | */ 23 | 24 | /** 25 | * Generates the Cartesian product of the sets generated by the given iterables. 26 | * 27 | * 𝑆₁ × ... 
× 𝑆ₙ = { (𝑒₁,...,𝑒ₙ) | 𝑒ᵢ ∈ 𝑆ᵢ } 28 | */ 29 | export async function* cartesian( 30 | ...iterables: (Iterable | AsyncIterable)[] 31 | ): AsyncGenerator { 32 | // Create iterators for traversing each iterable and tagging every value 33 | // with the index of its source iterable. 34 | const iterators = iterables.map((iterable, index) => { 35 | const generator = async function* () { 36 | for await (const value of iterable) { 37 | yield { index, value }; 38 | } 39 | return { index }; 40 | }; 41 | return generator(); 42 | }); 43 | 44 | try { 45 | // Track the number of non-exhausted iterators. 46 | let active = iterators.length; 47 | 48 | // Track all the values of each iterator in a log. 49 | const logs = iterators.map(() => []) as T[][]; 50 | 51 | // Track the promise of the next value of each iterator. 52 | const nexts = iterators.map((it) => it.next()); 53 | 54 | // Iterate the values of all the iterators in parallel and yield tuples from 55 | // the partial product of each new value and the existing logs of the other 56 | // iterators. 57 | while (active) { 58 | // Wait for the next result. 59 | const result = await Promise.race(nexts); 60 | const { index } = result.value; 61 | 62 | // If the iterator has exhausted all the values, set the promise 63 | // of its next value to never resolve. 64 | if (result.done) { 65 | active--; 66 | nexts[index] = new Promise(() => undefined); 67 | continue; 68 | } 69 | 70 | // Append the new value to the log. 71 | const { value } = result.value; 72 | logs[index].push(value); 73 | 74 | // Record the promise of the next value. 75 | nexts[index] = iterators[index].next(); 76 | 77 | // Create a scratch input for computing a partial product. 78 | const scratch = [...logs]; 79 | scratch[index] = [value]; 80 | 81 | // Synchronously compute and yield tuples of the partial product. 
82 | yield* scratch.reduce( 83 | (acc, next) => acc.flatMap((v) => next.map((w) => [...v, w])), 84 | [[]] as T[][], 85 | ); 86 | } 87 | } finally { 88 | const closeAll = iterators.map((it, index) => it.return({ index })); 89 | await Promise.all(closeAll); 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /babel.config.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @license 3 | * Licensed to the Apache Software Foundation (ASF) under one 4 | * or more contributor license agreements. See the NOTICE file 5 | * distributed with this work for additional information 6 | * regarding copyright ownership. The ASF licenses this file 7 | * to you under the Apache License, Version 2.0 (the 8 | * "License"); you may not use this file except in compliance 9 | * with the License. You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, 14 | * software distributed under the License is distributed on an 15 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | * KIND, either express or implied. See the License for the 17 | * specific language governing permissions and limitations 18 | * under the License. 19 | * 20 | * SPDX-FileCopyrightText: The Apache Software Foundation 21 | * SPDX-License-Identifier: Apache-2.0 22 | */ 23 | 24 | const path = require('path'); 25 | const { resolvePath } = require('babel-plugin-module-resolver'); 26 | 27 | const packagePath = path.join(__dirname, 'packages'); 28 | 29 | module.exports = (api) => { 30 | const ENV = api.env(); 31 | const DEV = ENV === 'development'; 32 | const TEST = ENV === 'test'; 33 | 34 | // Options for the @babel/env preset. 35 | const envOptions = { 36 | // Use minimal syntax fixes where possible 37 | // Note: This setting may become the default in Babel 8. 
38 | bugfixes: true, 39 | // Do not transform module syntax. 40 | modules: false, 41 | }; 42 | 43 | // Options for the @babel/typescript preset. 44 | const typescriptOptions = { 45 | // Opt in to a Babel 8 default. 46 | allowDeclareFields: true, 47 | // Be explicit about type-only imports. 48 | onlyRemoveTypeImports: true, 49 | }; 50 | 51 | // Options for the module-resolver plugin. 52 | // Used for resolving source files during development. 53 | const resolverOptions = { 54 | ...(DEV || TEST 55 | ? { 56 | alias: { 57 | '^@apache-annotator/([^/]+)$': ([, name]) => 58 | path.join(packagePath, name, 'src', 'index.ts'), 59 | }, 60 | resolvePath(sourcePath, currentFile, opts) { 61 | if ( 62 | currentFile.startsWith(packagePath) && 63 | currentFile.endsWith('.ts') && 64 | sourcePath.startsWith('.') && 65 | sourcePath.endsWith('.js') 66 | ) { 67 | return sourcePath.replace(/\.js$/, '.ts'); 68 | } 69 | return resolvePath(sourcePath, currentFile, opts); 70 | }, 71 | } 72 | : null), 73 | }; 74 | 75 | // Options for the @babel/transform-runtime plugin. 76 | const runtimeOptions = { 77 | // Use corejs version 3. 78 | corejs: { version: 3, proposals: true }, 79 | }; 80 | 81 | return { 82 | plugins: [ 83 | '@babel/plugin-proposal-class-properties', 84 | ['@babel/transform-runtime', runtimeOptions], 85 | ['module-resolver', resolverOptions], 86 | 'preserve-comment-header', 87 | ...(TEST ? ['istanbul'] : []), 88 | ], 89 | presets: [ 90 | ['@babel/env', envOptions], 91 | ['@babel/typescript', typescriptOptions], 92 | ], 93 | targets: { 94 | browsers: 'defaults', 95 | esmodules: true, 96 | node: TEST ? 'current' : '14.15', 97 | }, 98 | }; 99 | }; 100 | -------------------------------------------------------------------------------- /test/data-model.test.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * @license 3 | * Licensed to the Apache Software Foundation (ASF) under one 4 | * or more contributor license agreements. 
See the NOTICE file 5 | * distributed with this work for additional information 6 | * regarding copyright ownership. The ASF licenses this file 7 | * to you under the Apache License, Version 2.0 (the 8 | * "License"); you may not use this file except in compliance 9 | * with the License. You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, 14 | * software distributed under the License is distributed on an 15 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | * KIND, either express or implied. See the License for the 17 | * specific language governing permissions and limitations 18 | * under the License. 19 | * 20 | * SPDX-FileCopyrightText: The Apache Software Foundation 21 | * SPDX-License-Identifier: Apache-2.0 22 | */ 23 | 24 | /* global process */ 25 | 26 | import { strict as assert } from 'assert'; 27 | import fs from 'fs'; 28 | import { URL } from 'url'; 29 | import Ajv from 'ajv'; 30 | import fetch from 'node-fetch'; 31 | import resolve from 'resolve'; 32 | 33 | // file or URL location 34 | let url = ''; 35 | // find the URL parameter (which might be a relative path to a file) 36 | let found_url = false; 37 | process.argv.forEach((val, index) => { 38 | if (val.startsWith('--url')) { 39 | found_url = true; 40 | if (val[5] === '=') { 41 | url = val.slice('--url='.length); // take everything after the first '=', so a value containing '=' (e.g. a query string) stays intact 42 | } else { 43 | // assume the next parameter is a URL 44 | url = process.argv[index + 1]; 45 | } 46 | } 47 | }); 48 | 49 | function requireJSON(name: string): Record { 50 | const resolvedPath = resolve.sync(name); 51 | const data = fs.readFileSync(resolvedPath).toString(); 52 | return JSON.parse(data) as Record; 53 | } 54 | 55 | const DEFINITIONS = [ 56 | 'annotations', 57 | 'bodyTarget', 58 | 'choiceSet', 59 | 'collections', 60 | 'id', 61 | 'otherProperties', 62 | 'specificResource', 63 | ].map((name) => requireJSON(`web-annotation-tests/definitions/${name}.json`)); 64
| 65 | const MUSTS = requireJSON( 66 | 'web-annotation-tests/annotations/annotationMusts.test', 67 | ); 68 | 69 | const META_SCHEMA = requireJSON('ajv/lib/refs/json-schema-draft-04.json'); 70 | 71 | const ajv = new Ajv({ meta: false }); 72 | ajv.addMetaSchema(META_SCHEMA); 73 | DEFINITIONS.forEach((schema) => ajv.addSchema(schema)); 74 | 75 | describe('Test JSON against Schemas', () => { 76 | let data: Record; 77 | 78 | before(async function () { 79 | if (!found_url) { 80 | this.skip(); 81 | } else { 82 | // load the data from a remote URL, or from a local file (plain path or file: URL); a one-letter "scheme" is treated as a Windows drive letter 83 | const is_remote = /^(?!file:)[a-z][a-z\d+.-]+:/i.test(url); 84 | if (is_remote) { 85 | const data_response = await fetch(url); 86 | data = (await data_response.json()) as Record; 87 | } else { 88 | // local file: readFileSync accepts a file: URL object as well as a (possibly relative) path string 89 | data = JSON.parse( 90 | fs.readFileSync(url.startsWith('file:') ? new URL(url) : url, 'utf8'), 91 | ) as Record; 92 | } 93 | 94 | if (!data) { 95 | this.skip(); 96 | } 97 | } 98 | }); 99 | 100 | const assertions = MUSTS['assertions'] as [string]; 101 | assertions.forEach((schemaPath: string) => { 102 | const schema = requireJSON(`web-annotation-tests/${schemaPath}`); 103 | it(schema['title'] as string, () => { 104 | const valid = ajv.validate(schema, data); 105 | assert.ok(valid, ajv.errorsText()); 106 | }); 107 | }); 108 | }); 109 | -------------------------------------------------------------------------------- /packages/dom/src/text-quote/match.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * @license 3 | * Licensed to the Apache Software Foundation (ASF) under one 4 | * or more contributor license agreements. See the NOTICE file 5 | * distributed with this work for additional information 6 | * regarding copyright ownership. The ASF licenses this file 7 | * to you under the Apache License, Version 2.0 (the 8 | * "License"); you may not use this file except in compliance 9 | * with the License.
You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, 14 | * software distributed under the License is distributed on an 15 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | * KIND, either express or implied. See the License for the 17 | * specific language governing permissions and limitations 18 | * under the License. 19 | * 20 | * SPDX-FileCopyrightText: The Apache Software Foundation 21 | * SPDX-License-Identifier: Apache-2.0 22 | */ 23 | 24 | import type { Matcher, TextQuoteSelector } from '@apache-annotator/selector'; 25 | import { textQuoteSelectorMatcher as abstractTextQuoteSelectorMatcher } from '@apache-annotator/selector'; 26 | import { TextNodeChunker, EmptyScopeError } from '../text-node-chunker.js'; 27 | 28 | /** 29 | * Find occurrences in a text matching the given {@link 30 | * TextQuoteSelector}. 31 | * 32 | * This performs an exact search for the selector’s quote (including prefix and 33 | * suffix) within the text contained in the given scope (a {@link 34 | * https://developer.mozilla.org/en-US/docs/Web/API/Range | Range}). 35 | * 36 | * Note the match is based on strict character-by-character equivalence, i.e. 37 | * it is sensitive to whitespace, capitalisation, etc. 38 | * 39 | * The function is curried, taking first the selector and then the scope. 40 | * 41 | * As there may be multiple matches for a given selector (when its prefix and 42 | * suffix attributes are not sufficient to disambiguate it), the matcher will 43 | * return an (async) generator that produces each match in the order they are 44 | * found in the text. 45 | * 46 | * *XXX Modifying the DOM (e.g. to highlight the text) while the search is still 47 | * running can mess up and result in an error or an infinite loop. 
See [issue 48 | * #112](https://github.com/apache/incubator-annotator/issues/112).* 49 | * 50 | * @example 51 | * ``` 52 | * // Find the word ‘banana’. 53 | * const selector = { type: 'TextQuoteSelector', exact: 'banana' }; 54 | * const scope = document.body; 55 | * 56 | * // Read all matches. 57 | * const matches = createTextQuoteSelectorMatcher(selector)(scope); 58 | * for await (const match of matches) console.log(match); 59 | * // ⇒ Range { startContainer: #text, startOffset: 187, endContainer: #text, 60 | * // endOffset: 193, … } 61 | * // ⇒ Range { startContainer: #text, startOffset: 631, endContainer: #text, 62 | * // endOffset: 637, … } 63 | * ``` 64 | * 65 | * @param selector - The {@link TextQuoteSelector} to be anchored. 66 | * @returns A {@link Matcher} function that applies `selector` within a given 67 | * `scope`. 68 | * 69 | * @public 70 | */ 71 | export function createTextQuoteSelectorMatcher( 72 | selector: TextQuoteSelector, 73 | ): Matcher { 74 | const abstractMatcher = abstractTextQuoteSelectorMatcher(selector); 75 | 76 | return async function* matchAll(scope) { 77 | let textChunks; 78 | try { 79 | textChunks = new TextNodeChunker(scope); 80 | } catch (err) { 81 | // An empty range contains no matches. 82 | if (err instanceof EmptyScopeError) return; 83 | else throw err; 84 | } 85 | 86 | for await (const abstractMatch of abstractMatcher(textChunks)) { 87 | yield textChunks.chunkRangeToRange(abstractMatch); 88 | } 89 | }; 90 | } 91 | -------------------------------------------------------------------------------- /packages/selector/src/types.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * @license 3 | * Licensed to the Apache Software Foundation (ASF) under one 4 | * or more contributor license agreements. See the NOTICE file 5 | * distributed with this work for additional information 6 | * regarding copyright ownership.
The ASF licenses this file 7 | * to you under the Apache License, Version 2.0 (the 8 | * "License"); you may not use this file except in compliance 9 | * with the License. You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, 14 | * software distributed under the License is distributed on an 15 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | * KIND, either express or implied. See the License for the 17 | * specific language governing permissions and limitations 18 | * under the License. 19 | * 20 | * SPDX-FileCopyrightText: The Apache Software Foundation 21 | * SPDX-License-Identifier: Apache-2.0 22 | */ 23 | 24 | /** 25 | * A {@link https://www.w3.org/TR/2017/REC-annotation-model-20170223/#selectors 26 | * | Selector} object of the Web Annotation Data Model. 27 | * 28 | * Corresponds to RDF class {@link http://www.w3.org/ns/oa#Selector} 29 | * 30 | * @public 31 | */ 32 | export interface Selector { 33 | /** 34 | * A Selector can be refined by another Selector. 35 | * 36 | * See {@link https://www.w3.org/TR/2017/REC-annotation-model-20170223/#refinement-of-selection 37 | * | §4.2.9 Refinement of Selection} in the Web Annotation Data Model. 38 | * 39 | * Corresponds to RDF property {@link http://www.w3.org/ns/oa#refinedBy} 40 | */ 41 | refinedBy?: Selector; 42 | } 43 | 44 | /** 45 | * The {@link https://www.w3.org/TR/2017/REC-annotation-model-20170223/#css-selector 46 | * | CssSelector} of the Web Annotation Data Model. 47 | * 48 | * Corresponds to RDF class {@link http://www.w3.org/ns/oa#CssSelector} 49 | * 50 | * @public 51 | */ 52 | export interface CssSelector extends Selector { 53 | type: 'CssSelector'; 54 | value: string; 55 | } 56 | 57 | /** 58 | * The {@link https://www.w3.org/TR/2017/REC-annotation-model-20170223/#text-quote-selector 59 | * | TextQuoteSelector} of the Web Annotation Data Model. 
60 | * 61 | * Corresponds to RDF class {@link http://www.w3.org/ns/oa#TextQuoteSelector} 62 | * 63 | * @public 64 | */ 65 | export interface TextQuoteSelector extends Selector { 66 | type: 'TextQuoteSelector'; 67 | exact: string; 68 | prefix?: string; 69 | suffix?: string; 70 | } 71 | 72 | /** 73 | * The {@link https://www.w3.org/TR/2017/REC-annotation-model-20170223/#text-position-selector 74 | * | TextPositionSelector} of the Web Annotation Data Model. 75 | * 76 | * Corresponds to RDF class {@link http://www.w3.org/ns/oa#TextPositionSelector} 77 | * 78 | * @public 79 | */ 80 | export interface TextPositionSelector extends Selector { 81 | type: 'TextPositionSelector'; 82 | start: number; // more precisely: non-negative integer 83 | end: number; // more precisely: non-negative integer 84 | } 85 | 86 | /** 87 | * The {@link https://www.w3.org/TR/2017/REC-annotation-model-20170223/#range-selector 88 | * | RangeSelector} of the Web Annotation Data Model. 89 | * 90 | * Corresponds to RDF class {@link http://www.w3.org/ns/oa#RangeSelector} 91 | * 92 | * @public 93 | */ 94 | export interface RangeSelector extends Selector { 95 | type: 'RangeSelector'; 96 | startSelector: T; 97 | endSelector: T; 98 | } 99 | 100 | /** 101 | * A function that finds the match(es) in the given (sub)document (the ‘scope’) 102 | * corresponding to some (prespecified) selector(s). 103 | * 104 | * @public 105 | */ 106 | export interface Matcher { 107 | (scope: TScope): AsyncGenerator; 108 | } 109 | -------------------------------------------------------------------------------- /packages/dom/test/text-position/match-cases.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * @license 3 | * Licensed to the Apache Software Foundation (ASF) under one 4 | * or more contributor license agreements. See the NOTICE file 5 | * distributed with this work for additional information 6 | * regarding copyright ownership. 
The ASF licenses this file 7 | * to you under the Apache License, Version 2.0 (the 8 | * "License"); you may not use this file except in compliance 9 | * with the License. You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, 14 | * software distributed under the License is distributed on an 15 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | * KIND, either express or implied. See the License for the 17 | * specific language governing permissions and limitations 18 | * under the License. 19 | * 20 | * SPDX-FileCopyrightText: The Apache Software Foundation 21 | * SPDX-License-Identifier: Apache-2.0 22 | */ 23 | 24 | import type { TextPositionSelector } from '@apache-annotator/selector'; 25 | import type { RangeInfo } from '../utils.js'; 26 | 27 | export const testCases: { 28 | [name: string]: { 29 | html: string; 30 | selector: TextPositionSelector; 31 | expected: RangeInfo[]; 32 | }; 33 | } = { 34 | simple: { 35 | html: 'l😃rem ipsum dolor amet yada yada', 36 | selector: { 37 | type: 'TextPositionSelector', 38 | start: 12, 39 | end: 20, 40 | }, 41 | expected: [ 42 | { 43 | startContainerXPath: '//b/text()', 44 | startOffset: 13, 45 | endContainerXPath: '//b/text()', 46 | endOffset: 21, 47 | }, 48 | ], 49 | }, 50 | 'first characters': { 51 | html: 'l😃rem ipsum dolor amet yada yada', 52 | selector: { 53 | type: 'TextPositionSelector', 54 | start: 0, 55 | end: 11, 56 | }, 57 | expected: [ 58 | { 59 | startContainerXPath: '//b/text()', 60 | startOffset: 0, 61 | endContainerXPath: '//b/text()', 62 | endOffset: 12, 63 | }, 64 | ], 65 | }, 66 | 'last characters': { 67 | html: 'l😃rem ipsum dolor amet yada yada', 68 | selector: { 69 | type: 'TextPositionSelector', 70 | start: 23, 71 | end: 32, 72 | }, 73 | expected: [ 74 | { 75 | startContainerXPath: '//b/text()', 76 | startOffset: 24, 77 | endContainerXPath: '//b/text()', 78 | endOffset: 33, 79 
| }, 80 | ], 81 | }, 82 | 'across elements': { 83 | html: 'l😃rem ipsum dolor amet yada yada', 84 | selector: { 85 | type: 'TextPositionSelector', 86 | start: 12, 87 | end: 20, 88 | }, 89 | expected: [ 90 | { 91 | startContainerXPath: '//b/text()[2]', 92 | startOffset: 1, 93 | endContainerXPath: '//u/text()', 94 | endOffset: 2, 95 | }, 96 | ], 97 | }, 98 | 'exact element contents': { 99 | html: 'l😃rem ipsum dolor amet yada yada', 100 | selector: { 101 | type: 'TextPositionSelector', 102 | start: 6, 103 | end: 17, 104 | }, 105 | expected: [ 106 | { 107 | startContainerXPath: '//i/text()', 108 | startOffset: 0, 109 | endContainerXPath: '//b/text()[2]', 110 | endOffset: 0, 111 | }, 112 | ], 113 | }, 114 | 'text inside ': { 115 | html: 'l😃rem ipsum dolor ametyada yada', 116 | selector: { 117 | type: 'TextPositionSelector', 118 | start: 18, 119 | end: 22, 120 | }, 121 | expected: [ 122 | { 123 | startContainerXPath: '//title/text()', 124 | startOffset: 19, 125 | endContainerXPath: '//b/text()[1]', 126 | endOffset: 0, 127 | }, 128 | ], 129 | }, 130 | 'empty quote': { 131 | html: 'l😃rem', 132 | selector: { 133 | type: 'TextPositionSelector', 134 | start: 3, 135 | end: 3, 136 | }, 137 | expected: [ 138 | { 139 | startContainerXPath: '//b/text()', 140 | startOffset: 4, 141 | endContainerXPath: '//b/text()', 142 | endOffset: 4, 143 | }, 144 | ], 145 | }, 146 | }; 147 | -------------------------------------------------------------------------------- /.eslintrc.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @license 3 | * Licensed to the Apache Software Foundation (ASF) under one 4 | * or more contributor license agreements. See the NOTICE file 5 | * distributed with this work for additional information 6 | * regarding copyright ownership. The ASF licenses this file 7 | * to you under the Apache License, Version 2.0 (the 8 | * "License"); you may not use this file except in compliance 9 | * with the License. 
You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, 14 | * software distributed under the License is distributed on an 15 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | * KIND, either express or implied. See the License for the 17 | * specific language governing permissions and limitations 18 | * under the License. 19 | * 20 | * SPDX-FileCopyrightText: The Apache Software Foundation 21 | * SPDX-License-Identifier: Apache-2.0 22 | */ 23 | 24 | module.exports = { 25 | root: true, 26 | extends: ['eslint:recommended', 'plugin:import/recommended', 'prettier'], 27 | plugins: ['import'], 28 | rules: { 29 | 'import/extensions': ['error', 'ignorePackages'], 30 | 'import/first': 'error', 31 | 'import/newline-after-import': 'error', 32 | 'import/no-absolute-path': 'error', 33 | 'import/no-default-export': 'error', 34 | 'import/order': [ 35 | 'error', 36 | { 37 | alphabetize: { 38 | order: 'asc', 39 | }, 40 | groups: [ 41 | 'builtin', 42 | 'external', 43 | 'internal', 44 | 'parent', 45 | 'sibling', 46 | 'index', 47 | ], 48 | 'newlines-between': 'never', 49 | }, 50 | ], 51 | 'import/unambiguous': 'error', 52 | 'no-constant-condition': 'off', 53 | }, 54 | settings: { 55 | 'import/internal-regex': '^@apache-annotator/', 56 | 'import/resolver': { 57 | 'babel-module': { 58 | babelOptions: { 59 | root: __dirname, 60 | }, 61 | }, 62 | }, 63 | }, 64 | overrides: [ 65 | { 66 | files: [ 67 | '.eslintrc.js', 68 | '.mocharc.js', 69 | '.prettierrc.js', 70 | 'babel-register.js', 71 | 'babel.config.js', 72 | 'nyc.config.js', 73 | 'web/webpack.config.js', 74 | ], 75 | env: { 76 | es2017: true, 77 | node: true, 78 | }, 79 | globals: { 80 | globalThis: 'readonly', 81 | }, 82 | parserOptions: { 83 | ecmaVersion: 2019, 84 | }, 85 | plugins: ['node'], 86 | rules: { 87 | 'no-console': 'off', 88 | 'import/no-default-export': 'off', 89 | 'import/unambiguous': 'off', 
90 | 'node/no-unsupported-features': 'error', 91 | }, 92 | }, 93 | { 94 | files: ['**/*.ts'], 95 | extends: [ 96 | 'plugin:@typescript-eslint/recommended', 97 | 'plugin:@typescript-eslint/recommended-requiring-type-checking', 98 | 'plugin:import/typescript', 99 | ], 100 | parserOptions: { 101 | ecmaVersion: 2020, 102 | project: ['./tsconfig.test.json', './packages/*/tsconfig.json'], 103 | tsconfigRootDir: __dirname, 104 | EXPERIMENTAL_useSourceOfProjectReferenceRedirect: true, 105 | }, 106 | plugins: ['@typescript-eslint'], 107 | rules: { 108 | 'import/no-unresolved': 'off', 109 | '@typescript-eslint/consistent-type-imports': 'error', 110 | '@typescript-eslint/no-duplicate-imports': 'error', 111 | '@typescript-eslint/no-explicit-any': 'off', 112 | '@typescript-eslint/no-unused-vars': [ 113 | 'error', 114 | { argsIgnorePattern: '^_' }, 115 | ], 116 | '@typescript-eslint/require-await': 'off', 117 | }, 118 | }, 119 | { 120 | files: ['**/@types/**/*.d.ts'], 121 | rules: { 122 | 'import/no-default-export': 'off', 123 | 'import/unambiguous': 'off', 124 | }, 125 | }, 126 | { 127 | files: ['packages/*/test/**/*.ts', 'test/**/*.ts'], 128 | rules: { 129 | 'import/no-relative-parent-imports': 'off', 130 | }, 131 | }, 132 | { 133 | files: ['web/**/*.js'], 134 | env: { 135 | browser: true, 136 | es2020: true, 137 | }, 138 | parserOptions: { 139 | ecmaVersion: 2020, 140 | sourceType: 'module', 141 | }, 142 | }, 143 | ], 144 | }; 145 | -------------------------------------------------------------------------------- /packages/dom/test/utils.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * @license 3 | * Licensed to the Apache Software Foundation (ASF) under one 4 | * or more contributor license agreements. See the NOTICE file 5 | * distributed with this work for additional information 6 | * regarding copyright ownership. 
The ASF licenses this file 7 | * to you under the Apache License, Version 2.0 (the 8 | * "License"); you may not use this file except in compliance 9 | * with the License. You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, 14 | * software distributed under the License is distributed on an 15 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | * KIND, either express or implied. See the License for the 17 | * specific language governing permissions and limitations 18 | * under the License. 19 | * 20 | * SPDX-FileCopyrightText: The Apache Software Foundation 21 | * SPDX-License-Identifier: Apache-2.0 22 | */ 23 | 24 | import { strict as assert } from 'assert'; 25 | import { ownerDocument } from '../src/owner-document.js'; 26 | 27 | // RangeInfo serialises a Range’s start and end containers as XPaths. 28 | export type RangeInfo = { 29 | startContainerXPath: string; 30 | startOffset: number; 31 | endContainerXPath: string; 32 | endOffset: number; 33 | }; 34 | 35 | export function evaluateXPath(doc: Document, xpath: string): Node { 36 | const result = doc.evaluate( 37 | xpath, 38 | doc, 39 | null, 40 | XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, 41 | ); 42 | const nodes = new Array(result.snapshotLength) 43 | .fill(undefined) 44 | .map((_, i) => result.snapshotItem(i)); 45 | assert.equal( 46 | nodes.length, 47 | 1, 48 | `Test suite contains XPath with ${nodes.length} results instead of 1: '${xpath}'`, 49 | ); 50 | return nodes[0] as Node; 51 | } 52 | 53 | export function hydrateRange(rangeInfo: RangeInfo, doc: Document): Range { 54 | const range = doc.createRange(); 55 | range.setStart( 56 | evaluateXPath(doc, rangeInfo.startContainerXPath), 57 | rangeInfo.startOffset, 58 | ); 59 | range.setEnd( 60 | evaluateXPath(doc, rangeInfo.endContainerXPath), 61 | rangeInfo.endOffset, 62 | ); 63 | return range; 64 | } 65 | 66 | export function 
assertRangeEquals(match: Range, expected: RangeInfo): void { 67 | const doc = ownerDocument(match); 68 | if (expected === undefined) { 69 | assert.fail(`Unexpected match: ${prettyRange(match)}`); 70 | } 71 | const expectedStartContainer = evaluateXPath( 72 | doc, 73 | expected.startContainerXPath, 74 | ); 75 | const expectedEndContainer = evaluateXPath(doc, expected.endContainerXPath); 76 | assert( 77 | match.startContainer === expectedStartContainer, 78 | `unexpected start container: ${prettyNodeName(match.startContainer)}; ` + 79 | `expected ${prettyNodeName(expectedStartContainer)}`, 80 | ); 81 | assert.equal(match.startOffset, expected.startOffset); 82 | assert( 83 | match.endContainer === evaluateXPath(doc, expected.endContainerXPath), 84 | `unexpected end container: ${prettyNodeName(match.endContainer)}; ` + 85 | `expected ${prettyNodeName(expectedEndContainer)}`, 86 | ); 87 | assert.equal(match.endOffset, expected.endOffset); 88 | } 89 | 90 | function prettyNodeName(node: Node) { 91 | switch (node.nodeType) { 92 | case Node.TEXT_NODE: { 93 | const text = (node as Text).nodeValue || ''; 94 | return `#text "${text.length > 50 ? 
text.substring(0, 50) + '…' : text}"`; 95 | } 96 | case Node.ELEMENT_NODE: 97 | return `<${(node as Element).tagName.toLowerCase()}>`; 98 | default: 99 | return node.nodeName.toLowerCase(); 100 | } 101 | } 102 | 103 | function prettyRange(range: Range): string { 104 | let s = 'Range('; 105 | if ( 106 | range.startContainer.nodeType === Node.TEXT_NODE && 107 | range.startContainer.parentNode 108 | ) 109 | s += prettyNodeName(range.startContainer.parentNode) + ' → '; 110 | s += prettyNodeName(range.startContainer) + `: ${range.startOffset}`; 111 | if (range.endContainer !== range.startContainer) { 112 | s += ' … '; 113 | if ( 114 | range.endContainer.nodeType === Node.TEXT_NODE && 115 | range.endContainer.parentNode && 116 | range.endContainer.parentNode !== range.startContainer.parentNode 117 | ) 118 | s += prettyNodeName(range.endContainer.parentNode) + ' → '; 119 | s += prettyNodeName(range.endContainer) + ' : '; 120 | } else { 121 | s += '…'; 122 | } 123 | s += range.endOffset; 124 | s += ')'; 125 | return s; 126 | } 127 | -------------------------------------------------------------------------------- /packages/dom/src/css.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * @license 3 | * Licensed to the Apache Software Foundation (ASF) under one 4 | * or more contributor license agreements. See the NOTICE file 5 | * distributed with this work for additional information 6 | * regarding copyright ownership. The ASF licenses this file 7 | * to you under the Apache License, Version 2.0 (the 8 | * "License"); you may not use this file except in compliance 9 | * with the License. You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, 14 | * software distributed under the License is distributed on an 15 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | * KIND, either express or implied. 
See the License for the 17 | * specific language governing permissions and limitations 18 | * under the License. 19 | * 20 | * SPDX-FileCopyrightText: The Apache Software Foundation 21 | * SPDX-License-Identifier: Apache-2.0 22 | */ 23 | 24 | import { finder } from '@medv/finder'; 25 | import type { CssSelector, Matcher } from '@apache-annotator/selector'; 26 | import { ownerDocument } from './owner-document.js'; 27 | import { toRange } from './to-range.js'; 28 | 29 | /** 30 | * Find the elements corresponding to the given {@link 31 | * CssSelector}. 32 | * 33 | * The given CssSelector returns all elements within `scope` that it matches. 34 | * 35 | * The function is curried, taking first the selector and then the scope. 36 | * 37 | * As there may be multiple matches for a given selector, the matcher will 38 | * return an (async) iterable that produces each match in the order they are 39 | * found in the document. 40 | * 41 | * Note that the Web Annotation specification does not mention whether an 42 | * ‘ambiguous’ CssSelector should indeed match all elements that match the 43 | * selector value, or perhaps only the first. This implementation returns all 44 | * matches to give users the freedom to follow either interpretation. This is 45 | * also in line with more clearly defined behaviour of the TextQuoteSelector: 46 | * 47 | * > “If […] the user agent discovers multiple matching text sequences, then the 48 | * > selection SHOULD be treated as matching all of the matches.” 49 | * 50 | * Note that if `scope` is *not* a Document, the [Web Annotation Data Model](https://www.w3.org/TR/2017/REC-annotation-model-20170223/#css-selector) 51 | * leaves the behaviour undefined. This implementation will, in such a case, 52 | * evaluate the selector relative to the document containing the scope, but only 53 | * return those matches that are fully enclosed within the scope. There might be 54 | * edge cases where this is not a perfect inverse of {@link describeCss}. 
55 | * 56 | * @example 57 | * ``` 58 | * const matches = createCssSelectorMatcher({ 59 | * type: 'CssSelector', 60 | * value: '#target', 61 | * })(document); 62 | * for await (const match of matches) { 63 | * console.log(match); 64 | * } 65 | * // ⇒ the element whose id is 'target'
66 | * ``` 67 | * 68 | * @param selector - The {@link CssSelector} to be anchored. 69 | * @returns A {@link Matcher} function that applies `selector` to a given 70 | * `scope`. 71 | * 72 | * @public 73 | */ 74 | export function createCssSelectorMatcher( 75 | selector: CssSelector, 76 | ): Matcher { 77 | return async function* matchAll(scope) { 78 | scope = toRange(scope); 79 | const document = ownerDocument(scope); 80 | for (const element of document.querySelectorAll(selector.value)) { 81 | const range = document.createRange(); 82 | range.selectNode(element); 83 | 84 | if ( 85 | scope.isPointInRange(range.startContainer, range.startOffset) && 86 | scope.isPointInRange(range.endContainer, range.endOffset) 87 | ) { 88 | yield element; 89 | } 90 | } 91 | }; 92 | } 93 | 94 | /** 95 | * Returns a {@link CssSelector} that unambiguously describes the given 96 | * element, within the given scope. 97 | * 98 | * @example 99 | * ``` 100 | * const target = document.getElementById('targetelement').firstElementChild; 101 | * const selector = await describeCss(target); 102 | * console.log(selector); 103 | * // { 104 | * // type: 'CssSelector', 105 | * // value: '#targetelement > :nth-child(1)' 106 | * // } 107 | * ``` 108 | * 109 | * @param element - The element that the selector should describe. 110 | * @param scope - The node that serves as the ‘document’ for purposes of finding 111 | * an unambiguous selector. Defaults to the Document that contains `element`. 112 | * @returns The selector unambiguously describing `element` within `scope`. 
113 | */ 114 | export async function describeCss( 115 | element: HTMLElement, 116 | scope: Element = element.ownerDocument.documentElement, 117 | ): Promise { 118 | const selector = finder(element, { root: scope }); 119 | return { 120 | type: 'CssSelector', 121 | value: selector, 122 | }; 123 | } 124 | -------------------------------------------------------------------------------- /web/index.html: -------------------------------------------------------------------------------- 1 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | Apache Annotator (incubating) demo 29 | 30 | 53 | 54 | 55 | 56 |
57 |

Apache Annotator (incubating)

58 |
59 |
60 |

Selector Demonstration

61 | 62 |

This page demonstrates Web Annotation 63 | Selectors, 64 | standardised JSON objects that describe a selection inside a document with sufficient information to find it again.

65 |

This demo’s source code can be found in the project repo (also mirrored on GitHub)

66 | 67 |
68 |
69 |

Select text here

70 |

Hello, annotated world! 🙂 To annotate, or not to annotate, that is the question.

71 |

Try selecting some text in the paragraph above. 72 | Upon a change of selection, a 73 | TextQuoteSelector 74 | will be created that describes what was selected.

75 |
76 | The selector can work either 77 |
78 | 79 | ; or 80 |
81 | 82 | . 83 |
84 |
85 |
86 |

Text is found here

87 |

Hello, annotated world! 🙂 To annotate, or not to annotate, that is the question.

88 |

The selector is ‘anchored’ here: the segment it describes is found and highlighted.

89 |
90 |
91 |

The selector as JSON:

92 |

 93 |       
94 |
95 |

Here are other selectors you can anchor in the text above:

96 | 106 |
107 | 108 | 109 | -------------------------------------------------------------------------------- /packages/dom/src/range/match.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * @license 3 | * Licensed to the Apache Software Foundation (ASF) under one 4 | * or more contributor license agreements. See the NOTICE file 5 | * distributed with this work for additional information 6 | * regarding copyright ownership. The ASF licenses this file 7 | * to you under the Apache License, Version 2.0 (the 8 | * "License"); you may not use this file except in compliance 9 | * with the License. You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, 14 | * software distributed under the License is distributed on an 15 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | * KIND, either express or implied. See the License for the 17 | * specific language governing permissions and limitations 18 | * under the License. 19 | * 20 | * SPDX-FileCopyrightText: The Apache Software Foundation 21 | * SPDX-License-Identifier: Apache-2.0 22 | */ 23 | 24 | import type { 25 | Matcher, 26 | RangeSelector, 27 | Selector, 28 | } from '@apache-annotator/selector'; 29 | import { ownerDocument } from '../owner-document.js'; 30 | import { toRange } from '../to-range.js'; 31 | import { cartesian } from './cartesian.js'; 32 | 33 | /** 34 | * Find the range(s) corresponding to the given {@link RangeSelector}. 35 | * 36 | * As a RangeSelector itself nests two further selectors, one needs to pass a 37 | * `createMatcher` function that will be used to process those nested selectors. 38 | * 39 | * The function is curried, taking first the `createMatcher` function, then the 40 | * selector, and then the scope. 
41 | * 42 | * As there may be multiple matches for the start & end selectors, the resulting 43 | * matcher will return an (async) iterable, that produces a match for each 44 | * possible pair of matches of the nested selectors (except those where its end 45 | * would precede its start). *(Note that this behaviour is a rather free 46 | * interpretation of the Web Annotation Data Model spec, which is silent about 47 | * the possibility of multiple matches for RangeSelectors)* 48 | * 49 | * @example 50 | * By using a matcher for {@link TextQuoteSelector}s, one 51 | * could create a matcher for text quotes with ellipsis to select a phrase 52 | * “ipsum … amet,”: 53 | * ``` 54 | * const selector = { 55 | * type: 'RangeSelector', 56 | * startSelector: { 57 | * type: 'TextQuoteSelector', 58 | * exact: 'ipsum ', 59 | * }, 60 | * endSelector: { 61 | * type: 'TextQuoteSelector', 62 | * // Because the end of a RangeSelector is *exclusive*, we will present the 63 | * // latter part of the quote as the *prefix* so it will be part of the 64 | * // match. 
65 | * exact: '', 66 | * prefix: ' amet,', 67 | * } 68 | * }; 69 | * const createRangeSelectorMatcher = 70 | * makeCreateRangeSelectorMatcher(createTextQuoteSelectorMatcher); 71 | * const matches = createRangeSelectorMatcher(selector)(document.body); 72 | * for await (const match of matches) console.log(match); 73 | * // ⇒ Range { startContainer: #text, startOffset: 6, endContainer: #text, 74 | * // endOffset: 27, … } 75 | * ``` 76 | * 77 | * @example 78 | * To support RangeSelectors that might themselves contain RangeSelectors, 79 | * recursion can be created by supplying the resulting matcher creator function 80 | * as the `createMatcher` parameter: 81 | * ``` 82 | * const createWhicheverMatcher = (selector) => { 83 | * const innerCreateMatcher = { 84 | * TextQuoteSelector: createTextQuoteSelectorMatcher, 85 | * TextPositionSelector: createTextPositionSelectorMatcher, 86 | * RangeSelector: makeCreateRangeSelectorMatcher(createWhicheverMatcher), 87 | * }[selector.type]; 88 | * return innerCreateMatcher(selector); 89 | * }; 90 | * ``` 91 | * 92 | * @param createMatcher - The function used to process nested selectors. 93 | * @returns A function that, given a RangeSelector `selector`, creates a {@link 94 | * Matcher} function that can apply it to a given `scope`. 
95 | * 96 | * @public 97 | */ 98 | export function makeCreateRangeSelectorMatcher( 99 | createMatcher: ( 100 | selector: T, 101 | ) => Matcher, 102 | ): (selector: RangeSelector) => Matcher { 103 | return function createRangeSelectorMatcher(selector) { 104 | const startMatcher = createMatcher(selector.startSelector); 105 | const endMatcher = createMatcher(selector.endSelector); 106 | 107 | return async function* matchAll(scope) { 108 | const startMatches = startMatcher(scope); 109 | const endMatches = endMatcher(scope); 110 | 111 | const pairs = cartesian(startMatches, endMatches); 112 | 113 | for await (let [start, end] of pairs) { 114 | start = toRange(start); 115 | end = toRange(end); 116 | 117 | const result = ownerDocument(scope).createRange(); 118 | result.setStart(start.startContainer, start.startOffset); 119 | // Note that a RangeSelector’s match *excludes* the endSelector’s match, 120 | // hence we take the end’s startContainer & startOffset. 121 | result.setEnd(end.startContainer, end.startOffset); 122 | 123 | if (!result.collapsed) yield result; 124 | } 125 | }; 126 | }; 127 | } 128 | -------------------------------------------------------------------------------- /web/index.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @license 3 | * Licensed to the Apache Software Foundation (ASF) under one 4 | * or more contributor license agreements. See the NOTICE file 5 | * distributed with this work for additional information 6 | * regarding copyright ownership. The ASF licenses this file 7 | * to you under the Apache License, Version 2.0 (the 8 | * "License"); you may not use this file except in compliance 9 | * with the License. 
You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, 14 | * software distributed under the License is distributed on an 15 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | * KIND, either express or implied. See the License for the 17 | * specific language governing permissions and limitations 18 | * under the License. 19 | * 20 | * SPDX-FileCopyrightText: The Apache Software Foundation 21 | * SPDX-License-Identifier: Apache-2.0 22 | */ 23 | 24 | /* global info, module, source, target, form */ 25 | 26 | import { 27 | makeCreateRangeSelectorMatcher, 28 | createTextQuoteSelectorMatcher, 29 | describeTextQuote, 30 | createTextPositionSelectorMatcher, 31 | describeTextPosition, 32 | highlightText, 33 | } from '@apache-annotator/dom'; 34 | import { makeRefinable } from '@apache-annotator/selector'; 35 | 36 | const EXAMPLE_SELECTORS = [ 37 | { 38 | type: 'TextQuoteSelector', 39 | exact: 'not', 40 | }, 41 | { 42 | type: 'RangeSelector', 43 | startSelector: { 44 | type: 'TextQuoteSelector', 45 | exact: 'ann', 46 | }, 47 | endSelector: { 48 | type: 'TextQuoteSelector', 49 | exact: '!', 50 | }, 51 | }, 52 | { 53 | type: 'TextQuoteSelector', 54 | exact: 'annotated world', 55 | refinedBy: { 56 | type: 'TextQuoteSelector', 57 | exact: 'tat', 58 | }, 59 | }, 60 | { 61 | type: 'TextQuoteSelector', 62 | exact: 'To annotate, or not to annotate,', 63 | refinedBy: { 64 | type: 'RangeSelector', 65 | startSelector: { 66 | type: 'TextQuoteSelector', 67 | exact: 'To annotate', 68 | refinedBy: { 69 | type: 'TextQuoteSelector', 70 | exact: 'annotate', 71 | }, 72 | }, 73 | endSelector: { 74 | type: 'TextQuoteSelector', 75 | exact: 'not to annotate', 76 | refinedBy: { 77 | type: 'TextQuoteSelector', 78 | exact: ' to', 79 | }, 80 | }, 81 | refinedBy: { 82 | type: 'TextQuoteSelector', 83 | exact: 'o', 84 | }, 85 | }, 86 | }, 87 | ]; 88 | 89 | let moduleState = { 90 
| cleanupFunctions: [], 91 | }; 92 | 93 | function cleanup() { 94 | let removeHighlight; 95 | while ((removeHighlight = moduleState.cleanupFunctions.shift())) { 96 | removeHighlight(); 97 | } 98 | target.normalize(); 99 | info.innerText = ''; 100 | } 101 | 102 | const createMatcher = makeRefinable((selector) => { 103 | const innerCreateMatcher = { 104 | TextQuoteSelector: createTextQuoteSelectorMatcher, 105 | TextPositionSelector: createTextPositionSelectorMatcher, 106 | RangeSelector: makeCreateRangeSelectorMatcher(createMatcher), 107 | }[selector.type]; 108 | 109 | if (!innerCreateMatcher) { 110 | throw new Error(`Unsupported selector type: ${selector.type}`); 111 | } 112 | 113 | return innerCreateMatcher(selector); 114 | }); 115 | 116 | async function anchor(selector) { 117 | const matchAll = createMatcher(selector); 118 | const ranges = []; 119 | 120 | // First collect all matches, and only then highlight them; to avoid 121 | // modifying the DOM while the matcher is running. 122 | for await (const range of matchAll(target)) { 123 | ranges.push(range); 124 | } 125 | 126 | for (const range of ranges) { 127 | const removeHighlight = highlightText(range); 128 | moduleState.cleanupFunctions.push(removeHighlight); 129 | } 130 | 131 | info.innerText += JSON.stringify(selector, null, 2) + '\n\n'; 132 | } 133 | 134 | async function onSelectionChange() { 135 | cleanup(); 136 | const describeMode = form.describeMode.value; 137 | const selection = document.getSelection(); 138 | for (let i = 0; i < selection.rangeCount; i++) { 139 | const range = selection.getRangeAt(i); 140 | const selector = 141 | describeMode === 'TextPosition' 142 | ? 
await describeTextPosition(range, source) 143 | : await describeTextQuote(range, source, { minimumQuoteLength: 10 }); 144 | await anchor(selector); 145 | } 146 | } 147 | 148 | function onSelectorExampleClick(event) { 149 | const exampleNumber = event.target.dataset.runExample; 150 | if (!exampleNumber) return; 151 | const selector = EXAMPLE_SELECTORS[exampleNumber]; 152 | cleanup(); 153 | anchor(selector); 154 | event.preventDefault(); 155 | } 156 | 157 | function addEventListeners() { 158 | document.addEventListener('selectionchange', onSelectionChange); 159 | form.addEventListener('change', onSelectionChange); 160 | document.addEventListener('click', onSelectorExampleClick); 161 | } 162 | addEventListeners(); 163 | 164 | function removeEventListeners() { 165 | document.removeEventListener('selectionchange', onSelectionChange); 166 | form.removeEventListener('change', onSelectionChange); 167 | document.removeEventListener('click', onSelectorExampleClick); 168 | } 169 | 170 | if (module.hot) { 171 | module.hot.accept(); 172 | module.hot.dispose((data) => { 173 | removeEventListeners(); 174 | data.state = moduleState; 175 | }); 176 | if (module.hot.data?.state) { 177 | moduleState = module.hot.data.state; 178 | } 179 | } 180 | -------------------------------------------------------------------------------- /packages/selector/src/text/chunker.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * @license 3 | * Licensed to the Apache Software Foundation (ASF) under one 4 | * or more contributor license agreements. See the NOTICE file 5 | * distributed with this work for additional information 6 | * regarding copyright ownership. The ASF licenses this file 7 | * to you under the Apache License, Version 2.0 (the 8 | * "License"); you may not use this file except in compliance 9 | * with the License. 
You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, 14 | * software distributed under the License is distributed on an 15 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | * KIND, either express or implied. See the License for the 17 | * specific language governing permissions and limitations 18 | * under the License. 19 | * 20 | * SPDX-FileCopyrightText: The Apache Software Foundation 21 | * SPDX-License-Identifier: Apache-2.0 22 | */ 23 | 24 | /** 25 | * Represents a piece of text in any kind of ‘file’. 26 | * 27 | * Its purpose is to enable generic algorithms to deal with text content of any 28 | * type of ‘file’ that consists of many pieces of text (e.g. a DOM, PDF, …). 29 | * Each Chunk represents one piece of text ({@link Chunk.data}). An object 30 | * implementing this interface would typically have other attributes as well to 31 | * map the chunk back to its position in the file (e.g. a Text node in the DOM). 32 | * 33 | * @typeParam TData - Piece of text, typically `string` 34 | * 35 | * @public 36 | */ 37 | export interface Chunk { 38 | /** 39 | * The piece of text this chunk represents. 40 | */ 41 | readonly data: TData; 42 | equals?(otherChunk: this): boolean; 43 | } 44 | 45 | /** 46 | * Test two {@link Chunk}s for equality. 47 | * 48 | * Equality here means that both represent the same piece of text (i.e. at the 49 | * same position) in the file. It compares using the custom {@link Chunk.equals} 50 | * method if either chunk defines one, and falls back to checking the objects’ 51 | * identity (i.e. `chunk1 === chunk2`). 
52 | * 53 | * @public 54 | */ 55 | export function chunkEquals(chunk1: Chunk, chunk2: Chunk): boolean { 56 | if (chunk1.equals) return chunk1.equals(chunk2); 57 | if (chunk2.equals) return chunk2.equals(chunk1); 58 | return chunk1 === chunk2; 59 | } 60 | 61 | /** 62 | * Points at a range of characters between two points inside {@link Chunk}s. 63 | * 64 | * Analogous to the DOM’s ({@link https://developer.mozilla.org/en-US/docs/Web/API/AbstractRange 65 | * | Abstract}){@link https://developer.mozilla.org/en-US/docs/Web/API/Range | 66 | * Range}. Each index expresses an offset inside the value of the corresponding 67 | * {@link Chunk.data}, and can equal the length of that data in order to point 68 | * to the position right after the chunk’s last character. 69 | * 70 | * @public 71 | */ 72 | export interface ChunkRange> { 73 | startChunk: TChunk; 74 | startIndex: number; 75 | endChunk: TChunk; 76 | endIndex: number; 77 | } 78 | 79 | /** 80 | * Test two {@link ChunkRange}s for equality. 81 | * 82 | * Equality here means equality of each of their four properties (i.e. 83 | * {@link startChunk}, {@link startIndex}, 84 | * {@link endChunk}, and {@link endIndex}). 85 | * For the `startChunk`s and `endChunk`s, this function uses the custom 86 | * {@link Chunk.equals} method if defined. 87 | * 88 | * Note that if the start/end of one range points at the end of a chunk, and the 89 | * other to the start of a subsequent chunk, they are not considered equal, even 90 | * though semantically they may be representing the same range of characters. To 91 | * test for such semantic equivalence, ensure that both inputs are normalised: 92 | * typically this means the range is shrunk to its narrowest equivalent, and (if 93 | * it is empty) positioned at its first equivalent. 
94 | * 95 | * @public 96 | */ 97 | export function chunkRangeEquals( 98 | range1: ChunkRange, 99 | range2: ChunkRange, 100 | ): boolean { 101 | return ( 102 | chunkEquals(range1.startChunk, range2.startChunk) && 103 | chunkEquals(range1.endChunk, range2.endChunk) && 104 | range1.startIndex === range2.startIndex && 105 | range1.endIndex === range2.endIndex 106 | ); 107 | } 108 | 109 | /** 110 | * Presents the pieces of text contained in some underlying ‘file’ as a sequence 111 | * of {@link Chunk}s. 112 | * 113 | * Rather than presenting a list of all pieces, the `Chunker` provides methods 114 | * to walk through the file piece by piece. This permits implementations to read 115 | * and convert the file to `Chunk`s lazily. 116 | * 117 | * For those familiar with the DOM APIs, it is similar to a NodeIterator (but 118 | * unlike NodeIterator, it has no concept of being ‘before’ or ‘after’ a chunk). 119 | * 120 | * @typeParam TChunk - (sub)type of `Chunk` being used. 121 | * 122 | * @public 123 | */ 124 | export interface Chunker> { 125 | /** 126 | * The chunk currently being pointed at. 127 | * 128 | * Initially, this should normally be the first chunk in the file. 129 | */ 130 | readonly currentChunk: TChunk; 131 | 132 | /** 133 | * Point {@link currentChunk} at the chunk following it, and return that chunk. 134 | * If there are no chunks following it, keep `currentChunk` unchanged and 135 | * return null. 136 | */ 137 | nextChunk(): TChunk | null; 138 | 139 | /** 140 | * Point {@link currentChunk} at the chunk preceding it, and return that chunk. 141 | * If there are no chunks preceding it, keep `currentChunk` unchanged and 142 | * return null. 143 | */ 144 | previousChunk(): TChunk | null; 145 | 146 | /** 147 | * Test if a given `chunk` is before the {@link currentChunk|current 148 | * chunk}. 149 | * 150 | * Returns true if `chunk` is before `this.currentChunk`, false otherwise 151 | * (i.e. if `chunk` follows it or is the current chunk). 
152 | * 153 | * The given `chunk` need not necessarily be obtained from the same `Chunker`, 154 | * but the chunkers would need to represent the same file. Otherwise behaviour 155 | * is unspecified (an implementation might throw or just return `false`). 156 | * 157 | * @param chunk - A chunk, typically obtained from the same `Chunker`. 158 | */ 159 | precedesCurrentChunk(chunk: TChunk): boolean; 160 | } 161 | -------------------------------------------------------------------------------- /packages/dom/src/text-node-chunker.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * @license 3 | * Licensed to the Apache Software Foundation (ASF) under one 4 | * or more contributor license agreements. See the NOTICE file 5 | * distributed with this work for additional information 6 | * regarding copyright ownership. The ASF licenses this file 7 | * to you under the Apache License, Version 2.0 (the 8 | * "License"); you may not use this file except in compliance 9 | * with the License. You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, 14 | * software distributed under the License is distributed on an 15 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | * KIND, either express or implied. See the License for the 17 | * specific language governing permissions and limitations 18 | * under the License. 
/**
 * A chunk that exposes (part of) the contents of a single DOM Text node.
 *
 * `startOffset` and `endOffset` delimit the part of `node` that lies inside
 * the chunker’s scope; `data` holds exactly those characters.
 */
export interface PartialTextNode extends Chunk<string> {
  readonly node: Text;
  readonly startOffset: number;
  readonly endOffset: number;
}

/** Thrown when the chunker cannot find any text node within its scope. */
export class EmptyScopeError extends TypeError {
  constructor(message?: string) {
    super(message || 'Scope contains no text nodes.');
  }
}

/** Thrown when a node that does not intersect the scope is turned into a chunk. */
export class OutOfScopeError extends TypeError {
  constructor(message?: string) {
    super(
      message ||
        'Cannot convert node to chunk, as it falls outside of chunker’s scope.',
    );
  }
}

/**
 * A {@link Chunker} whose chunks are the Text nodes that intersect a given
 * scope, visited in document order through a `NodeIterator`.
 */
export class TextNodeChunker implements Chunker<PartialTextNode> {
  private scope: Range;
  private iter: NodeIterator;

  /**
   * The chunk for the text node the iterator currently refers to.
   *
   * @throws {@link EmptyScopeError} if the iterator’s reference node is not a
   * Text node.
   */
  get currentChunk(): PartialTextNode {
    const node = this.iter.referenceNode;

    // This test should not actually be needed, but it keeps TypeScript happy.
    if (!isText(node)) throw new EmptyScopeError();

    return this.nodeToChunk(node);
  }

  /**
   * Create the chunk that represents the in-scope part of `node`.
   *
   * @param node - A text node intersecting the scope.
   * @throws {@link OutOfScopeError} if `node` does not intersect the scope.
   */
  nodeToChunk(node: Text): PartialTextNode {
    if (!this.scope.intersectsNode(node)) throw new OutOfScopeError();

    // Only the scope’s own boundary nodes can be partially included.
    const startOffset =
      node === this.scope.startContainer ? this.scope.startOffset : 0;
    const endOffset =
      node === this.scope.endContainer ? this.scope.endOffset : node.length;

    return {
      node,
      startOffset,
      endOffset,
      data: node.data.substring(startOffset, endOffset),
      equals(other) {
        return (
          other.node === this.node &&
          other.startOffset === this.startOffset &&
          other.endOffset === this.endOffset
        );
      },
    };
  }

  /**
   * Convert a DOM Range into a range expressed in chunks and indices into
   * their `data`. The given range is cloned first, so it is not modified.
   *
   * @param range - The range to convert; the part outside the scope is ignored.
   */
  rangeToChunkRange(range: Range): ChunkRange<PartialTextNode> {
    range = range.cloneRange();

    // Take the part of the range that falls within the scope.
    if (range.compareBoundaryPoints(Range.START_TO_START, this.scope) === -1)
      range.setStart(this.scope.startContainer, this.scope.startOffset);
    if (range.compareBoundaryPoints(Range.END_TO_END, this.scope) === 1)
      range.setEnd(this.scope.endContainer, this.scope.endOffset);

    // Ensure it starts and ends at text nodes.
    const textRange = normalizeRange(range, this.scope);

    // Indices are relative to the chunk’s data, hence the `- startOffset`.
    const startChunk = this.nodeToChunk(textRange.startContainer);
    const startIndex = textRange.startOffset - startChunk.startOffset;
    const endChunk = this.nodeToChunk(textRange.endContainer);
    const endIndex = textRange.endOffset - endChunk.startOffset;

    return { startChunk, startIndex, endChunk, endIndex };
  }

  /**
   * Convert a chunk range back into a DOM Range.
   *
   * @param chunkRange - A range made of chunks produced by this chunker.
   */
  chunkRangeToRange(chunkRange: ChunkRange<PartialTextNode>): Range {
    const range = ownerDocument(this.scope).createRange();
    // The `+…startOffset` parts are only relevant for the first chunk, as it
    // might start within a text node.
    range.setStart(
      chunkRange.startChunk.node,
      chunkRange.startIndex + chunkRange.startChunk.startOffset,
    );
    range.setEnd(
      chunkRange.endChunk.node,
      chunkRange.endIndex + chunkRange.endChunk.startOffset,
    );
    return range;
  }

  /**
   * @param scope - A Node, or a Range that overlaps with at least one text
   * node.
   * @throws {@link EmptyScopeError} if no text node intersects the scope.
   */
  constructor(scope: Node | Range) {
    this.scope = toRange(scope);
    this.iter = ownerDocument(scope).createNodeIterator(
      this.scope.commonAncestorContainer,
      NodeFilter.SHOW_TEXT,
      {
        acceptNode: (node: Text) => {
          return this.scope.intersectsNode(node)
            ? NodeFilter.FILTER_ACCEPT
            : NodeFilter.FILTER_REJECT;
        },
      },
    );

    // Move the iterator to after the start (= root) node.
    this.iter.nextNode();
    // If the start node is not a text node, move it to the first text node.
    if (!isText(this.iter.referenceNode)) {
      const nextNode = this.iter.nextNode();
      if (nextNode === null) throw new EmptyScopeError();
    }
  }

  /** Advance to the next text node; returns `null` (without moving) at the end. */
  nextChunk(): PartialTextNode | null {
    // Move the iterator to after the current node, so nextNode() will cause a jump.
    if (this.iter.pointerBeforeReferenceNode) this.iter.nextNode();

    if (this.iter.nextNode()) return this.currentChunk;
    else return null;
  }

  /** Step back to the previous text node; returns `null` at the beginning. */
  previousChunk(): PartialTextNode | null {
    // Move the iterator to before the current node, so previousNode() will cause a jump.
    if (!this.iter.pointerBeforeReferenceNode) this.iter.previousNode();

    if (this.iter.previousNode()) return this.currentChunk;
    else return null;
  }

  /**
   * Tell whether the node of `chunk` comes before the current chunk’s node in
   * document order.
   */
  precedesCurrentChunk(chunk: PartialTextNode): boolean {
    // `currentChunk` never yields null (it throws instead), so no null check.
    const position = this.currentChunk.node.compareDocumentPosition(chunk.node);
    return (position & Node.DOCUMENT_POSITION_PRECEDING) !== 0;
  }
}

/** Type guard: is `node` a DOM Text node? */
function isText(node: Node): node is Text {
  return node.nodeType === Node.TEXT_NODE;
}
const domParser = new DOMParser();

/**
 * Register one mocha test per entry of `testCases`: parse the case’s HTML,
 * describe its range within the whole document, and compare the resulting
 * selector with the expected one.
 */
function runTestCases(testCases: DescribeTextQuoteTestCases) {
  for (const [name, { html, range, expected, options }] of Object.entries(
    testCases,
  )) {
    it(`works for case: ${name}`, async () => {
      const doc = domParser.parseFromString(html, 'text/html');
      const result = await describeTextQuote(
        hydrateRange(range, doc),
        doc,
        options,
      );
      assert.deepEqual(result, expected);
    });
  }
}

describe('describeTextQuote', () => {
  describe('without options', () => {
    runTestCases(testCasesWithoutOptions);
  });

  describe('with minimal context', () => {
    runTestCases(testCasesWithMinimalContext);
  });

  describe('with minimum quote length', () => {
    runTestCases(testCasesWithMinimumQuoteLength);
  });

  describe('with max word length', () => {
    runTestCases(testCasesWithMaxWordLength);
  });

  it('works with custom scope', async () => {
    const { html, range, options } = testCasesWithMinimalContext[
      'minimal prefix'
    ];
    const doc = domParser.parseFromString(html, 'text/html');
    const scope = doc.createRange();
    scope.setStart(evaluateXPath(doc, '//b/text()'), 15);
    scope.setEnd(evaluateXPath(doc, '//b/text()'), 30); // "not to annotate"
    const result = await describeTextQuote(
      hydrateRange(range, doc),
      scope,
      options,
    );
    assert.deepEqual(result, {
      type: 'TextQuoteSelector',
      exact: 'anno',
      prefix: '', // no prefix needed in this scope.
      suffix: '',
    });
  });

  it('strips part of the range outside the scope', async () => {
    const { html, range, options } = testCasesWithMinimalContext['no context'];
    const doc = domParser.parseFromString(html, 'text/html');
    const scope = doc.createRange();
    scope.setStart(evaluateXPath(doc, '//b/text()'), 6);
    scope.setEnd(evaluateXPath(doc, '//b/text()'), 17); // "ipsum dolor"
    const result = await describeTextQuote(
      hydrateRange(range, doc),
      scope,
      options,
    );
    assert.deepEqual(result, {
      type: 'TextQuoteSelector',
      exact: 'dolor',
      prefix: '',
      suffix: '',
    });
  });

  it('works if the range equals the scope', async () => {
    const { html, range, expected, options } = testCasesWithMinimalContext[
      'no context'
    ];
    const doc = domParser.parseFromString(html, 'text/html');
    const result = await describeTextQuote(
      hydrateRange(range, doc),
      hydrateRange(range, doc),
      options,
    );
    assert.deepEqual(result, expected);
  });

  it('works if range does not contain Text nodes', async () => {
    // The fixture needs an <img> element: it is what the XPath below selects.
    const html = `Try quoting this image: <img> — would that work?`;
    const doc = domParser.parseFromString(html, 'text/html');
    // Create the range from the parsed document, like every other test here,
    // rather than from whatever global `document` the test runner provides.
    const range = doc.createRange();
    range.selectNode(evaluateXPath(doc, '//img'));
    const result = await describeTextQuote(range, doc);
    assert.deepEqual(result, {
      type: 'TextQuoteSelector',
      exact: '',
      prefix: 'image: ',
      suffix: ' —',
    });
  });

  describe('inverts test cases of text quote matcher', () => {
    // Only cases that are expected to match something can be inverted.
    const applicableTestCases = Object.entries(testMatchCases).filter(
      ([_, { expected }]) => expected.length > 0,
    );

    for (const [name, { html, selector, expected }] of applicableTestCases) {
      it(`case: '${name}'`, async () => {
        const doc = domParser.parseFromString(html, 'text/html');
        for (const rangeInfo of expected) {
          const range = hydrateRange(rangeInfo, doc);
          const result = await describeTextQuote(range, doc);
          assert.equal(result.exact, selector.exact);
          // Our result may have a different combination of prefix/suffix; only check for obvious inconsistency.
          if (selector.prefix && result.prefix)
            assert(
              selector.prefix.endsWith(
                result.prefix.substring(
                  result.prefix.length - selector.prefix.length,
                ),
              ),
              'Inconsistent prefixes',
            );
          if (selector.suffix && result.suffix)
            assert(
              selector.suffix.startsWith(
                result.suffix.substring(0, selector.suffix.length),
              ),
              'Inconsistent suffixes',
            );
        }
      });
    }
  });
});
/**
 * Wrap each text node in a given Node or Range with a `<mark>` or other
 * element.
 *
 * If a Range is given that starts and/or ends within a Text node, that node
 * will be split in order to only wrap the contained part in the mark element.
 *
 * The highlight can be removed again by calling the function that cleans up the
 * wrapper elements. Note that this might not perfectly restore the DOM to its
 * previous state: text nodes that were split are not merged again. One could
 * consider running `range.commonAncestorContainer.normalize()` afterwards to
 * join all adjacent text nodes.
 *
 * @param target - The Node/Range containing the text. If it is a Range, note
 * that as highlighting modifies the DOM, the Range may be unusable afterwards.
 * @param tagName - The element used to wrap text nodes. Defaults to `'mark'`.
 * @param attributes - An object defining any attributes to be set on the
 * wrapper elements, e.g. its `class`.
 * @returns A function that removes the created highlight.
 *
 * @public
 */
export function highlightText(
  target: Node | Range,
  tagName = 'mark',
  attributes: Record<string, string> = {},
): () => void {
  // First put all nodes in an array (splits start and end nodes if needed)
  const nodes = textNodesInRange(toRange(target));

  // Highlight each node, remembering the wrappers so they can be removed.
  const highlightElements = nodes.map((node) =>
    wrapNodeInHighlight(node, tagName, attributes),
  );

  // Return a function that cleans up the highlightElements.
  return function removeHighlights() {
    for (const highlightElement of highlightElements) {
      removeHighlight(highlightElement);
    }
  };
}

/**
 * Return an array of the text nodes in the range, in document order.
 *
 * A text node that is only partly inside the range is split first, so every
 * returned node lies in the range; `range` itself is updated accordingly.
 */
function textNodesInRange(range: Range): Text[] {
  // If the start or end node is a text node and only partly in the range, split it.
  if (isTextNode(range.startContainer) && range.startOffset > 0) {
    const endOffset = range.endOffset; // (this may get lost when splitting the node)
    const createdNode = range.startContainer.splitText(range.startOffset);
    if (range.endContainer === range.startContainer) {
      // If the end was in the same container, it will now be in the newly created node.
      range.setEnd(createdNode, endOffset - range.startOffset);
    }
    range.setStart(createdNode, 0);
  }
  if (
    isTextNode(range.endContainer) &&
    range.endOffset < range.endContainer.length
  ) {
    range.endContainer.splitText(range.endOffset);
  }

  // Collect the text nodes.
  const walker = ownerDocument(range).createTreeWalker(
    range.commonAncestorContainer,
    NodeFilter.SHOW_TEXT,
    {
      acceptNode: (node) =>
        range.intersectsNode(node)
          ? NodeFilter.FILTER_ACCEPT
          : NodeFilter.FILTER_REJECT,
    },
  );
  walker.currentNode = range.startContainer;

  // // Optimise by skipping nodes that are explicitly outside the range.
  // const NodeTypesWithCharacterOffset = [
  //  Node.TEXT_NODE,
  //  Node.PROCESSING_INSTRUCTION_NODE,
  //  Node.COMMENT_NODE,
  // ];
  // if (!NodeTypesWithCharacterOffset.includes(range.startContainer.nodeType)) {
  //   if (range.startOffset < range.startContainer.childNodes.length) {
  //     walker.currentNode = range.startContainer.childNodes[range.startOffset];
  //   } else {
  //     walker.nextSibling(); // TODO verify this is correct.
  //   }
  // }

  const nodes: Text[] = [];
  if (isTextNode(walker.currentNode)) nodes.push(walker.currentNode);
  // Stop as soon as a node starts after the end of the range.
  while (walker.nextNode() && range.comparePoint(walker.currentNode, 0) !== 1)
    nodes.push(walker.currentNode as Text);
  return nodes;
}

/**
 * Replace [node] with <tagName ...attributes>[node]</tagName>.
 *
 * @returns The newly created wrapper element.
 */
function wrapNodeInHighlight(
  node: ChildNode,
  tagName: string,
  attributes: Record<string, string>,
): HTMLElement {
  const document = node.ownerDocument as Document;
  const highlightElement = document.createElement(tagName);
  for (const [key, value] of Object.entries(attributes)) {
    highlightElement.setAttribute(key, value);
  }
  const tempRange = document.createRange();
  tempRange.selectNode(node);
  tempRange.surroundContents(highlightElement);
  return highlightElement;
}

/** Remove a highlight element created with wrapNodeInHighlight, keeping its contents. */
function removeHighlight(highlightElement: HTMLElement) {
  // If it has somehow been removed already, there is nothing to be done.
  if (!highlightElement.parentNode) return;
  if (highlightElement.childNodes.length === 1) {
    highlightElement.replaceWith(highlightElement.firstChild as Node);
  } else {
    // If the highlight somehow contains multiple nodes now, move them all.
    while (highlightElement.firstChild) {
      highlightElement.parentNode.insertBefore(
        highlightElement.firstChild,
        highlightElement,
      );
    }
    highlightElement.remove();
  }
}

/** Type guard: is `node` a DOM Text node? */
function isTextNode(node: Node): node is Text {
  return node.nodeType === Node.TEXT_NODE;
}
/**
 * A Range whose start and end containers are always Text nodes. To keep that
 * promise at the type level, methods that could move a boundary onto another
 * kind of node are restricted (casting back to Range lifts the restrictions).
 */
export interface TextRange extends Range {
  readonly startContainer: Text;
  readonly endContainer: Text;
  cloneRange(): TextRange;

  // Allow only Text nodes to be passed to these methods.
  insertNode(node: Text): void;
  selectNodeContents(node: Text): void;
  setEnd(node: Text, offset: number): void;
  setStart(node: Text, offset: number): void;

  // Do not allow these methods to be used at all.
  selectNode(node: never): void;
  setEndAfter(node: never): void;
  setEndBefore(node: never): void;
  setStartAfter(node: never): void;
  setStartBefore(node: never): void;
  surroundContents(newParent: never): void;
}

/**
 * Normalise a {@link https://developer.mozilla.org/en-US/docs/Web/API/Range |
 * Range} such that ranges spanning the same text become exact equals.
 *
 * *Note: in this context ‘text’ means any characters, including whitespace.*
 *
 * Both boundaries are moved onto text nodes, and among equivalent text
 * selections the narrowest one is chosen: the start is moved off the end of a
 * text node, and the end off the start of one.
 *
 * If there is no text between the start and end, they thus collapse onto a
 * single position; and if there are multiple equivalent positions, it takes the
 * first one; or, if scope is passed, the first equivalent falling within scope.
 *
 * Note that if the given range does not contain non-empty text nodes, it may
 * end up pointing at a text node outside of it (before it if possible, else
 * after). If the document does not contain any text nodes, an error is thrown.
 *
 * The given `range` is modified in place and is also the object returned.
 *
 * @param range - The range to normalise.
 * @param scope - If given, only text nodes intersecting it are stepped onto.
 */
export function normalizeRange(range: Range, scope?: Range): TextRange {
  const doc = ownerDocument(range);
  const acceptNode = (node: Node): number =>
    !scope || scope.intersectsNode(node)
      ? NodeFilter.FILTER_ACCEPT
      : NodeFilter.FILTER_REJECT;
  const textWalker = doc.createTreeWalker(doc, NodeFilter.SHOW_TEXT, {
    acceptNode,
  });

  const snappedStart = snapBoundaryPointToTextNode(
    range.startContainer,
    range.startOffset,
  );
  let startNode = snappedStart[0];
  let startOffset = snappedStart[1];

  // While pointing at the end of a text node, hop to the start of the next
  // one; looping makes this skip over empty text nodes.
  textWalker.currentNode = startNode;
  while (startOffset === startNode.length) {
    if (textWalker.nextNode() === null) break;
    startNode = textWalker.currentNode as Text;
    startOffset = 0;
  }

  // Set the range’s start; note this might move its end too.
  range.setStart(startNode, startOffset);

  // The end is read only now, as setting the start may have changed it.
  const snappedEnd = snapBoundaryPointToTextNode(
    range.endContainer,
    range.endOffset,
  );
  let endNode = snappedEnd[0];
  let endOffset = snappedEnd[1];

  // While pointing at the start of a text node, hop to the end of the previous
  // one; again this skips over empty text nodes.
  textWalker.currentNode = endNode;
  while (endOffset === 0) {
    if (textWalker.previousNode() === null) break;
    endNode = textWalker.currentNode as Text;
    endOffset = endNode.length;
  }

  // Set the range’s end; note this might move its start too.
  range.setEnd(endNode, endOffset);

  return range as TextRange;
}
/**
 * Given an arbitrary boundary point, this returns either:
 * - that same boundary point, if its node is a text node;
 * - otherwise the first boundary point after it whose node is a text node, if any;
 * - otherwise, the last boundary point before it whose node is a text node.
 *
 * @param node - The boundary point’s node.
 * @param offset - The boundary point’s offset within `node`.
 * @returns A `[textNode, offset]` pair.
 * @throws Error if no text node can be found at all.
 */
function snapBoundaryPointToTextNode(
  node: Node,
  offset: number,
): [Text, number] {
  if (isText(node)) return [node, offset];

  // Find the node at or right after the boundary point.
  let curNode: Node;
  if (isCharacterData(node)) {
    curNode = node;
  } else if (offset < node.childNodes.length) {
    curNode = node.childNodes[offset];
  } else {
    // The point is after the node’s last child: climb until something follows.
    curNode = node;
    while (curNode.nextSibling === null) {
      if (curNode.parentNode === null) {
        // Boundary point is at the very end of the tree, so no text node can
        // follow it; fall back to the end of the last text node before it.
        const lastText = lastTextNodeIn(curNode);
        if (lastText === null)
          throw new Error('Document contains no text nodes.');
        return [lastText, lastText.length];
      }
      curNode = curNode.parentNode;
    }
    curNode = curNode.nextSibling;
  }

  if (isText(curNode)) return [curNode, 0];

  // Walk to the next text node, or the last if there is none.
  const document = node.ownerDocument ?? (node as Document);
  const walker = document.createTreeWalker(document, NodeFilter.SHOW_TEXT);
  walker.currentNode = curNode;
  if (walker.nextNode() !== null) {
    return [walker.currentNode as Text, 0];
  } else if (walker.previousNode() !== null) {
    return [walker.currentNode as Text, (walker.currentNode as Text).length];
  } else {
    throw new Error('Document contains no text nodes.');
  }
}

/** Find the last Text node (in document order) among the descendants of `root`. */
function lastTextNodeIn(root: Node): Text | null {
  for (
    let child = root.lastChild;
    child !== null;
    child = child.previousSibling
  ) {
    if (isText(child)) return child;
    const nested = lastTextNodeIn(child);
    if (nested !== null) return nested;
  }
  return null;
}

/** Type guard: is `node` a DOM Text node? */
function isText(node: Node): node is Text {
  return node.nodeType === Node.TEXT_NODE;
}

/** Type guard: is `node` a processing instruction, comment or text node? */
function isCharacterData(node: Node): node is CharacterData {
  return (
    node.nodeType === Node.PROCESSING_INSTRUCTION_NODE ||
    node.nodeType === Node.COMMENT_NODE ||
    node.nodeType === Node.TEXT_NODE
  );
}
const domParser = new DOMParser();

describe('createTextPositionSelectorMatcher', () => {
  // Every test starts from a freshly parsed copy of its case’s HTML.
  const parseHtml = (html: string): Document =>
    domParser.parseFromString(html, 'text/html');

  Object.entries(testCases).forEach(([name, { html, selector, expected }]) => {
    it(`works for case: '${name}'`, async () => {
      await testMatcher(parseHtml(html), selector, expected);
    });
  });

  it('handles adjacent text nodes', async () => {
    const simpleCase = testCases['simple'];
    const doc = parseHtml(simpleCase.html);
    const textNode = evaluateXPath(doc, '//b/text()') as Text;

    textNode.splitText(16);
    // console.log([...textNode.parentNode.childNodes].map(node => node.textContent))
    // → [ 'l😃rem ipsum dol', 'or amet yada yada' ]

    const expectedRanges: RangeInfo[] = [
      {
        startContainerXPath: '//b/text()[1]',
        startOffset: 13,
        endContainerXPath: '//b/text()[2]',
        endOffset: 5,
      },
    ];
    await testMatcher(doc, simpleCase.selector, expectedRanges);
  });

  it('handles empty text nodes', async () => {
    const simpleCase = testCases['simple'];
    const doc = parseHtml(simpleCase.html);

    const textNode = evaluateXPath(doc, '//b/text()') as Text;
    // Splitting twice at the same offset leaves an empty text node in between.
    for (const offset of [textNode.length, 21, 21, 18, 18, 13, 13, 0]) {
      textNode.splitText(offset);
    }
    // console.log([...textNode.parentNode.childNodes].map(node => node.textContent))
    // → [ '', 'l😃rem ipsum ', '', 'dolor', '', ' am', '', 'et yada yada', '' ]

    const expectedRanges: RangeInfo[] = [
      {
        startContainerXPath: '//b/text()[4]', // "dolor"
        startOffset: 0,
        endContainerXPath: '//b/text()[8]', // "et yada yada"
        endOffset: 0,
      },
    ];
    await testMatcher(doc, simpleCase.selector, expectedRanges);
  });

  it('works when scope spans one text node’s contents, matching its first characters', async () => {
    const firstCharsCase = testCases['first characters'];
    const doc = parseHtml(firstCharsCase.html);

    const scope = doc.createRange();
    scope.selectNodeContents(evaluateXPath(doc, '//b/text()'));

    await testMatcher(scope, firstCharsCase.selector, firstCharsCase.expected);
  });

  it('works when scope starts with an empty text node, matching its first characters', async () => {
    const firstCharsCase = testCases['first characters'];
    const doc = parseHtml(firstCharsCase.html);

    // Splitting at offset 0 leaves an empty text node in front.
    const textNode = evaluateXPath(doc, '//b/text()') as Text;
    textNode.splitText(0);

    const scope = doc.createRange();
    scope.selectNodeContents(evaluateXPath(doc, '//b'));

    const expectedRanges: RangeInfo[] = [
      {
        startContainerXPath: '//b/text()[2]',
        startOffset: 0,
        endContainerXPath: '//b/text()[2]',
        endOffset: 12,
      },
    ];
    await testMatcher(scope, firstCharsCase.selector, expectedRanges);
  });

  it('works when scope has both ends within one text node', async () => {
    const simpleCase = testCases['simple'];
    const doc = parseHtml(simpleCase.html);

    // Use the substring ‘ipsum dolor amet’ as scope.
    const scope = doc.createRange();
    scope.setStart(evaluateXPath(doc, '//b/text()'), 7);
    scope.setEnd(evaluateXPath(doc, '//b/text()'), 23);

    // Positions are counted from the start of the scope.
    const selector: TextPositionSelector = {
      type: 'TextPositionSelector',
      start: 6,
      end: 14,
    };

    await testMatcher(scope, selector, simpleCase.expected);
  });

  it('works when scope has both ends inside text nodes', async () => {
    const acrossElementsCase = testCases['across elements'];
    const doc = parseHtml(acrossElementsCase.html);

    // Use the substring ‘sum dolor am’ as scope.
    const scope = doc.createRange();
    scope.setStart(evaluateXPath(doc, '//i/text()'), 2);
    scope.setEnd(evaluateXPath(doc, '//u/text()'), 2);

    const selector: TextPositionSelector = {
      type: 'TextPositionSelector',
      start: 4,
      end: 12,
    };

    await testMatcher(scope, selector, acrossElementsCase.expected);
  });

  it('works when scope has both ends inside an element', async () => {
    const acrossElementsCase = testCases['across elements'];
    const doc = parseHtml(acrossElementsCase.html);

    // Both boundaries are child indices within the matched `b` element.
    const scope = doc.createRange();
    scope.setStart(evaluateXPath(doc, '//b'), 1);
    scope.setEnd(evaluateXPath(doc, '//b'), 4);

    const selector: TextPositionSelector = {
      type: 'TextPositionSelector',
      start: 6,
      end: 14,
    };

    await testMatcher(scope, selector, acrossElementsCase.expected);
  });
});

/**
 * Run the matcher created for `selector` on `scope`, and check that it yields
 * exactly the `expected` ranges, in order.
 */
async function testMatcher(
  scope: Node | Range,
  selector: TextPositionSelector,
  expected: RangeInfo[],
) {
  const findMatches = createTextPositionSelectorMatcher(selector);
  let matchCount = 0;
  for await (const match of findMatches(scope)) {
    assertRangeEquals(match, expected[matchCount]);
    matchCount += 1;
  }
  assert.equal(matchCount, expected.length, 'Wrong number of matches.');
}
The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 * SPDX-FileCopyrightText: The Apache Software Foundation
 * SPDX-License-Identifier: Apache-2.0
 */

import type { Chunk } from './chunker.js';
import type { Seeker } from './seeker.js';

/**
 * Seeks through text counting Unicode *code points* instead of *code units*.
 *
 * Javascript characters correspond to 16 bits *code units*, hence two such
 * ‘characters’ might together constitute a single Unicode character (i.e. a
 * *code point*). The {@link CodePointSeeker} allows ignoring this
 * variable-length encoding, by counting code points instead.
 *
 * It is made to wrap a {@link Seeker} that counts code units (presumably a
 * {@link TextSeeker}), which must be passed to its {@link constructor}.
 *
 * When reading from the `CodePointSeeker`, the returned value is not a string
 * but an array of strings, each containing one code point (thus each having a
 * `length` that is either 1 or 2).
 *
 * @public
 */
export class CodePointSeeker<TChunk extends Chunk<string>>
  implements Seeker<TChunk, string[]> {
  /** Current position, counted in code points from where the seeker started. */
  position = 0;

  /**
   *
   * @param raw  The {@link Seeker} to wrap, which counts in code *units* (e.g.
   * a {@link TextSeeker}). It should have {@link Seeker.position | position}
   * `0` and its methods must no longer be used directly if the
   * `CodePointSeeker`’s position is to remain correct.
   */
  constructor(public readonly raw: Seeker<TChunk>) {}

  /** Move `length` code points relative to the current position. */
  seekBy(length: number): void {
    this.seekTo(this.position + length);
  }

  /** Move to the code point position `target`. */
  seekTo(target: number): void {
    this._readOrSeekTo(false, target);
  }

  /**
   * Read `length` code points relative to the current position.
   *
   * @param roundUp  If true, the position is left wherever the last read of
   * the wrapped seeker ended, rather than being rewound to the exact target.
   * @returns One string per code point passed over.
   */
  read(length: number, roundUp?: boolean): string[] {
    return this.readTo(this.position + length, roundUp);
  }

  /**
   * Read up to the code point position `target`.
   *
   * @param roundUp  See {@link read}.
   * @returns One string per code point passed over.
   */
  readTo(target: number, roundUp?: boolean): string[] {
    return this._readOrSeekTo(true, target, roundUp);
  }

  /** The chunk the wrapped seeker is currently in. */
  get currentChunk(): TChunk {
    return this.raw.currentChunk;
  }

  /** Offset of the wrapped seeker within its current chunk. */
  get offsetInChunk(): number {
    return this.raw.offsetInChunk;
  }

  /** Move to `offset` within the chunk `target`. */
  seekToChunk(target: TChunk, offset = 0): void {
    this._readOrSeekToChunk(false, target, offset);
  }

  /** Move to `offset` within the chunk `target`, returning the code points passed over. */
  readToChunk(target: TChunk, offset = 0): string[] {
    return this._readOrSeekToChunk(true, target, offset);
  }

  private _readOrSeekToChunk(
    read: true,
    target: TChunk,
    offset?: number,
  ): string[];
  private _readOrSeekToChunk(
    read: false,
    target: TChunk,
    offset?: number,
  ): void;
  private _readOrSeekToChunk(read: boolean, target: TChunk, offset = 0) {
    const oldRawPosition = this.raw.position;

    // Always read (even when only seeking): the text passed over is needed to
    // count how many code points the position changed by.
    let s = this.raw.readToChunk(target, offset);

    const movedForward = this.raw.position >= oldRawPosition;

    // If the requested offset falls between the two halves of a surrogate
    // pair, step back so that we end up on a code point boundary.
    if (movedForward && endsWithinCharacter(s)) {
      this.raw.seekBy(-1);
      s = s.slice(0, -1);
    } else if (!movedForward && startsWithinCharacter(s)) {
      this.raw.seekBy(1);
      s = s.slice(1);
    }

    // Spreading a string iterates it by code point.
    const result = [...s];

    this.position = movedForward
      ? this.position + result.length
      : this.position - result.length;

    if (read) return result;
  }

  private _readOrSeekTo(
    read: true,
    target: number,
    roundUp?: boolean,
  ): string[];
  private _readOrSeekTo(read: false, target: number, roundUp?: boolean): void;
  private _readOrSeekTo(
    read: boolean,
    target: number,
    roundUp = false,
  ): string[] | void {
    let result: string[] = [];

    if (this.position < target) {
      let unpairedSurrogate = '';
      let characters: string[] = [];
      while (this.position < target) {
        // Each raw read is rounded up, so it may pass the target; any
        // overshoot is corrected after the loop.
        let s = unpairedSurrogate + this.raw.read(1, true);
        if (endsWithinCharacter(s)) {
          unpairedSurrogate = s.slice(-1); // consider this half-character part of the next string.
          s = s.slice(0, -1);
        } else {
          unpairedSurrogate = '';
        }
        characters = [...s];
        this.position += characters.length;
        if (read) result = result.concat(characters);
      }
      if (unpairedSurrogate) this.raw.seekBy(-1); // align with the last complete character.
      if (!roundUp && this.position > target) {
        // Only the last read can have passed the target, hence `characters`
        // (the code points of that last read) suffices to measure the overshoot.
        const overshootInCodePoints = this.position - target;
        const overshootInCodeUnits = characters
          .slice(-overshootInCodePoints)
          .join('').length;
        this.position -= overshootInCodePoints;
        this.raw.seekBy(-overshootInCodeUnits);
        // Also drop the overshoot from the returned value, so that the result
        // ends exactly at the target, like the position does.
        if (read) {
          result = result.slice(0, result.length - overshootInCodePoints);
        }
      }
    } else {
      // Nearly equal to the if-block, but moving backward in the text.
      let unpairedSurrogate = '';
      let characters: string[] = [];
      while (this.position > target) {
        let s = this.raw.read(-1, true) + unpairedSurrogate;
        if (startsWithinCharacter(s)) {
          unpairedSurrogate = s[0];
          s = s.slice(1);
        } else {
          unpairedSurrogate = '';
        }
        characters = [...s];
        this.position -= characters.length;
        if (read) result = characters.concat(result);
      }
      if (unpairedSurrogate) this.raw.seekBy(1);
      if (!roundUp && this.position < target) {
        const overshootInCodePoints = target - this.position;
        const overshootInCodeUnits = characters
          .slice(0, overshootInCodePoints)
          .join('').length;
        this.position += overshootInCodePoints;
        this.raw.seekBy(overshootInCodeUnits);
        // When moving backward, the overshoot sits at the front of the result.
        if (read) result = result.slice(overshootInCodePoints);
      }
    }

    if (read) return result;
  }
}

/**
 * Tell whether the last code unit of `s` is a high (leading) surrogate, i.e.
 * whether `s` ends halfway through a surrogate pair. False for an empty string.
 */
function endsWithinCharacter(s: string) {
  const codeUnit = s.charCodeAt(s.length - 1);
  return 0xd800 <= codeUnit && codeUnit <= 0xdbff;
}

/**
 * Tell whether the first code unit of `s` is a low (trailing) surrogate, i.e.
 * whether `s` starts halfway through a surrogate pair. False for an empty string.
 */
function startsWithinCharacter(s: string) {
  const codeUnit = s.charCodeAt(0);
  return 0xdc00 <= codeUnit && codeUnit <= 0xdfff;
}

--------------------------------------------------------------------------------
/packages/dom/test/text-quote/match.test.ts:
--------------------------------------------------------------------------------
/**
 * @license
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.
You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 * SPDX-FileCopyrightText: The Apache Software Foundation
 * SPDX-License-Identifier: Apache-2.0
 */

import { strict as assert } from 'assert';
import type { TextQuoteSelector } from '@apache-annotator/selector';
import { createTextQuoteSelectorMatcher } from '../../src/text-quote/match.js';
import { evaluateXPath, assertRangeEquals } from '../utils.js';
import type { RangeInfo } from '../utils.js';
import { testCases } from './match-cases.js';

const domParser = new DOMParser();

// Test suite for the DOM text quote matcher. The shared fixtures (html,
// selector, expected ranges) come from ./match-cases.js; the tests below the
// generated ones vary the DOM structure or the scope around those fixtures.
describe('createTextQuoteSelectorMatcher', () => {
  // One generated test per fixture, using the whole document as scope.
  for (const [name, { html, selector, expected }] of Object.entries(
    testCases,
  )) {
    it(`works for case: '${name}'`, async () => {
      const doc = domParser.parseFromString(html, 'text/html');
      await testMatcher(doc, doc, selector, expected);
    });
  }

  // Skipped: same fixtures, but every match is wrapped in a <mark> element
  // while the matcher is still iterating (mutateDom = true).
  describe.skip('Is resistant to splitting text nodes', () => {
    for (const [name, { html, selector, expected }] of Object.entries(
      testCases,
    )) {
      it(`for case: '${name}'`, async () => {
        const doc = domParser.parseFromString(html, 'text/html');
        await testMatcher(doc, doc, selector, expected, true);
      });
    }
  });

  it('handles adjacent text nodes', async () => {
    const { html, selector } = testCases['simple'];
    const doc = domParser.parseFromString(html, 'text/html');
    const textNode = evaluateXPath(doc, '//b/text()') as Text;

    // Split from the back to the front, so that every character ends up in
    // its own text node.
    for (let index = textNode.length - 1; index > 0; index--)
      textNode.splitText(index);
    // console.log([...textNode.parentNode.childNodes].map(node => node.textContent))
    // → 'l', 'o', 'r', 'e', 'm', …

    await testMatcher(doc, doc, selector, [
      {
        startContainerXPath: '//b/text()[13]',
        startOffset: 0,
        endContainerXPath: '//b/text()[20]',
        endOffset: 1,
      },
    ]);
  });

  it('handles empty text nodes', async () => {
    const { html, selector } = testCases['simple'];
    const doc = domParser.parseFromString(html, 'text/html');
    const textNode = evaluateXPath(doc, '//b/text()') as Text;
    // Splitting twice at the same offset leaves an empty text node in between.
    textNode.splitText(textNode.length);
    textNode.splitText(20);
    textNode.splitText(20);
    textNode.splitText(17);
    textNode.splitText(17);
    textNode.splitText(12);
    textNode.splitText(12);
    textNode.splitText(0);
    // console.log([...textNode.parentNode.childNodes].map(node => node.textContent))
    // → '', 'lorem ipsum ', '', 'dolor', '', ' am', '', 'et yada yada', ''

    await testMatcher(doc, doc, selector, [
      {
        startContainerXPath: '//b/text()[4]', // "dolor"
        startOffset: 0,
        endContainerXPath: '//b/text()[6]', // " am"
        endOffset: 3,
      },
    ]);
  });

  it('works when scope spans one text node’s contents, matching its first characters', async () => {
    const { html, selector, expected } = testCases['first characters'];
    const doc = domParser.parseFromString(html, 'text/html');

    const scope = doc.createRange();
    scope.selectNodeContents(evaluateXPath(doc, '//b/text()'));

    await testMatcher(doc, scope, selector, expected);
  });

  it('works when scope starts with an empty text node, matching its first characters', async () => {
    const { html, selector } = testCases['first characters'];
    const doc = domParser.parseFromString(html, 'text/html');

    // Splitting at offset 0 leaves an empty text node in front of the text.
    const textNode = evaluateXPath(doc, '//b/text()') as Text;
    textNode.splitText(0);

    const scope = doc.createRange();
    scope.selectNodeContents(evaluateXPath(doc, '//b'));

    await testMatcher(doc, scope, selector, [
      {
        startContainerXPath: '//b/text()[2]',
        startOffset: 0,
        endContainerXPath: '//b/text()[2]',
        endOffset: 11,
      },
    ]);
  });

  it('works when scope has both ends within one text node', async () => {
    const { html, selector, expected } = testCases['simple'];
    const doc = domParser.parseFromString(html, 'text/html');

    // Use the substring ‘ipsum dolor amet’ as scope.
    const scope = doc.createRange();
    scope.setStart(evaluateXPath(doc, '//b/text()'), 6);
    scope.setEnd(evaluateXPath(doc, '//b/text()'), 22);
    await testMatcher(doc, scope, selector, expected);
  });

  it('works when scope has both ends inside text nodes', async () => {
    const { html, selector, expected } = testCases['across elements'];
    const doc = domParser.parseFromString(html, 'text/html');

    // Use the substring ‘sum dolor am’ as scope.
    const scope = doc.createRange();
    scope.setStart(evaluateXPath(doc, '//i/text()'), 2);
    scope.setEnd(evaluateXPath(doc, '//u/text()'), 2);
    await testMatcher(doc, scope, selector, expected);
  });

  it('works when scope has both ends inside an element', async () => {
    const { html, selector, expected } = testCases['across elements'];
    const doc = domParser.parseFromString(html, 'text/html');

    // Here the range boundaries are child indexes of the <b> element, not
    // character offsets.
    const scope = doc.createRange();
    scope.setStart(evaluateXPath(doc, '//b'), 1); // before the child at index 1 (its name was lost from this comment; NOTE(review): presumably the <i> element)
    scope.setEnd(evaluateXPath(doc, '//b'), 4); // before the " yada yada"
    await testMatcher(doc, scope, selector, expected);
  });

  it('ignores quote when scope is an empty range', async () => {
    const { html, selector } = testCases['simple'];
    const doc = domParser.parseFromString(html, 'text/html');

    // A freshly created range is left as is, i.e. collapsed.
    const scope = doc.createRange();
    await testMatcher(doc, scope, selector, []);
  });

  it('ignores quote extending just beyond scope', async () => {
    const { html, selector } = testCases['simple'];
    const doc = domParser.parseFromString(html, 'text/html');

    const scope = doc.createRange();
    scope.setStart(evaluateXPath(doc, '//b/text()'), 0);
    scope.setEnd(evaluateXPath(doc, '//b/text()'), 19);
    await testMatcher(doc, scope, selector, []);
  });

  it('ignores quote starting just before scope', async () => {
    const { html, selector } = testCases['simple'];
    const doc = domParser.parseFromString(html, 'text/html');

    const scope = doc.createRange();
    scope.setStart(evaluateXPath(doc, '//b/text()'), 13);
    scope.setEnd(evaluateXPath(doc, '//b/text()'), 32);
    await testMatcher(doc, scope, selector, []);
  });
});

/**
 * Run the text quote matcher for `selector` on `scope`, and assert that it
 * produces exactly the `expected` ranges, in order.
 *
 * @param doc  The document that contains `scope`; only used to create the
 * wrapper elements when `mutateDom` is set.
 * @param scope  The node or range to search in.
 * @param selector  The selector to match.
 * @param expected  The expected matches, in the order they should be produced.
 * @param mutateDom  If true, wrap each match in a `<mark>` element as soon as
 * it is produced, i.e. while the matcher is still running.
 */
async function testMatcher(
  doc: Document,
  scope: Node | Range,
  selector: TextQuoteSelector,
  expected: RangeInfo[],
  mutateDom = false,
) {
  const matcher = createTextQuoteSelectorMatcher(selector);
  let count = 0;
  for await (const match of matcher(scope)) {
    assertRangeEquals(match, expected[count++]);
    if (mutateDom) {
      const wrapperNode = doc.createElement('mark');
      match.surroundContents(wrapperNode);
    }
  }
  // Catches the matcher producing fewer matches than expected.
  assert.equal(count, expected.length, 'Wrong number of matches.');
}

--------------------------------------------------------------------------------
/packages/dom/test/highlight-text/highlight-text.test.ts:
--------------------------------------------------------------------------------
/**
 * @license
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
*
 * SPDX-FileCopyrightText: The Apache Software Foundation
 * SPDX-License-Identifier: Apache-2.0
 */

import { strict as assert } from 'assert';
import { highlightText } from '../../src/highlight-text.js';
import type { RangeInfo } from '../utils.js';
import { hydrateRange, evaluateXPath } from '../utils.js';

const domParser = new DOMParser();

// Table of test cases: each highlights `range` within `inputHtml` (optionally
// with a custom tag name and attributes) and expects `expectedHtml` as result.
//
// NOTE(review): the fixture strings below contain no markup although the
// XPaths refer to <b>, <u>, <mark> elements, and `Record` has no type
// arguments; the tags and type arguments appear to have been stripped from
// this copy of the file — restore them from the original before relying on it.
const testCases: {
  [name: string]: {
    inputHtml: string;
    range: RangeInfo;
    tagName?: string;
    attributes?: Record;
    expectedHtml: string;
  };
} = {
  'single text node': {
    inputHtml: 'lorem ipsum dolor amet yada yada',
    range: {
      startContainerXPath: '//b/text()',
      startOffset: 12,
      endContainerXPath: '//b/text()',
      endOffset: 20,
    },
    expectedHtml: 'lorem ipsum dolor amet yada yada',
  },
  'across elements': {
    inputHtml: 'lorem ipsum dolor amet yada yada',
    range: {
      startContainerXPath: '//b/text()[2]',
      startOffset: 1,
      endContainerXPath: '//u/text()',
      endOffset: 2,
    },
    expectedHtml:
      'lorem ipsum dolor amet yada yada',
  },
  'collapsed range': {
    inputHtml: 'lorem ipsum dolor amet yada yada',
    range: {
      startContainerXPath: '//b/text()',
      startOffset: 12,
      endContainerXPath: '//b/text()',
      endOffset: 12,
    },
    expectedHtml: 'lorem ipsum dolor amet yada yada',
  },
  'custom tag name': {
    inputHtml: 'lorem ipsum dolor amet yada yada',
    range: {
      startContainerXPath: '//b/text()',
      startOffset: 12,
      endContainerXPath: '//b/text()',
      endOffset: 20,
    },
    tagName: 'span',
    expectedHtml: 'lorem ipsum dolor amet yada yada',
  },
  'custom attributes': {
    inputHtml: 'lorem ipsum dolor amet yada yada',
    range: {
      startContainerXPath: '//b/text()',
      startOffset: 12,
      endContainerXPath: '//b/text()',
      endOffset: 20,
    },
    attributes: {
      class: 'red',
    },
    expectedHtml:
      'lorem ipsum dolor amet yada yada',
  },
  'overlapping highlight': {
    // Starts off from the result of the 'single text node' case.
    inputHtml: 'lorem ipsum dolor amet yada yada',
    range: {
      startContainerXPath: '//mark/text()',
      startOffset: 6,
      endContainerXPath: '//b/text()[2]',
      endOffset: 7,
    },
    tagName: 'mark2',
    expectedHtml:
      'lorem ipsum dolor amet yada yada',
  },
};

describe('highlightText', () => {
  // One generated test per entry of the table above.
  for (const [
    name,
    { inputHtml, range, tagName, attributes, expectedHtml },
  ] of Object.entries(testCases)) {
    it(`works for case: ${name}`, () => {
      const doc = domParser.parseFromString(inputHtml, 'text/html');

      // Invoke highlightText for the specified Range, and check the result.
      const removeHighlights = highlightText(
        hydrateRange(range, doc),
        tagName,
        attributes,
      );
      assert.equal(doc.body.innerHTML, expectedHtml);

      // Remove the highlight again and check that we end up exactly how we started.
      removeHighlights();
      assert.equal(doc.body.innerHTML, inputHtml);
    });
  }

  it('works on adjacent text nodes', () => {
    const inputHtml = 'lorem ipsum dolor amet yada yada';
    const doc = domParser.parseFromString(inputHtml, 'text/html');

    const textNode = evaluateXPath(doc, '//b/text()') as Text;
    textNode.splitText(15); // after 'dol'

    // The end offset is relative to the second text node, which starts at
    // character 15 of the original text.
    const range = doc.createRange();
    range.setStart(evaluateXPath(doc, '//b/text()[1]'), 12); // before 'dolor am'
    range.setEnd(evaluateXPath(doc, '//b/text()[2]'), 20 - 15); // after 'dolor am'

    const removeHighlights = highlightText(range);
    const expectedHtml =
      'lorem ipsum dolor amet yada yada';
    assert.equal(doc.body.innerHTML, expectedHtml);

    removeHighlights();
    assert.equal(doc.body.innerHTML, inputHtml);
  });

  it('also marks empty text nodes', () => {
    const inputHtml = 'lorem ipsum dolor amet yada yada';
    const doc = domParser.parseFromString(inputHtml, 'text/html');

    const textNode = evaluateXPath(doc, '//b/text()') as Text;
    textNode.splitText(15);
    textNode.splitText(15); // Split the node twice to create an empty text node.

    const range = doc.createRange();
    range.setStart(evaluateXPath(doc, '//b/text()[1]'), 12); // before 'dolor am'
    range.setEnd(evaluateXPath(doc, '//b/text()[3]'), 20 - 15); // after 'dolor am'

    const removeHighlights = highlightText(range);
    const expectedHtml =
      'lorem ipsum dolor amet yada yada';
    assert.equal(doc.body.innerHTML, expectedHtml);

    removeHighlights();
    assert.equal(doc.body.innerHTML, inputHtml);
  });

  it('ignores a range that does not contain Text nodes', () => {
    const inputHtml = `Try highlighting this image: — would that work?`;
    const doc = domParser.parseFromString(inputHtml, 'text/html');

    const range = doc.createRange();
    range.selectNode(evaluateXPath(doc, '//img'));

    // Neither highlighting nor removing the highlight should change the document.
    const removeHighlights = highlightText(range);
    assert.equal(doc.body.innerHTML, inputHtml);

    removeHighlights();
    assert.equal(doc.body.innerHTML, inputHtml);
  });

  it('correctly removes multiple highlights (fifo order)', () => {
    const { inputHtml, range } = testCases['single text node'];
    const { range: range2, expectedHtml } = testCases['overlapping highlight'];
    const doc = domParser.parseFromString(inputHtml, 'text/html');

    const removeHighlights1 = highlightText(hydrateRange(range, doc));
    const removeHighlights2 = highlightText(hydrateRange(range2, doc), 'mark2');
    assert.equal(doc.body.innerHTML, expectedHtml);

    // Remove the highlights in the order they were added.
    removeHighlights1();
    removeHighlights2();
    assert.equal(doc.body.innerHTML, inputHtml);
  });

  it('correctly removes multiple highlights (lifo order)', () => {
    const { inputHtml, range } = testCases['single text node'];
    const { range: range2, expectedHtml } = testCases['overlapping highlight'];
    const doc = domParser.parseFromString(inputHtml, 'text/html');

    const removeHighlights1 = highlightText(hydrateRange(range, doc));
    const removeHighlights2 = highlightText(hydrateRange(range2, doc), 'mark2');
    assert.equal(doc.body.innerHTML, expectedHtml);

    // Remove the highlights in the reverse order of adding them.
    removeHighlights2();
    removeHighlights1();
    assert.equal(doc.body.innerHTML, inputHtml);
  });
});

--------------------------------------------------------------------------------
/packages/selector/src/text/match-text-quote.ts:
--------------------------------------------------------------------------------
/**
 * @license
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 * SPDX-FileCopyrightText: The Apache Software Foundation
 * SPDX-License-Identifier: Apache-2.0
 */

import type { TextQuoteSelector } from '../types.js';
import type { Chunk, Chunker, ChunkRange } from './chunker.js';

/**
 * Find occurrences in a text matching the given {@link TextQuoteSelector}.
 *
 * This performs an exact search for the selector’s quote (including prefix and
 * suffix) within the given text.
 *
 * Note the match is based on strict character-by-character equivalence, i.e.
* it is sensitive to whitespace, capitalisation, etc.
 *
 * This is an abstract implementation of the function’s logic, which expects a
 * generic {@link Chunker} to represent the text, and returns an (async)
 * generator of {@link ChunkRange}s to represent the matches.
 *
 * See {@link dom.createTextQuoteSelectorMatcher} for a
 * wrapper around this implementation which applies it to the text of an HTML
 * DOM.
 *
 * The function is curried, taking first the selector and then the text.
 *
 * As there may be multiple matches for a given selector (when its prefix and
 * suffix attributes are not sufficient to disambiguate it), the matcher will
 * return an (async) generator that produces each match in the order they are
 * found in the text.
 *
 * *XXX Modifying the Chunks while the search is still running can mess up and
 * result in an error or an infinite loop. See [issue #112](https://github.com/apache/incubator-annotator/issues/112).*
 *
 * @example
 * ```
 * const selector = { type: 'TextQuoteSelector', exact: 'banana' };
 * const matches = textQuoteSelectorMatcher(selector)(textChunks);
 * for await (match of matches) console.log(match);
 * // ⇒ { startChunk: { … }, startIndex: 187, endChunk: { … }, endIndex: 193 }
 * // ⇒ { startChunk: { … }, startIndex: 631, endChunk: { … }, endIndex: 637 }
 * ```
 *
 * @param selector - The {@link TextQuoteSelector} to be anchored
 * @returns a {@link Matcher} function that applies `selector` to a given text
 *
 * @public
 */
export function textQuoteSelectorMatcher(
  selector: TextQuoteSelector,
): <TChunk extends Chunk<string>>(
  scope: Chunker<TChunk>,
) => AsyncGenerator<ChunkRange<TChunk>, void, void> {
  return async function* matchAll<TChunk extends Chunk<string>>(
    textChunks: Chunker<TChunk>,
  ) {
    const exact = selector.exact;
    const prefix = selector.prefix || '';
    const suffix = selector.suffix || '';
    const searchPattern = prefix + exact + suffix;

    // The code below essentially just performs string.indexOf(searchPattern),
    // but on a string that is chopped up in multiple chunks. It runs a loop
    // containing three steps:
    // 1. Continue checking any partial matches from the previous chunk(s).
    // 2. Try find the whole pattern in the chunk (possibly multiple times).
    // 3. Check if this chunk ends with a partial match (or even multiple partial matches).

    // A match of the search pattern that started in an earlier chunk and is
    // not complete yet. The range fields get filled in once the chunk that
    // contains the start/end of the quote (`exact`) has been reached;
    // `charactersMatched` counts how much of the pattern matched so far.
    interface PartialMatch {
      startChunk?: TChunk;
      startIndex?: number;
      endChunk?: TChunk;
      endIndex?: number;
      charactersMatched: number;
    }
    let partialMatches: PartialMatch[] = [];

    let isFirstChunk = true;
    do {
      const chunk = textChunks.currentChunk;
      const chunkValue = chunk.data;

      // 1. Continue checking any partial matches from the previous chunk(s).
      const remainingPartialMatches: typeof partialMatches = [];
      for (const partialMatch of partialMatches) {
        const charactersMatched = partialMatch.charactersMatched;

        // If the current chunk contains the start and/or end of the match, record these.
        // (This is done before knowing whether the chunk actually continues
        // the match; a failed partial match is simply dropped below.)
        if (partialMatch.endChunk === undefined) {
          const charactersUntilMatchEnd =
            prefix.length + exact.length - charactersMatched;
          if (charactersUntilMatchEnd <= chunkValue.length) {
            partialMatch.endChunk = chunk;
            partialMatch.endIndex = charactersUntilMatchEnd;
          }
        }
        if (partialMatch.startChunk === undefined) {
          const charactersUntilMatchStart = prefix.length - charactersMatched;
          if (
            charactersUntilMatchStart < chunkValue.length ||
            partialMatch.endChunk !== undefined // handles an edge case: an empty quote at the end of a chunk.
          ) {
            partialMatch.startChunk = chunk;
            partialMatch.startIndex = charactersUntilMatchStart;
          }
        }

        const charactersUntilSuffixEnd =
          searchPattern.length - charactersMatched;
        if (charactersUntilSuffixEnd <= chunkValue.length) {
          // The rest of the pattern fits in this chunk: the match either
          // completes here or fails.
          if (
            chunkValue.startsWith(searchPattern.substring(charactersMatched))
          ) {
            yield partialMatch as ChunkRange<TChunk>; // all fields are certainly defined now.
          }
        } else if (
          chunkValue ===
          searchPattern.substring(
            charactersMatched,
            charactersMatched + chunkValue.length,
          )
        ) {
          // The chunk is too short to complete the match; comparison has to be completed in subsequent chunks.
          partialMatch.charactersMatched += chunkValue.length;
          remainingPartialMatches.push(partialMatch);
        }
      }
      partialMatches = remainingPartialMatches;

      // 2. Try find the whole pattern in the chunk (possibly multiple times).
      if (searchPattern.length <= chunkValue.length) {
        let fromIndex = 0;
        while (fromIndex <= chunkValue.length) {
          const patternStartIndex = chunkValue.indexOf(
            searchPattern,
            fromIndex,
          );
          if (patternStartIndex === -1) break;
          // Advance by one only, so that overlapping matches are found too.
          fromIndex = patternStartIndex + 1;

          // Handle edge case: an empty searchPattern would already have been yielded at the end of the last chunk.
          if (
            patternStartIndex === 0 &&
            searchPattern.length === 0 &&
            !isFirstChunk
          )
            continue;

          yield {
            startChunk: chunk,
            startIndex: patternStartIndex + prefix.length,
            endChunk: chunk,
            endIndex: patternStartIndex + prefix.length + exact.length,
          };
        }
      }

      // 3. Check if this chunk ends with a partial match (or even multiple partial matches).
      // Only the last (searchPattern.length - 1) characters can start a match
      // that does not fit within this chunk.
      let newPartialMatches: number[] = [];
      const searchStartPoint = Math.max(
        chunkValue.length - searchPattern.length + 1,
        0,
      );
      for (let i = searchStartPoint; i < chunkValue.length; i++) {
        const character = chunkValue[i];
        // Keep the candidate start indexes that still match at this character…
        newPartialMatches = newPartialMatches.filter(
          (partialMatchStartIndex) =>
            character === searchPattern[i - partialMatchStartIndex],
        );
        // …and consider this character as the start of a new candidate.
        if (character === searchPattern[0]) newPartialMatches.push(i);
      }
      for (const partialMatchStartIndex of newPartialMatches) {
        const charactersMatched = chunkValue.length - partialMatchStartIndex;
        const partialMatch: PartialMatch = {
          charactersMatched,
        };
        if (charactersMatched >= prefix.length + exact.length) {
          partialMatch.endChunk = chunk;
          partialMatch.endIndex =
            partialMatchStartIndex + prefix.length + exact.length;
        }
        if (
          charactersMatched > prefix.length ||
          partialMatch.endChunk !== undefined // handles an edge case: an empty quote at the end of a chunk.
        ) {
          partialMatch.startChunk = chunk;
          partialMatch.startIndex = partialMatchStartIndex + prefix.length;
        }
        partialMatches.push(partialMatch);
      }

      isFirstChunk = false;
    } while (textChunks.nextChunk() !== null);
  };
}

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------

Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/

TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

1. Definitions.

"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.

"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 
48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /LICENSES/Apache-2.0.txt: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. 
For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 
48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /packages/selector/src/text/describe-text-quote.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * @license 3 | * Licensed to the Apache Software Foundation (ASF) under one 4 | * or more contributor license agreements. See the NOTICE file 5 | * distributed with this work for additional information 6 | * regarding copyright ownership. The ASF licenses this file 7 | * to you under the Apache License, Version 2.0 (the 8 | * "License"); you may not use this file except in compliance 9 | * with the License. You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, 14 | * software distributed under the License is distributed on an 15 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | * KIND, either express or implied. See the License for the 17 | * specific language governing permissions and limitations 18 | * under the License. 
19 | * 20 | * SPDX-FileCopyrightText: The Apache Software Foundation 21 | * SPDX-License-Identifier: Apache-2.0 22 | */ 23 | 24 | import type { TextQuoteSelector } from '../types.js'; 25 | import type { Chunk, Chunker, ChunkRange } from './chunker.js'; 26 | import { chunkRangeEquals } from './chunker.js'; 27 | import { textQuoteSelectorMatcher } from './match-text-quote.js'; 28 | import type { RelativeSeeker } from './seeker.js'; 29 | import { TextSeeker } from './seeker.js'; 30 | 31 | /** 32 | * @public 33 | */ 34 | export interface DescribeTextQuoteOptions { 35 | /** 36 | * Keep prefix and suffix to the minimum that is necessary to disambiguate 37 | * the quote. Use only if robustness against text variations is not required. 38 | */ 39 | minimalContext?: boolean; 40 | 41 | /** 42 | * Add prefix and suffix to quotes below this length, such that the total of 43 | * `prefix + exact + suffix` is at least this length. 44 | */ 45 | minimumQuoteLength?: number; 46 | 47 | /** 48 | * When attempting to find a whitespace to make the prefix/suffix start/end 49 | * (resp.) at a word boundary, give up after this number of characters. 50 | */ 51 | maxWordLength?: number; 52 | } 53 | 54 | /** 55 | * Returns a {@link TextQuoteSelector} that points at the target quote in the 56 | * given text. 57 | * 58 | * The selector will contain the exact target quote. In case this quote appears 59 | * multiple times in the text, sufficient context around the quote will be 60 | * included in the selector’s `prefix` and `suffix` attributes to disambiguate. 61 | * By default, more prefix and suffix are included than strictly required; both 62 | * in order to be robust against slight modifications, and in an attempt to not 63 | * end halfway a word (mainly for human readability). 64 | * 65 | * This is an abstract implementation of the function’s logic, which expects a 66 | * generic {@link Chunker} to represent the text, and a {@link ChunkRange} to 67 | * represent the target. 
68 | * 69 | * See {@link dom.describeTextQuote} for a wrapper around this 70 | * implementation which applies it to the text of an HTML DOM. 71 | * 72 | * @param target - The range of characters that the selector should describe 73 | * @param scope - The text containing the target range; or, more accurately, a 74 | * function that produces {@link Chunker}s corresponding to this text. 75 | * @param options - Options to fine-tune the function’s behaviour. 76 | * @returns The {@link TextQuoteSelector} that describes `target`. 77 | * 78 | * @public 79 | */ 80 | export async function describeTextQuote>( 81 | target: ChunkRange, 82 | scope: () => Chunker, 83 | options: DescribeTextQuoteOptions = {}, 84 | ): Promise { 85 | const { 86 | minimalContext = false, 87 | minimumQuoteLength = 0, 88 | maxWordLength = 50, 89 | } = options; 90 | 91 | // Create a seeker to read the target quote and the context around it. 92 | // TODO Possible optimisation: as it need not be an AbsoluteSeeker, a 93 | // different implementation could provide direct ‘jump’ access in seekToChunk 94 | // (the scope’s Chunker would of course also have to support this). 95 | const seekerAtTarget = new TextSeeker(scope()); 96 | 97 | // Create a second seeker so that we will be able to simultaneously read 98 | // characters near both the target and an unintended match, if we find any. 99 | const seekerAtUnintendedMatch = new TextSeeker(scope()); 100 | 101 | // Read the target’s exact text. 102 | seekerAtTarget.seekToChunk(target.startChunk, target.startIndex); 103 | const exact = seekerAtTarget.readToChunk(target.endChunk, target.endIndex); 104 | 105 | // Start with an empty prefix and suffix. 106 | let prefix = ''; 107 | let suffix = ''; 108 | 109 | // If the quote is below the given minimum length, add some prefix & suffix. 
110 | const currentQuoteLength = () => prefix.length + exact.length + suffix.length; 111 | if (currentQuoteLength() < minimumQuoteLength) { 112 | // Expand the prefix, but only to reach halfway towards the desired length. 113 | seekerAtTarget.seekToChunk( 114 | target.startChunk, 115 | target.startIndex - prefix.length, 116 | ); 117 | const length = Math.floor((minimumQuoteLength - currentQuoteLength()) / 2); 118 | prefix = seekerAtTarget.read(-length, false, true) + prefix; 119 | 120 | // If needed, expand the suffix to achieve the minimum length. 121 | if (currentQuoteLength() < minimumQuoteLength) { 122 | seekerAtTarget.seekToChunk( 123 | target.endChunk, 124 | target.endIndex + suffix.length, 125 | ); 126 | const length = minimumQuoteLength - currentQuoteLength(); 127 | suffix = suffix + seekerAtTarget.read(length, false, true); 128 | 129 | // We might have to expand the prefix again (if at the end of the scope). 130 | if (currentQuoteLength() < minimumQuoteLength) { 131 | seekerAtTarget.seekToChunk( 132 | target.startChunk, 133 | target.startIndex - prefix.length, 134 | ); 135 | const length = minimumQuoteLength - currentQuoteLength(); 136 | prefix = seekerAtTarget.read(-length, false, true) + prefix; 137 | } 138 | } 139 | } 140 | 141 | // Expand prefix & suffix to avoid them ending somewhere halfway in a word. 142 | if (!minimalContext) { 143 | seekerAtTarget.seekToChunk( 144 | target.startChunk, 145 | target.startIndex - prefix.length, 146 | ); 147 | prefix = readUntilWhitespace(seekerAtTarget, maxWordLength, true) + prefix; 148 | seekerAtTarget.seekToChunk( 149 | target.endChunk, 150 | target.endIndex + suffix.length, 151 | ); 152 | suffix = suffix + readUntilWhitespace(seekerAtTarget, maxWordLength, false); 153 | } 154 | 155 | // Search for matches of the quote using the current prefix and suffix. At 156 | // each unintended match we encounter, we extend the prefix or suffix to 157 | // ensure it will no longer match. 
158 | while (true) { 159 | const tentativeSelector: TextQuoteSelector = { 160 | type: 'TextQuoteSelector', 161 | exact, 162 | prefix, 163 | suffix, 164 | }; 165 | 166 | const matches = textQuoteSelectorMatcher(tentativeSelector)(scope()); 167 | let nextMatch = await matches.next(); 168 | 169 | // If this match is the intended one, no need to act. 170 | // XXX This test is fragile: nextMatch and target are assumed to be normalised. 171 | if (!nextMatch.done && chunkRangeEquals(nextMatch.value, target)) { 172 | nextMatch = await matches.next(); 173 | } 174 | 175 | // If there are no more unintended matches, our selector is unambiguous! 176 | if (nextMatch.done) return tentativeSelector; 177 | 178 | // Possible optimisation: A subsequent search could safely skip the part we 179 | // already processed, instead of starting from the beginning again. But we’d 180 | // need the matcher to start at the seeker’s position, instead of searching 181 | // in the whole current chunk. Then we could just seek back to just after 182 | // the start of the prefix: seeker.seekBy(-prefix.length + 1); (don’t forget 183 | // to also correct for any changes in the prefix we will make below) 184 | 185 | // We’ll have to add more prefix/suffix to disqualify this unintended match. 186 | const unintendedMatch = nextMatch.value; 187 | 188 | // Count how many characters we’d need as a prefix to disqualify this match. 
189 | seekerAtTarget.seekToChunk( 190 | target.startChunk, 191 | target.startIndex - prefix.length, 192 | ); 193 | seekerAtUnintendedMatch.seekToChunk( 194 | unintendedMatch.startChunk, 195 | unintendedMatch.startIndex - prefix.length, 196 | ); 197 | let extraPrefix = readUntilDifferent( 198 | seekerAtTarget, 199 | seekerAtUnintendedMatch, 200 | true, 201 | ); 202 | if (extraPrefix !== undefined && !minimalContext) 203 | extraPrefix = 204 | readUntilWhitespace(seekerAtTarget, maxWordLength, true) + extraPrefix; 205 | 206 | // Count how many characters we’d need as a suffix to disqualify this match. 207 | seekerAtTarget.seekToChunk( 208 | target.endChunk, 209 | target.endIndex + suffix.length, 210 | ); 211 | seekerAtUnintendedMatch.seekToChunk( 212 | unintendedMatch.endChunk, 213 | unintendedMatch.endIndex + suffix.length, 214 | ); 215 | let extraSuffix = readUntilDifferent( 216 | seekerAtTarget, 217 | seekerAtUnintendedMatch, 218 | false, 219 | ); 220 | if (extraSuffix !== undefined && !minimalContext) 221 | extraSuffix = 222 | extraSuffix + readUntilWhitespace(seekerAtTarget, maxWordLength, false); 223 | 224 | if (minimalContext) { 225 | // Use either the prefix or suffix, whichever is shortest. 226 | if ( 227 | extraPrefix !== undefined && 228 | (extraSuffix === undefined || extraPrefix.length <= extraSuffix.length) 229 | ) { 230 | prefix = extraPrefix + prefix; 231 | } else if (extraSuffix !== undefined) { 232 | suffix = suffix + extraSuffix; 233 | } else { 234 | throw new Error( 235 | 'Target cannot be disambiguated; how could that have happened‽', 236 | ); 237 | } 238 | } else { 239 | // For redundancy, expand both prefix and suffix. 
240 | if (extraPrefix !== undefined) prefix = extraPrefix + prefix; 241 | if (extraSuffix !== undefined) suffix = suffix + extraSuffix; 242 | } 243 | } 244 | } 245 | 246 | function readUntilDifferent( 247 | seeker1: RelativeSeeker, 248 | seeker2: RelativeSeeker, 249 | reverse: boolean, 250 | ): string | undefined { 251 | let result = ''; 252 | while (true) { 253 | let nextCharacter: string; 254 | try { 255 | nextCharacter = seeker1.read(reverse ? -1 : 1); 256 | } catch (err) { 257 | return undefined; // Start/end of text reached: cannot expand result. 258 | } 259 | result = reverse ? nextCharacter + result : result + nextCharacter; 260 | 261 | // Check if the newly added character makes the result differ from the second seeker. 262 | let comparisonCharacter: string | undefined; 263 | try { 264 | comparisonCharacter = seeker2.read(reverse ? -1 : 1); 265 | } catch (err) { 266 | // A RangeError would merely mean seeker2 is exhausted. 267 | if (!(err instanceof RangeError)) throw err; 268 | } 269 | if (nextCharacter !== comparisonCharacter) return result; 270 | } 271 | } 272 | 273 | function readUntilWhitespace( 274 | seeker: RelativeSeeker, 275 | limit = Infinity, 276 | reverse = false, 277 | ): string { 278 | let result = ''; 279 | while (result.length < limit) { 280 | let nextCharacter: string; 281 | try { 282 | nextCharacter = seeker.read(reverse ? -1 : 1); 283 | } catch (err) { 284 | if (!(err instanceof RangeError)) throw err; 285 | break; // End/start of text reached. 286 | } 287 | 288 | // Stop if we reached whitespace. 289 | if (isWhitespace(nextCharacter)) { 290 | seeker.seekBy(reverse ? 1 : -1); // ‘undo’ the last read. 291 | break; 292 | } 293 | 294 | result = reverse ? nextCharacter + result : result + nextCharacter; 295 | } 296 | return result; 297 | } 298 | 299 | function isWhitespace(s: string): boolean { 300 | return /^\s+$/.test(s); 301 | } 302 | --------------------------------------------------------------------------------