├── .eslintignore ├── .eslintrc.js ├── .gitattributes ├── .gitignore ├── .vscode ├── extensions.json ├── launch.json ├── settings.json └── tasks.json ├── .vscodeignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── Changelog.md ├── Debugging.md ├── LICENSE ├── README.md ├── Testing.md ├── assets ├── VSCode-BadString.png ├── VSCode-BinaryError.png ├── VSCode-InvisibleUnicode.png ├── VSCode-MixedEOLs.png ├── VSCode-UnicodeBOM.png ├── VSCode-bitmask-hover.png ├── VSCode-breadcrumbs.png ├── VSCode-command-palette.png ├── VSCode-endobj-hover.png ├── VSCode-hexstring-hover.png ├── VSCode-outline-view.png ├── VSCode-problem-report.png ├── VSCode-xrefHover.png ├── fdf-dark.png ├── fdf-light.png ├── pdf-dark.png └── pdf-light.png ├── client ├── package-lock.json ├── package.json ├── src │ ├── PDFFoldingRangeProvider.ts │ ├── extension.ts │ ├── pdfClientUtilities.ts │ ├── sankey-webview.ts │ ├── test │ │ ├── completion.test.ts │ │ ├── diagnostics.test.ts │ │ ├── helper.ts │ │ ├── index.ts │ │ └── runTest.ts │ └── types │ │ ├── constants.ts │ │ ├── index.ts │ │ └── tokenTypes.ts ├── testFixture │ ├── completion.txt │ └── diagnostics.txt ├── tsconfig.json └── tsconfig.tsbuildinfo ├── language-configuration.json ├── media ├── d3-sankey.js ├── d3.js ├── main.js └── vscode.css ├── package-lock.json ├── package.json ├── scripts ├── arlington-to-vscode.py └── e2e.sh ├── server ├── package-lock.json ├── package.json ├── src │ ├── grammar │ │ ├── grammar_pdfFile.ohm │ │ ├── grammar_pdfTokens.ohm │ │ └── grammar_pdfTokens.ohm-bundle.d.ts │ ├── models │ │ ├── ArlingtonPDFModel.ts │ │ └── PdfObject.ts │ ├── ohmParser.ts │ ├── parser │ │ ├── PdfParser.ts │ │ └── XrefInfoMatrix.ts │ ├── server.ts │ ├── types │ │ ├── constants.ts │ │ ├── documentTypes.ts │ │ ├── index.ts │ │ └── tokenTypes.ts │ └── utils │ │ ├── ArlingtonUtils.ts │ │ └── pdfUtils.ts ├── tsconfig.json └── tsconfig.tsbuildinfo ├── snippets ├── fdf-snippets.json └── pdf-snippets.json ├── syntaxes ├── fdf.tmLanguage.json ├── 
pdf.tmLanguage.json └── pdfstreams.tmLanguage.json ├── testing-resources ├── CDQ_WhitePaper_Accessibility.pdf ├── CompactedPDFSyntaxTest.pdf ├── ISO_32000-2-2020_Amd1.fdf ├── SyntaxChecker-INVALID.pdf ├── keyboard-shortcuts-macos-human-readable.pdf ├── keyboard-shortcuts-macos-qdf.pdf ├── keyboard-shortcuts-macos.pdf ├── significant-properties-human-readable.pdf ├── significant-properties-qdf.pdf └── significant-properties.pdf └── tsconfig.json /.eslintignore: -------------------------------------------------------------------------------- 1 | node_modules/** 2 | client/node_modules/** 3 | client/out/** 4 | server/node_modules/** 5 | server/out/** -------------------------------------------------------------------------------- /.eslintrc.js: -------------------------------------------------------------------------------- 1 | /**@type {import('eslint').Linter.Config} */ 2 | // eslint-disable-next-line no-undef 3 | module.exports = { 4 | root: true, 5 | parser: '@typescript-eslint/parser', 6 | plugins: [ 7 | '@typescript-eslint', 8 | ], 9 | extends: [ 10 | 'eslint:recommended', 11 | 'plugin:@typescript-eslint/recommended', 12 | ], 13 | rules: { 14 | 'semi': [2, "always"], 15 | '@typescript-eslint/no-unused-vars': 0, 16 | '@typescript-eslint/no-explicit-any': 0, 17 | '@typescript-eslint/explicit-module-boundary-types': 0, 18 | '@typescript-eslint/no-non-null-assertion': 0, 19 | } 20 | }; -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | *.pdf binary 2 | *.fdf binary 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | out 2 | node_modules 3 | client/server 4 | .vscode-test 5 | .DS_Store 6 | *.vsix 7 | -------------------------------------------------------------------------------- /.vscode/extensions.json: 
-------------------------------------------------------------------------------- 1 | { 2 | // See https://go.microsoft.com/fwlink/?LinkId=827846 to learn about workspace recommendations. 3 | // Extension identifier format: ${publisher}.${name}. Example: vscode.csharp 4 | 5 | // List of extensions which should be recommended for users of this workspace. 6 | "recommendations": [ 7 | // "dbaeumer.vscode-eslint" 8 | "pdfassociation.pdf-cos-syntax" 9 | ] 10 | } -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | // A launch configuration that compiles the extension and then opens it inside a new window 2 | { 3 | "version": "0.2.0", 4 | "configurations": [ 5 | { 6 | "type": "extensionHost", 7 | "request": "launch", 8 | "name": "Launch Client", 9 | "runtimeExecutable": "${execPath}", 10 | "sourceMaps": true, 11 | "args": ["--extensionDevelopmentPath=${workspaceRoot}"], 12 | "preLaunchTask": { 13 | "type": "npm", 14 | "script": "watch" 15 | } 16 | }, 17 | { 18 | "name": "Language Server E2E Test", 19 | "type": "extensionHost", 20 | "request": "launch", 21 | "runtimeExecutable": "${execPath}", 22 | "sourceMaps": true, 23 | "args": [ 24 | "--extensionDevelopmentPath=${workspaceRoot}", 25 | "--extensionTestsPath=${workspaceRoot}/client/out/test/index", 26 | "${workspaceRoot}/client/testFixture" 27 | ] 28 | }, 29 | { 30 | "name": "Attach to Server", 31 | "type": "node", 32 | "request": "attach", 33 | "port": 6009, 34 | "restart": true, 35 | "sourceMaps": true, 36 | "preLaunchTask": "npm: watch" 37 | } 38 | 39 | ] 40 | } 41 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "editor.insertSpaces": true, 3 | "typescript.tsc.autoDetect": "off", 4 | "typescript.preferences.quoteStyle": "single", 5 | 
"editor.codeActionsOnSave": { 6 | "source.fixAll.eslint": "explicit" 7 | } 8 | } -------------------------------------------------------------------------------- /.vscode/tasks.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "2.0.0", 3 | "tasks": [ 4 | { 5 | "type": "npm", 6 | "script": "compile", 7 | "group": "build", 8 | "presentation": { 9 | "panel": "dedicated", 10 | "reveal": "never" 11 | }, 12 | "problemMatcher": [ 13 | "$tsc" 14 | ] 15 | }, 16 | { 17 | "type": "npm", 18 | "script": "watch", 19 | "isBackground": true, 20 | "group": { 21 | "kind": "build", 22 | "isDefault": true 23 | }, 24 | "presentation": { 25 | "panel": "dedicated", 26 | "reveal": "never" 27 | }, 28 | "problemMatcher": [ 29 | "$tsc-watch" 30 | ] 31 | } 32 | ] 33 | } -------------------------------------------------------------------------------- /.vscodeignore: -------------------------------------------------------------------------------- 1 | .vscode/ 2 | .vscodeignore 3 | .eslintignore 4 | *.vsix 5 | *.txt 6 | *.vsix 7 | *.git* 8 | Debugging.md 9 | Testing.md 10 | testing-resources/ 11 | client/testFixture/ 12 | node_modules/ 13 | scripts/ 14 | media/ 15 | **/tsconfig.json 16 | **/webpack.config.js 17 | **/*.ts 18 | **/*.map 19 | **/*.h 20 | **/*.hh 21 | **/*.cpp 22 | **/*.cc 23 | **/*.c 24 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | 2 | # Contributor Covenant Code of Conduct 3 | 4 | ## Our Pledge 5 | 6 | We as members, contributors, and leaders pledge to make participation in our 7 | community a harassment-free experience for everyone, regardless of age, body 8 | size, visible or invisible disability, ethnicity, sex characteristics, gender 9 | identity and expression, level of experience, education, socio-economic status, 10 | nationality, personal appearance, race, caste, color, 
religion, or sexual identity 11 | and orientation. 12 | 13 | We pledge to act and interact in ways that contribute to an open, welcoming, 14 | diverse, inclusive, and healthy community. 15 | 16 | ## Our Standards 17 | 18 | Examples of behavior that contributes to a positive environment for our 19 | community include: 20 | 21 | * Demonstrating empathy and kindness toward other people 22 | * Being respectful of differing opinions, viewpoints, and experiences 23 | * Giving and gracefully accepting constructive feedback 24 | * Accepting responsibility and apologizing to those affected by our mistakes, 25 | and learning from the experience 26 | * Focusing on what is best not just for us as individuals, but for the 27 | overall community 28 | 29 | Examples of unacceptable behavior include: 30 | 31 | * The use of sexualized language or imagery, and sexual attention or 32 | advances of any kind 33 | * Trolling, insulting or derogatory comments, and personal or political attacks 34 | * Public or private harassment 35 | * Publishing others' private information, such as a physical or email 36 | address, without their explicit permission 37 | * Other conduct which could reasonably be considered inappropriate in a 38 | professional setting 39 | 40 | ## Enforcement Responsibilities 41 | 42 | Community leaders are responsible for clarifying and enforcing our standards of 43 | acceptable behavior and will take appropriate and fair corrective action in 44 | response to any behavior that they deem inappropriate, threatening, offensive, 45 | or harmful. 46 | 47 | Community leaders have the right and responsibility to remove, edit, or reject 48 | comments, commits, code, wiki edits, issues, and other contributions that are 49 | not aligned to this Code of Conduct, and will communicate reasons for moderation 50 | decisions when appropriate. 
51 | 52 | ## Scope 53 | 54 | This Code of Conduct applies within all community spaces, and also applies when 55 | an individual is officially representing the community in public spaces. 56 | Examples of representing our community include using an official e-mail address, 57 | posting via an official social media account, or acting as an appointed 58 | representative at an online or offline event. 59 | 60 | ## Enforcement 61 | 62 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 63 | reported to the community leaders responsible for enforcement at [code-of-conduct@pdfa.org](mailto:code-of-conduct@pdfa.org) 64 | or directly to the CEO or Chair of the Board of Directors. 65 | All complaints will be reviewed and investigated promptly and fairly. 66 | 67 | All community leaders are obligated to respect the privacy and security of the 68 | reporter of any incident. 69 | 70 | ## Enforcement Guidelines 71 | 72 | Community leaders will follow these Community Impact Guidelines in determining 73 | the consequences for any action they deem in violation of this Code of Conduct: 74 | 75 | ### 1. Correction 76 | 77 | **Community Impact**: Use of inappropriate language or other behavior deemed 78 | unprofessional or unwelcome in the community. 79 | 80 | **Consequence**: A private, written warning from community leaders, providing 81 | clarity around the nature of the violation and an explanation of why the 82 | behavior was inappropriate. A public apology may be requested. 83 | 84 | ### 2. Warning 85 | 86 | **Community Impact**: A violation through a single incident or series 87 | of actions. 88 | 89 | **Consequence**: A warning with consequences for continued behavior. No 90 | interaction with the people involved, including unsolicited interaction with 91 | those enforcing the Code of Conduct, for a specified period of time. This 92 | includes avoiding interactions in community spaces as well as external channels 93 | like social media. 
Violating these terms may lead to a temporary or 94 | permanent ban. 95 | 96 | ### 3. Temporary Ban 97 | 98 | **Community Impact**: A serious violation of community standards, including 99 | sustained inappropriate behavior. 100 | 101 | **Consequence**: A temporary ban from any sort of interaction or public 102 | communication with the community for a specified period of time. No public or 103 | private interaction with the people involved, including unsolicited interaction 104 | with those enforcing the Code of Conduct, is allowed during this period. 105 | Violating these terms may lead to a permanent ban. 106 | 107 | ### 4. Permanent Ban 108 | 109 | **Community Impact**: Demonstrating a pattern of violation of community 110 | standards, including sustained inappropriate behavior, harassment of an 111 | individual, or aggression toward or disparagement of classes of individuals. 112 | 113 | **Consequence**: A permanent ban from any sort of public interaction within 114 | the community. 115 | 116 | ## Attribution 117 | 118 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 119 | version 2.0, available at 120 | [https://www.contributor-covenant.org/version/2/0/code_of_conduct.html][v2.0]. 121 | 122 | Community Impact Guidelines were inspired by 123 | [Mozilla's code of conduct enforcement ladder][Mozilla CoC]. 124 | 125 | For answers to common questions about this code of conduct, see the FAQ at 126 | [https://www.contributor-covenant.org/faq][FAQ]. Translations are available 127 | at [https://www.contributor-covenant.org/translations][translations]. 
128 | 129 | [homepage]: https://www.contributor-covenant.org 130 | [v2.0]: https://www.contributor-covenant.org/version/2/0/code_of_conduct.html 131 | [Mozilla CoC]: https://github.com/mozilla/diversity 132 | [FAQ]: https://www.contributor-covenant.org/faq 133 | [translations]: https://www.contributor-covenant.org/translations 134 | 135 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to contribute 2 | 3 | We warmly welcome all contributions and feedback on our work. 4 | 5 | Here are some important resources: 6 | 7 | * We require all participants to abide by the [Contributor Covenant Code of Conduct](https://github.com/pdf-association/.github/blob/main/CODE_OF_CONDUCT.md). 8 | 9 | * All contributions require acceptance of our [Intellectual Property Rights (IPR) Policy](https://www.pdfa.org/intellectual-property-rights-ipr-policy/). This policy was adapted from and is fully aligned with the [ISO/IEC/ITU Common Patent Policy](https://www.iso.org/iso-standards-and-patents.html). To complete this acceptance, please use our [Agreement to Participate form](https://forms.gle/2yJcsRofmiSQixEh9). 10 | 11 | * To assist both academic and industry researchers achieve high-quality and accurate PDF-oriented research outcomes, the PDF Association makes available a free peer-review service. By emailing [pdf-research-support@pdfa.org](mailto:pdf-research-support@pdfa.org), this service will link acknowledged experts in the PDF file format with journal editors, academic publishers, conference steering committees and researchers to provide expert peer-review of pre-print articles, whitepapers and presentations in relation to statements made about PDF. 12 | 13 | * Not all our efforts occur in the public arena. If you wish to be involved, then please join us as a [PDF Association Member](https://www.pdfa.org/become-a-member/). 
14 | 15 | 16 | If you need to contact us, please email us at [info@pdfa.org](mailto:info@pdfa.org). 17 | -------------------------------------------------------------------------------- /Changelog.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 6 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 7 | 8 | ## 0.1.6 - T.B.D. 9 | - (ADDED) Date string hover 10 | - (ADDED) Semantic token processing 11 | - (IMPROVED) Replaced inefficient regex-based parsing with Ohm.js based parser 12 | - (CHANGED) moved all processing to LSP server side. Client now calls server via an API 13 | - 14 | 15 | ## 0.1.5 - 2023-11-27 16 | - (ADDED) Code completion options for PDF names when `/` is pressed (from [Arlington PDF Model](https://github.com/pdf-association/arlington-pdf-model)) 17 | - (FIXED) Support for new (unsaved) PDF/FDF files 18 | - (ADDED) Custom commands to import images as new Image XObjects with `/ASCII85Decode`/`/ASCIIHexDecode` on raw pixels 19 | - (ADDED) Custom commands to import images as new Image XObjects with `/ASCII85Decode`/`/ASCIIHexDecode` on JPEGs (`/DCTDecode`) 20 | - (ADDED) Custom commands to import binary data as a new stream object with `/ASCII85Decode`/`/ASCIIHexDecode` filters 21 | - (ADDED) Custom commands to convert between `/ASCII85Decode`/`/ASCIIHexDecode` and uncompressed data 22 | - (ADDED) Custom commands to convert between PDF literal and hex strings 23 | - (ADDED) Custom command to convert 1 or more selected non-stream objects to a single PDF 1.5 object stream 24 | - (ADDED) Custom command to convert conventional cross reference table, trailer, etc. 
to a cross reference stream 25 | - (ADDED) Editor context submenu "PDF" with above commands 26 | - (ADDED) Status bar item showing selected lines 27 | - (ADDED) Outline tree view and breadcrumbs, based on simple marker-based parser (NOT a full PDF parser as this fails during editing) 28 | 29 | ## 0.1.4 - 2023-09-04 30 | - (IMPROVED) Hover hints for cross-reference table entries 31 | - (ADDED) Hover hints for `endstream` and `endobj` keywords 32 | - (ADDED) Hover hints for key names which are bitmasks (`/F`, `/Ff`, `/Flags`) 33 | - (ADDED) Hover hints for hex strings 34 | - (ADDED) "Go to" functionality now supports PDFs with incremental updates and potentially multiple objects with the same object ID 35 | - (IMPROVED, FIXED and ADDED) Additional validation checks of conventional cross reference tables to Problem panel. 36 | 37 | ## 0.1.3 - 2023-08-24 38 | - (ADDED) Hover hints for cross-reference table entries 39 | - (IMPROVED) bracket matching for dictionaries (`<<`,`>>`), arrays (`[`,`]`), and PostScript brackets (`{`/`}`). 40 | - (IMPROVED) auto-indent and auto-outdent for dictionaries (`<<`,`>>`), arrays (`[`,`]`), hex strings (`<`/`>`) and PostScript brackets (`{`/`}`). 41 | - (ADDED) auto-complete and auto-closing for dictionaries (`<<`,`>>`), arrays (`[`,`]`), literal strings (`(`/`)`), hex strings (`<`/`>`) and PostScript brackets (`{`/`}`). 
42 | - (ADDED) LSP semantic token processor used by "go to" functionality to ensure correct token is located 43 | - (IMPROVED) TextMate grammar updates for syntax highlighting for PDFs with binary data 44 | - (IMPROVED) Folding support for objects, streams, conventional cross-reference tables and paired content stream operators 45 | - (ADDED and FIXED) Additional validation checks of conventional cross reference tables 46 | 47 | ## 0.1.2 - 2023-08-15 48 | - (ADDED) Distinguish handling of FDF and PDF for validation checks and snippets 49 | - (FIXED) Visual quality of icons (relevant to FDF) and made PNG backgrounds transparent 50 | - (ADDED) Added badges to the README 51 | - (ADDED) Additional validation checks of conventional cross reference tables 52 | - Code and packaging tidy-up 53 | - Update and improve this changelog 54 | 55 | ## 0.1.1 - 2023-08-15 56 | - (FIXED) Fix packaging of server for "Go to" functionality 57 | 58 | ## 0.1.0 - 2023-08-14 59 | - Initial release via Marketplace: https://marketplace.visualstudio.com/items?itemName=pdfassociation.pdf-cos-syntax 60 | 61 | ## 0.0.2 - 2023-08-13 (prerelease) 62 | - Initial pre-release for feedback. 63 | -------------------------------------------------------------------------------- /Debugging.md: -------------------------------------------------------------------------------- 1 | # Extension debugging and development environment 2 | 3 | Heavily documented sample code for https://code.visualstudio.com/api/language-extensions/language-server-extension-guide 4 | 5 | - https://code.visualstudio.com/api 6 | - https://code.visualstudio.com/api/language-extensions/language-server-extension-guide 7 | - https://langserver.org/ 8 | - https://microsoft.github.io/language-server-protocol/ 9 | 10 | 11 | ## Functionality 12 | 13 | This Language Server works for PDFs that are plain text. 
It has the following language features: 14 | - Completions 15 | - Diagnostics regenerated on each file change or configuration change 16 | 17 | It also includes an End-to-End test. 18 | 19 | ## Structure 20 | 21 | ``` 22 | . 23 | ├── client // Language Client 24 | │ ├── src 25 | │ │ ├── test // End to End tests for Language Client / Server 26 | │ │ └── extension.ts // Language Client entry point 27 | ├── package.json // The extension manifest. 28 | └── server // Language Server 29 | └── src 30 | └── server.ts // Language Server entry point 31 | ``` 32 | 33 | ## Running 34 | 35 | - Run `npm install` in this folder. This installs all necessary npm modules in both the client and server folder 36 | - Open VS Code on this folder. 37 | - Press Ctrl+Shift+B to start compiling the client and server in [watch mode](https://code.visualstudio.com/docs/editor/tasks#:~:text=The%20first%20entry%20executes,the%20HelloWorld.js%20file.). 38 | - Switch to the Run and Debug View in the Sidebar (Ctrl+Shift+D). 39 | - Select `Launch Client` from the drop down (if it is not already). 40 | - Press ▷ to run the launch config (F5). 41 | - In the [Extension Development Host](https://code.visualstudio.com/api/get-started/your-first-extension#:~:text=Then%2C%20inside%20the%20editor%2C%20press%20F5.%20This%20will%20compile%20and%20run%20the%20extension%20in%20a%20new%20Extension%20Development%20Host%20window.) instance of VSCode, open a PDF document in 'plain text' language mode. 42 | 43 | ## Packaging as VSIX 44 | ```bash 45 | npm install -g @vscode/vsce 46 | vsce package 47 | ``` 48 | 49 | See also https://code.visualstudio.com/api/working-with-extensions/publishing-extension. 50 | 51 | # Development Notes for PDF/FDF 52 | 53 | - cannot use "FirstLine" in `package.json` because otherwise new PDFs cannot use the "PDF-" snippets as file type is not recognized and snippets don't work inside comments. Same goes for FDF. 54 | 55 | - bytes are always interpreted as UTF-8 by VSCode!! 
This is a big problem for bytes > 0x7F that form invalid UTF-8 sequences since these get completely eaten up by VSCode and replaced with an alternative UTF-8 symbol using different bytes! 56 | - For valid UTF-8 byte sequences, can use a `TextEncoder()` to convert the UTF-8 codepoints (as returned by `Strings.slice()`, etc) back into their original bytes. See the binary marker comment validation code in `server/src/server.ts`. See also the discussion in `README.md` 57 | 58 | # Notes on JavaScript/TypeScript regex 59 | 60 | Working in VSCode is heavily JS centric, however JS regex special characters are NOT the same as PDF and thus **extreme caution** must be used: 61 | 62 | - JS `\b` (word boundary special char assertion) is **_NOT_** how anything in PDF is defined! According to [this Mozilla JS link](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Regular_expressions/Word_boundary_assertion) `\b` is anything that is not in `[a-zA-Z0-9_]`. And we definitely do NOT want to enable Unicode because of how VSCode treats binary data in PDFs. 63 | - Note that it is OK to use `\b` between `xref` and `trailer` keywords in conventional cross-reference tables, such as when parsing the cross-reference table entries or sub-section marker lines, because this section of PDFs has **_very_** strict rules (`[0-9fn \r\n]`) - but nowhere else is suitable! 64 | - JS `\s` (whitespace special char assertion) is also not the same as PDF whitespace, as it includes far more whitespace than PDF allows - see [this Mozilla JS link](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_expressions/Assertions). 65 | - PDF needs just the 6 characters in Table 1: `[\0\t\n\f\r ]` but if demarcating a "next token" then the set needs to be extended by the 8 PDF delimiters `[\(\)<>\[\]\/%]` (and if it is in a Type 4 PostScript function the `[\{\}]` also needs to be added). 
66 | 67 | - `xref` is a substring in `startxref` - this can be addressed by avoiding the letter `t` before `xref` as in `(?:[^t])xref`, or looking for PDF whitespace before `xref` as in `(?:\0|\t|\n|\f|\r| )xref`. Depending on use-case, non-capturing of the preceding character may also be desirable as shown here with `(?:`...`)` 68 | 69 | - due to PDFs with binary data, various "end" symbols can be missed since VSCode sees some of the bytes as part of a UTF-8 multi-byte sequence and thus misses the end symbol. For this reason, some dictionary and object ending sequences also include `endobj` (and possibly other keywords or symbols) in an attempt to isolate the confusion within a single PDF object. Non-capturing of these backups may also be helpful... 70 | 71 | # Node modules 72 | 73 | Minimal Node module dependencies are kept below `client` and `server` folders. The `.\package.json` shouldn't have any runtime dependencies: 74 | 75 | ``` 76 | cd client 77 | npm install 78 | npm outdated 79 | cd ..\server 80 | npm install 81 | npm outdated 82 | cd .. 83 | npm install 84 | npm outdated 85 | npm outdated -g 86 | ``` 87 | 88 | # Packaging 89 | 90 | For some reason `vsce package` includes DevDependencies for both `.\client` and `.\server` folders. The workaround is to manually prune the dev-only dependencies in client and server, then package, and then reinstate: 91 | 92 | ``` 93 | cd client 94 | npm prune --omit dev 95 | cd ..\server 96 | npm prune --omit dev 97 | cd .. 98 | vsce package 99 | cd client 100 | npm install 101 | cd ..\server 102 | npm install 103 | cd .. 104 | ``` 105 | 106 | ## Supporting Sharp across multiple platforms 107 | 108 | Sharp is used to read image files such as JPEG, PNG, PPM, etc. and it depends on `libvips`, which ships as platform-dependent DLLs, dylibs, etc. 
To ensure the packaged VSIX is cross-platform, all supported platforms need to be installed locally: 109 | 110 | ``` 111 | cd client 112 | npm rebuild --platform=win32 --arch=x64 sharp 113 | npm rebuild --platform=darwin --arch=arm64 sharp 114 | npm rebuild --platform=darwin --arch=x64 sharp 115 | npm rebuild --platform=linux --arch=x64 sharp 116 | ``` 117 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /Testing.md: -------------------------------------------------------------------------------- 1 | # Testing Strategy / Plan 2 | 3 | # Corpora 4 | 5 | - https://github.com/pdf-association/safedocs 6 | - https://github.com/pdf-association/pdf20examples/ 7 | - https://github.com/pdf-association/interactive-pdf - "Sample lessons" folder 8 | - https://github.com/pdfix/Functional-PDF-Exemplars-ISO-32000-2 9 | 10 | # Display / Navigation 11 | - check things work for `.pdf` and `.fdf` files 12 | 13 | ## Syntax highlighting 14 | 15 | Text-based PDF-centric grammars to test: COS, Content Streams, XMP (XML), PS Type 4 Functions, CMaps, JavaScript, other embedded XML (e.g. ZUGFeRD, Order-X, Factur-X) 16 | 17 | - check with text-based PDFs 18 | - check with binary PDFs - does the syntax highlighting eventually re-sync after binary data?? 19 | 20 | NOTE: there is some confusion over empty dictionaries and hex-strings. 21 | NOTE: there is some confusion over multi-line literal strings. 
22 | 23 | ## Folding 24 | - check folding of `X Y obj` to matching `endobj` - what if no matching? 25 | - check folding of `stream` to matching `endstream` - what if no matching? 26 | - check folding of dictionaries with `<<` and `>>` on lines by themselves 27 | - check folding of dictionaries (incl. deeply nested) with `/Keyname <<` and `>>` on a line by itself 28 | 29 | NOTE: there is some confusion over empty dictionaries and hex-strings. 30 | 31 | ## Hover hints 32 | - check hovers of `xref` table in-use (`n`) entries 33 | - check hovers of `xref` table free (`f`) entries 34 | - check hovers of hex-strings `<...>` - check degenerate `<>` 35 | - check hovers of literal strings `(...)` - especially with escape sequences, octal codes, multi-line, etc. Check degenerate `()` 36 | - check hovers of name objects with `#`-hex pairs `/...#xx...` 37 | 38 | 39 | ## Bracket Matching 40 | - check bracket matching for dictionaries and hex-strings `<`/`>` 41 | - check bracket matching for literal strings `(`/`)` 42 | - check bracket matching for arrays `[`/`]` 43 | - check bracket matching for PS Type 4 functions `{`/`}` 44 | 45 | ## "Go to" functionality 46 | 47 | Use a PDF with incremental updates where multiple objects exist with the same object ID. 48 | 49 | - right-click `X Y obj` 50 | - right-click `X Y R` 51 | - right-click cross reference table in-use `n` entry 52 | 53 | ## Outline / Breadcrumbs 54 | - use a PDF with multiple incremental updates 55 | - change/edit/add/delete various keywords and comments 56 | 57 | ## Sankey Flow Diagram 58 | - 59 | 60 | # Editing 61 | 62 | ## Snippets 63 | - start with a blank editor (with extension `.pdf`) 64 | - type "PDF-" on a blank line --> new PDF file 65 | - type snippets on a comment line `%` --> nothing! 66 | 67 | ## Block comment / uncomment 68 | - pick some arbitrary lines of a file. 
Toggle '%' comment on and off 69 | 70 | ## Auto-complete and auto-closing 71 | - 72 | 73 | ## Auto-indent and Auto-outdent on ENTER 74 | - 75 | 76 | ## File validation 77 | 78 | - check `%PDF-` header with good and bad versions 79 | - change/edit/add/delete a `xref` keyword in a PDF and in an incremental update 80 | - change/edit/add/delete a `trailer` keyword in a PDF and in an incremental update 81 | - change/edit/add/delete a `startxref` keyword in a PDF and in an incremental update 82 | - change/edit/add/delete a `%%EOF` keyword in a PDF and in an incremental update 83 | - change/edit/add/delete a cross reference in-use `n` entry - add whitespace or junk at end of line 84 | - change/edit/add/delete a cross reference free `f` entry - add whitespace or junk at end of line 85 | - change/edit/add/delete a cross reference subsection marker line - add junk at end of line 86 | 87 | ## Custom Commands 88 | - 89 | -------------------------------------------------------------------------------- /assets/VSCode-BadString.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pdf-association/pdf-cos-syntax/8adcc72046ad40579add080379161273315fdbb2/assets/VSCode-BadString.png -------------------------------------------------------------------------------- /assets/VSCode-BinaryError.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pdf-association/pdf-cos-syntax/8adcc72046ad40579add080379161273315fdbb2/assets/VSCode-BinaryError.png -------------------------------------------------------------------------------- /assets/VSCode-InvisibleUnicode.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pdf-association/pdf-cos-syntax/8adcc72046ad40579add080379161273315fdbb2/assets/VSCode-InvisibleUnicode.png
-------------------------------------------------------------------------------- /assets/VSCode-MixedEOLs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pdf-association/pdf-cos-syntax/8adcc72046ad40579add080379161273315fdbb2/assets/VSCode-MixedEOLs.png -------------------------------------------------------------------------------- /assets/VSCode-UnicodeBOM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pdf-association/pdf-cos-syntax/8adcc72046ad40579add080379161273315fdbb2/assets/VSCode-UnicodeBOM.png -------------------------------------------------------------------------------- /assets/VSCode-bitmask-hover.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pdf-association/pdf-cos-syntax/8adcc72046ad40579add080379161273315fdbb2/assets/VSCode-bitmask-hover.png -------------------------------------------------------------------------------- /assets/VSCode-breadcrumbs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pdf-association/pdf-cos-syntax/8adcc72046ad40579add080379161273315fdbb2/assets/VSCode-breadcrumbs.png -------------------------------------------------------------------------------- /assets/VSCode-command-palette.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pdf-association/pdf-cos-syntax/8adcc72046ad40579add080379161273315fdbb2/assets/VSCode-command-palette.png -------------------------------------------------------------------------------- /assets/VSCode-endobj-hover.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pdf-association/pdf-cos-syntax/8adcc72046ad40579add080379161273315fdbb2/assets/VSCode-endobj-hover.png 
-------------------------------------------------------------------------------- /assets/VSCode-hexstring-hover.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pdf-association/pdf-cos-syntax/8adcc72046ad40579add080379161273315fdbb2/assets/VSCode-hexstring-hover.png -------------------------------------------------------------------------------- /assets/VSCode-outline-view.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pdf-association/pdf-cos-syntax/8adcc72046ad40579add080379161273315fdbb2/assets/VSCode-outline-view.png -------------------------------------------------------------------------------- /assets/VSCode-problem-report.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pdf-association/pdf-cos-syntax/8adcc72046ad40579add080379161273315fdbb2/assets/VSCode-problem-report.png -------------------------------------------------------------------------------- /assets/VSCode-xrefHover.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pdf-association/pdf-cos-syntax/8adcc72046ad40579add080379161273315fdbb2/assets/VSCode-xrefHover.png -------------------------------------------------------------------------------- /assets/fdf-dark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pdf-association/pdf-cos-syntax/8adcc72046ad40579add080379161273315fdbb2/assets/fdf-dark.png -------------------------------------------------------------------------------- /assets/fdf-light.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pdf-association/pdf-cos-syntax/8adcc72046ad40579add080379161273315fdbb2/assets/fdf-light.png 
-------------------------------------------------------------------------------- /assets/pdf-dark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pdf-association/pdf-cos-syntax/8adcc72046ad40579add080379161273315fdbb2/assets/pdf-dark.png -------------------------------------------------------------------------------- /assets/pdf-light.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pdf-association/pdf-cos-syntax/8adcc72046ad40579add080379161273315fdbb2/assets/pdf-light.png -------------------------------------------------------------------------------- /client/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "pdf-cos-syntax-client", 3 | "description": "VSCode client of the PDF COS syntax language server", 4 | "author": "PDF Association", 5 | "license": "Apache-2.0", 6 | "version": "0.1.6", 7 | "publisher": "pdfassociation", 8 | "repository": { 9 | "type": "git", 10 | "url": "https://github.com/pdf-association/pdf-cos-syntax" 11 | }, 12 | "engines": { 13 | "vscode": "^1.84.2" 14 | }, 15 | "dependencies": { 16 | "@vscode/test-electron": "^2.3.8", 17 | "ascii85": "^1.0.2", 18 | "deasync": "^0.1.29", 19 | "sharp": "^0.33.5", 20 | "vscode-languageclient": "^9.0.1" 21 | }, 22 | "devDependencies": { 23 | "@types/mocha": "^10.0.6", 24 | "@types/node": "^22.13.9", 25 | "@types/vscode": "^1.84.2", 26 | "@typescript-eslint/eslint-plugin": "^6.12.0", 27 | "@typescript-eslint/parser": "^6.12.0", 28 | "eslint": "^8.54.0", 29 | "typescript": "^5.3.2" 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /client/src/PDFFoldingRangeProvider.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * @brief VSCode "pdf-cos-syntax" extension client-side folding support 3 | * 4 | * @copyright 5 | 
* Copyright 2023 PDF Association, Inc. https://www.pdfa.org 6 | * SPDX-License-Identifier: Apache-2.0 7 | * 8 | * @remark 9 | * This material is based upon work supported by the Defense Advanced 10 | * Research Projects Agency (DARPA) under Contract No. HR001119C0079. 11 | * Any opinions, findings and conclusions or recommendations expressed 12 | * in this material are those of the author(s) and do not necessarily 13 | * reflect the views of the Defense Advanced Research Projects Agency 14 | * (DARPA). Approved for public release. 15 | */ 16 | 'use strict'; 17 | 18 | import { FoldingRangeProvider, TextDocument, FoldingRange, ProviderResult } from 'vscode'; 19 | 20 | export class PDFFoldingRangeProvider implements FoldingRangeProvider { 21 | provideFoldingRanges(document: TextDocument): ProviderResult { 22 | const ranges: FoldingRange[] = []; 23 | 24 | let startObjLine = -1; 25 | let startStreamLine = -1; 26 | let startxrefLine = -1; 27 | let startDictLines = []; 28 | let startQLines = []; 29 | let startBTLines = []; 30 | let startBXLines = []; 31 | let startMarkedContentLines = []; 32 | 33 | for (let i = 0; i < document.lineCount; i++) { 34 | const line = document.lineAt(i).text.trim(); 35 | 36 | /** 37 | * @todo Remove any comments to leave just content. Handle strings 38 | * Especially important if a comment or string contains any of the markers! 
39 | */ 40 | 41 | // cannot be nested 42 | if (line.match('\\d+ \\d+ obj')) { 43 | startObjLine = i; 44 | startDictLines = []; // reset dictionaries 45 | continue; 46 | } else if (line.startsWith('endobj') && startObjLine >= 0) { 47 | const r = new FoldingRange(startObjLine, i); 48 | ranges.push(r); 49 | startObjLine = -1; 50 | continue; 51 | } 52 | 53 | // cannot be nested 54 | if (line.startsWith('xref')) { 55 | startxrefLine = i; 56 | startDictLines = []; // reset dictionaries 57 | continue; 58 | } else if (line.startsWith('trailer') && startxrefLine >= 0) { 59 | const r = new FoldingRange(startxrefLine, i); 60 | ranges.push(r); 61 | startxrefLine = -1; 62 | continue; 63 | } 64 | 65 | // cannot be nested 66 | if (line.startsWith('stream')) { 67 | startStreamLine = i; 68 | startDictLines = []; // reset dictionaries 69 | startQLines = []; // reset q/Q operand pairs 70 | startBTLines = []; // reset BT/ET operand pairs 71 | startBXLines = []; // reset BX/EX operand pairs 72 | startMarkedContentLines = []; // reset BDC/BMC/EMC operand pairs 73 | continue; 74 | } else if (line.startsWith('endstream') && (startStreamLine >= 0)) { 75 | const r = new FoldingRange(startStreamLine, i); 76 | ranges.push(r); 77 | startStreamLine = -1; 78 | startDictLines = []; // reset dictionaries 79 | startQLines = []; // reset q/Q operand pairs 80 | startBTLines = []; // reset BT/ET operand pairs 81 | startBXLines = []; // reset BX/EX operand pairs 82 | startMarkedContentLines = []; // reset BDC/BMC/EMC operand pairs 83 | continue; 84 | } 85 | 86 | // Nestable. 
Avoid when an entire dict (both "<<" and ">>") are on 1 line 87 | if (line.startsWith('<<') && !line.includes('>>')) { 88 | startDictLines.push(i); 89 | continue; 90 | } 91 | else if (line.startsWith('/') && line.includes('<<') && !line.includes('>>')) { 92 | startDictLines.push(i); 93 | continue; 94 | } else if (line.startsWith('>>') && (startDictLines.length > 0)) { 95 | const r = new FoldingRange(startDictLines.pop(), i); 96 | ranges.push(r); 97 | continue; 98 | } 99 | 100 | // No operands or other operators. Nestable. 101 | if (line.startsWith('q')) { 102 | startQLines.push(i); 103 | continue; 104 | } else if (line.startsWith('Q') && (startQLines.length > 0)) { 105 | const r = new FoldingRange(startQLines.pop(), i); 106 | ranges.push(r); 107 | continue; 108 | } 109 | 110 | // No operands or other operators. Nestable. 111 | if (line.startsWith('BT')) { 112 | startBTLines.push(i); 113 | continue; 114 | } else if (line.startsWith('ET') && startBTLines.length > 0) { 115 | const r = new FoldingRange(startBTLines.pop(), i); 116 | ranges.push(r); 117 | continue; 118 | } 119 | 120 | // No operands or other operators. Nestable. 121 | if (line.startsWith('BX')) { 122 | startBXLines.push(i); 123 | continue; 124 | } else if (line.startsWith('EX') && startBXLines.length > 0) { 125 | const r = new FoldingRange(startBXLines.pop(), i); 126 | ranges.push(r); 127 | continue; 128 | } 129 | 130 | // Nestable. 
Supports operands on same line as operator: 131 | // /tag properties BDC 132 | // /tag BMC 133 | if ((line.includes('BDC')) || (line.includes('BMC'))) { 134 | startMarkedContentLines.push(i); 135 | continue; 136 | } else if (line.startsWith('EMC') && (startMarkedContentLines.length > 0)) { 137 | const r = new FoldingRange(startMarkedContentLines.pop(), i); 138 | ranges.push(r); 139 | continue; 140 | } 141 | } 142 | return ranges; 143 | } 144 | } 145 | -------------------------------------------------------------------------------- /client/src/sankey-webview.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * @brief VSCode PDF COS syntax client-side Sankey webview functionality 3 | * 4 | * @copyright 5 | * Copyright 2023 PDF Association, Inc. https://www.pdfa.org 6 | * SPDX-License-Identifier: Apache-2.0 7 | * 8 | * @author Peter Wyatt, PDF Association 9 | * 10 | * @remark 11 | * This material is based upon work supported by the Defense Advanced 12 | * Research Projects Agency (DARPA) under Contract No. HR001119C0079. 13 | * Any opinions, findings and conclusions or recommendations expressed 14 | * in this material are those of the author(s) and do not necessarily 15 | * reflect the views of the Defense Advanced Research Projects Agency 16 | * (DARPA). Approved for public release. 17 | */ 18 | 'use strict'; 19 | 20 | import * as vscode from "vscode"; 21 | 22 | export function getWebviewOptions(extensionUri: vscode.Uri): vscode.WebviewOptions { 23 | // console.log(`getWebviewOptions = ${vscode.Uri.joinPath(extensionUri, 'media')}`); 24 | return { 25 | // Enable JavaScript and forms in the webview 26 | enableScripts: true, 27 | enableForms: true, 28 | // And restrict the webview to only loading content from our extension's `media` directory. 
29 | localResourceRoots: [vscode.Uri.joinPath(extensionUri, 'media')] 30 | }; 31 | } 32 | 33 | /** 34 | * Manages Sankey Flow Diagram webview panels 35 | */ 36 | export class SankeyPanel 37 | { 38 | /** Track the current panel. Only allow a single panel to exist at a time. */ 39 | public static currentPanel: SankeyPanel | undefined; 40 | public static readonly viewType = 'pdf'; 41 | private readonly _panel: vscode.WebviewPanel; 42 | private readonly _extensionUri: vscode.Uri; 43 | private _disposables: vscode.Disposable[] = []; 44 | 45 | public static createOrShow(context: vscode.ExtensionContext, csvData: string) { 46 | // console.log(`createOrShow ${context.extensionUri}`); 47 | 48 | const column = vscode.window.activeTextEditor 49 | ? vscode.window.activeTextEditor.viewColumn 50 | : undefined; 51 | 52 | // If we already have a panel, show it. 53 | if (SankeyPanel.currentPanel) { 54 | SankeyPanel.currentPanel._panel.reveal(column); 55 | return; 56 | } 57 | 58 | // Otherwise, create a new panel. 
59 | const panel = vscode.window.createWebviewPanel( 60 | SankeyPanel.viewType, 61 | 'Sankey Flow Diagram', 62 | column || vscode.ViewColumn.One, 63 | getWebviewOptions(context.extensionUri), 64 | ); 65 | 66 | panel.webview.onDidReceiveMessage( 67 | message => { 68 | switch (message.type) { 69 | default: 70 | console.log(`onDidReceiveMessage: ${message.type} ${message.value}`); break; 71 | } 72 | }, 73 | undefined, 74 | context.subscriptions 75 | ); 76 | 77 | SankeyPanel.currentPanel = new SankeyPanel(panel, context); 78 | panel.webview.postMessage({ type: {type: 'CSV-Data', value: csvData } }); 79 | } 80 | 81 | public static revive(panel: vscode.WebviewPanel, context: vscode.ExtensionContext) { 82 | // console.log(`revive`); 83 | SankeyPanel.currentPanel = new SankeyPanel(panel, context); 84 | } 85 | 86 | private constructor(panel: vscode.WebviewPanel, context: vscode.ExtensionContext) { 87 | // console.log(`constructor`); 88 | this._panel = panel; 89 | this._extensionUri = context.extensionUri; 90 | 91 | // Listen for when the panel is disposed 92 | // This happens when the user closes the panel or when the panel is closed programmatically 93 | this._panel.onDidDispose(() => this.dispose(), null, this._disposables); 94 | 95 | // Update the content based on view changes 96 | this._panel.onDidChangeViewState( 97 | e => { 98 | if (this._panel.visible) 99 | this._getHtmlForWebview(this._panel.webview); 100 | }, 101 | null, 102 | this._disposables 103 | ); 104 | 105 | // Handle messages from the webview 106 | this._panel.webview.onDidReceiveMessage( 107 | message => { 108 | switch (message.type) { 109 | case 'alert': 110 | // console.log(`onDidReceiveMessage: ${message.type} ${message.value}`); 111 | return; 112 | } 113 | }, 114 | null, 115 | this._disposables 116 | ); 117 | 118 | this._panel.webview.html = this._getHtmlForWebview(this._panel.webview); 119 | } 120 | 121 | public sendDataToWebview() { 122 | // console.log(`sendDataToWebview`); 123 | // Send a message 
to the webview 124 | // You can send any JSON serializable data. 125 | this._panel.webview.postMessage({ type: 'refactor', value: 'Do it now!' }); 126 | } 127 | 128 | public dispose() { 129 | // console.log(`dispose`); 130 | SankeyPanel.currentPanel = undefined; 131 | 132 | // Clean up our resources 133 | this._panel.dispose(); 134 | 135 | while (this._disposables.length) { 136 | const x = this._disposables.pop(); 137 | if (x) x.dispose(); 138 | } 139 | } 140 | 141 | private _getHtmlForWebview(webview: vscode.Webview): string { 142 | // console.log(`_getHtmlForWebview`); 143 | 144 | // Local path to main script run in the webview 145 | const scriptPathOnDisk = vscode.Uri.joinPath(this._extensionUri, 'media', 'main.js'); 146 | 147 | // And the uri we use to load this script in the webview 148 | const scriptUri = webview.asWebviewUri(scriptPathOnDisk); 149 | 150 | // Uri to load styles into webview, as webviews cannot load 'file:' resources 151 | const cssMainUri = webview.asWebviewUri(vscode.Uri.joinPath(this._extensionUri, 'media', 'vscode.css')); 152 | const mainJSUri = webview.asWebviewUri(vscode.Uri.joinPath(this._extensionUri, 'media', 'main.js')); 153 | 154 | // wget -O d3.js -q https://cdn.jsdelivr.net/npm/d3@7 155 | const d3Uri = webview.asWebviewUri(vscode.Uri.joinPath(this._extensionUri, 'media', 'd3.js')); 156 | 157 | // wget -O d3-sankey.js -q https://cdn.jsdelivr.net/npm/d3-sankey@0.12 158 | const d3SankeyUri = webview.asWebviewUri(vscode.Uri.joinPath(this._extensionUri, 'media', 'd3-sankey.js')); 159 | 160 | // Use a nonce to only allow specific scripts to be run 161 | const nonce = _getNonce(); 162 | 163 | return ` 164 | 165 | 166 | 167 | 171 | 174 | 175 | 176 | 177 | Sankey Flow Diagram 178 | 179 | 180 | 181 | 182 | 201 | 205 | 209 | 213 | 217 | 218 |
183 | Alignment: 184 |
185 | 186 | 187 |
188 |
189 | 190 | 191 |
192 |
193 | 194 | 195 |
196 |
197 | 198 | 199 |
200 |
219 | 220 | 221 | 222 | 223 | 224 | `; 225 | } 226 | } 227 | 228 | 229 | 230 | /** Generate a random nonce for extension webview */ 231 | function _getNonce(): string { 232 | let text = ''; 233 | const possible = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'; 234 | for (let i = 0; i < 32; i++) { 235 | text += possible.charAt(Math.floor(Math.random() * possible.length)); 236 | } 237 | return text; 238 | } 239 | -------------------------------------------------------------------------------- /client/src/test/completion.test.ts: -------------------------------------------------------------------------------- 1 | /* -------------------------------------------------------------------------------------------- 2 | * Copyright (c) Microsoft Corporation. All rights reserved. 3 | * Licensed under the MIT License. See License.txt in the project root for license information. 4 | * ------------------------------------------------------------------------------------------ */ 5 | 6 | import * as vscode from 'vscode'; 7 | import * as assert from 'assert'; 8 | import { getDocUri, activate } from './helper'; 9 | 10 | suite('Should do completion', () => { 11 | const docUri = getDocUri('completion.txt'); 12 | 13 | test('Completes JS/TS in txt file', async () => { 14 | await testCompletion(docUri, new vscode.Position(0, 0), { 15 | items: [ 16 | { label: 'JavaScript', kind: vscode.CompletionItemKind.Text }, 17 | { label: 'TypeScript', kind: vscode.CompletionItemKind.Text } 18 | ] 19 | }); 20 | }); 21 | }); 22 | 23 | async function testCompletion( 24 | docUri: vscode.Uri, 25 | position: vscode.Position, 26 | expectedCompletionList: vscode.CompletionList 27 | ) { 28 | await activate(docUri); 29 | 30 | // Executing the command `vscode.executeCompletionItemProvider` to simulate triggering completion 31 | const actualCompletionList = (await vscode.commands.executeCommand( 32 | 'vscode.executeCompletionItemProvider', 33 | docUri, 34 | position 35 | )) as 
vscode.CompletionList; 36 | 37 | assert.ok(actualCompletionList.items.length >= 2); 38 | expectedCompletionList.items.forEach((expectedItem, i) => { 39 | const actualItem = actualCompletionList.items[i]; 40 | assert.equal(actualItem.label, expectedItem.label); 41 | assert.equal(actualItem.kind, expectedItem.kind); 42 | }); 43 | } 44 | -------------------------------------------------------------------------------- /client/src/test/diagnostics.test.ts: -------------------------------------------------------------------------------- 1 | /* -------------------------------------------------------------------------------------------- 2 | * Copyright (c) Microsoft Corporation. All rights reserved. 3 | * Licensed under the MIT License. See License.txt in the project root for license information. 4 | * ------------------------------------------------------------------------------------------ */ 5 | 6 | import * as vscode from 'vscode'; 7 | import * as assert from 'assert'; 8 | import { getDocUri, activate } from './helper'; 9 | 10 | suite('Should get diagnostics', () => { 11 | const docUri = getDocUri('diagnostics.txt'); 12 | 13 | test('Diagnoses uppercase texts', async () => { 14 | await testDiagnostics(docUri, [ 15 | { message: 'ANY is all uppercase.', range: toRange(0, 0, 0, 3), severity: vscode.DiagnosticSeverity.Warning, source: 'ex' }, 16 | { message: 'ANY is all uppercase.', range: toRange(0, 14, 0, 17), severity: vscode.DiagnosticSeverity.Warning, source: 'ex' }, 17 | { message: 'OS is all uppercase.', range: toRange(0, 18, 0, 20), severity: vscode.DiagnosticSeverity.Warning, source: 'ex' } 18 | ]); 19 | }); 20 | }); 21 | 22 | function toRange(sLine: number, sChar: number, eLine: number, eChar: number) { 23 | const start = new vscode.Position(sLine, sChar); 24 | const end = new vscode.Position(eLine, eChar); 25 | return new vscode.Range(start, end); 26 | } 27 | 28 | async function testDiagnostics(docUri: vscode.Uri, expectedDiagnostics: vscode.Diagnostic[]) { 29 | 
await activate(docUri); 30 | 31 | const actualDiagnostics = vscode.languages.getDiagnostics(docUri); 32 | 33 | assert.equal(actualDiagnostics.length, expectedDiagnostics.length); 34 | 35 | expectedDiagnostics.forEach((expectedDiagnostic, i) => { 36 | const actualDiagnostic = actualDiagnostics[i]; 37 | assert.equal(actualDiagnostic.message, expectedDiagnostic.message); 38 | assert.deepEqual(actualDiagnostic.range, expectedDiagnostic.range); 39 | assert.equal(actualDiagnostic.severity, expectedDiagnostic.severity); 40 | }); 41 | } -------------------------------------------------------------------------------- /client/src/test/helper.ts: -------------------------------------------------------------------------------- 1 | /* -------------------------------------------------------------------------------------------- 2 | * Copyright (c) Microsoft Corporation. All rights reserved. 3 | * Licensed under the MIT License. See License.txt in the project root for license information. 4 | * ------------------------------------------------------------------------------------------ */ 5 | 6 | import * as vscode from 'vscode'; 7 | import * as path from 'path'; 8 | 9 | export let doc: vscode.TextDocument; 10 | export let editor: vscode.TextEditor; 11 | export let documentEol: string; 12 | export let platformEol: string; 13 | 14 | /** 15 | * Activates the vscode.PDF COS Syntax extension 16 | */ 17 | export async function activate(docUri: vscode.Uri) { 18 | // The extensionId is `publisher.name` from package.json 19 | const ext = vscode.extensions.getExtension('pdfassociation.pdf-cos-syntax')!; 20 | await ext.activate(); 21 | try { 22 | doc = await vscode.workspace.openTextDocument(docUri); 23 | editor = await vscode.window.showTextDocument(doc); 24 | await sleep(2000); // Wait for server activation 25 | } catch (e) { 26 | console.error(e); 27 | } 28 | } 29 | 30 | async function sleep(ms: number) { 31 | return new Promise(resolve => setTimeout(resolve, ms)); 32 | } 33 | 34 | export 
/* --------------------------------------------------------------------------------------------
 * Copyright (c) Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See License.txt in the project root for license information.
 * ------------------------------------------------------------------------------------------ */
import * as path from 'path';
import * as Mocha from 'mocha';
import * as glob from 'glob';

/**
 * Collects every `**.test.js` file under this directory, adds it to a Mocha
 * (TDD UI) run and executes the suite.
 *
 * @returns A promise that resolves when all tests pass and rejects when the
 *          glob fails, Mocha throws, or at least one test fails.
 */
export function run(): Promise<void> {
	// Create the mocha test
	const mocha = new Mocha({
		ui: 'tdd',
		color: true
	});
	mocha.timeout(100000);

	const testsRoot = __dirname;

	return new Promise<void>((resolve, reject) => {
		glob('**.test.js', { cwd: testsRoot }, (globErr, files) => {
			if (globErr) {
				return reject(globErr);
			}

			// Add files to the test suite
			files.forEach(f => mocha.addFile(path.resolve(testsRoot, f)));

			try {
				// Run the mocha test
				mocha.run(failures => {
					if (failures > 0) {
						reject(new Error(`${failures} tests failed.`));
					} else {
						resolve();
					}
				});
			} catch (runErr) {
				// Mocha failed to start at all (as opposed to individual tests failing).
				console.error(runErr);
				reject(runErr);
			}
		});
	});
}
/*---------------------------------------------------------------------------------------------
 * Copyright (c) Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/
import * as path from 'path';

import { runTests } from '@vscode/test-electron';

/**
 * Entry point of the integration-test launcher: resolves the extension and
 * test-runner paths relative to this file and hands them to `runTests`.
 * Any failure is reported on stderr and the process exits with status 1.
 */
async function main() {
	try {
		// The folder containing the Extension Manifest package.json
		// Passed to `--extensionDevelopmentPath`
		const extensionDevelopmentPath = path.resolve(__dirname, '../../../');

		// The path to test runner
		// Passed to --extensionTestsPath
		const extensionTestsPath = path.resolve(__dirname, './index');

		// Download VS Code, unzip it and run the integration test
		await runTests({ extensionDevelopmentPath, extensionTestsPath });
	} catch (err) {
		console.error('Failed to run tests');
		// Surface the cause as well; without it a failing run is undiagnosable.
		console.error(err);
		process.exit(1);
	}
}

main();
Approved for public release. 15 | */ 16 | 'use strict'; 17 | 18 | export const TOKEN_TYPES = [ 19 | 'header', 20 | 'endobj', 21 | // 'pdf_token', 22 | 'indirect_object_start', 23 | 'stream', 24 | 'endstream', 25 | 'dict_start', 26 | 'dict_end', 27 | 'array_start', 28 | 'array_end', 29 | 'name', 30 | // 'valid_name_char', 31 | // 'name_hex_escape', 32 | 33 | 'string_literal', 34 | 35 | // 'string_literal_char', 36 | // 'string_literal_escape', 37 | // 'octal', 38 | // 'octal_digit', 39 | // 'escaped_eol', 40 | 41 | 'hex_string', 42 | 'indirect_ref', 43 | 'integer', 44 | 'real', 45 | 'bool', 46 | 'null', 47 | 'xref', 48 | 'xref_10entry', 49 | 'xref_5entry', 50 | 'xref_entry', 51 | 'trailer', 52 | 'eof', 53 | 'startxref', 54 | 'comment', 55 | 56 | // 'eol', 57 | // 'delimiter', 58 | // 'start_delimiter', 59 | // 'end_delimiter', 60 | // 'ws_incl_eol', 61 | // 'ws_no_eol', 62 | ]; 63 | 64 | export const TOKEN_MODIFIERS = [ 65 | 'isDictKey', // only applies to 'name' objects 66 | 'isArrayElement' 67 | ]; 68 | -------------------------------------------------------------------------------- /client/src/types/index.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * @brief Semantic tokenizing parser interface. Shared between client and server. 3 | * 4 | * @copyright 5 | * Copyright 2023 PDF Association, Inc. https://www.pdfa.org 6 | * SPDX-License-Identifier: Apache-2.0 7 | * 8 | * @remark 9 | * This material is based upon work supported by the Defense Advanced 10 | * Research Projects Agency (DARPA) under Contract No. HR001119C0079. 11 | * Any opinions, findings and conclusions or recommendations expressed 12 | * in this material are those of the author(s) and do not necessarily 13 | * reflect the views of the Defense Advanced Research Projects Agency 14 | * (DARPA). Approved for public release. 
15 | */ 16 | 17 | export * from './tokenTypes'; 18 | export * from './constants'; 19 | -------------------------------------------------------------------------------- /client/src/types/tokenTypes.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * @brief Token from Ohm-based tokenizing parser. Shared between client and server. 3 | * 4 | * @copyright 5 | * Copyright 2023 PDF Association, Inc. https://www.pdfa.org 6 | * SPDX-License-Identifier: Apache-2.0 7 | * 8 | * @remark 9 | * This material is based upon work supported by the Defense Advanced 10 | * Research Projects Agency (DARPA) under Contract No. HR001119C0079. 11 | * Any opinions, findings and conclusions or recommendations expressed 12 | * in this material are those of the author(s) and do not necessarily 13 | * reflect the views of the Defense Advanced Research Projects Agency 14 | * (DARPA). Approved for public release. 15 | */ 16 | 'use strict'; 17 | 18 | export interface PDFToken { 19 | line: number, 20 | start: number; 21 | end: number; 22 | type: string; 23 | [key: string]: any; 24 | } 25 | -------------------------------------------------------------------------------- /client/testFixture/completion.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pdf-association/pdf-cos-syntax/8adcc72046ad40579add080379161273315fdbb2/client/testFixture/completion.txt -------------------------------------------------------------------------------- /client/testFixture/diagnostics.txt: -------------------------------------------------------------------------------- 1 | ANY browsers, ANY OS. 
-------------------------------------------------------------------------------- /client/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "module": "commonjs", 4 | "target": "es2020", 5 | "lib": ["es2020"], 6 | "outDir": "out", 7 | "rootDir": "src", 8 | "sourceMap": true 9 | }, 10 | "include": ["src"], 11 | "exclude": ["node_modules", ".vscode-test"] 12 | } 13 | -------------------------------------------------------------------------------- /client/tsconfig.tsbuildinfo: -------------------------------------------------------------------------------- 1 | {"root":["./src/pdffoldingrangeprovider.ts","./src/extension.ts","./src/pdfclientutilities.ts","./src/sankey-webview.ts","./src/test/completion.test.ts","./src/test/diagnostics.test.ts","./src/test/helper.ts","./src/test/index.ts","./src/test/runtest.ts","./src/types/constants.ts","./src/types/index.ts","./src/types/tokentypes.ts"],"version":"5.8.2"} -------------------------------------------------------------------------------- /language-configuration.json: -------------------------------------------------------------------------------- 1 | { 2 | "comments": { 3 | "lineComment": "%" 4 | }, 5 | "brackets": [ 6 | ["<<", ">>"], 7 | ["<", ">"], 8 | ["[", "]"], 9 | ["(", ")"], 10 | ["{", "}"] 11 | ], 12 | "autoClosingPairs": [ 13 | { "open": "<<", "close": ">>", "notIn": ["string", "comment"] }, 14 | { "open": "<", "close": ">", "notIn": ["string", "comment"] }, 15 | { "open": "[", "close": "]", "notIn": ["string", "comment"] }, 16 | { "open": "{", "close": "}", "notIn": ["string", "comment"] }, 17 | { "open": "(", "close": ")", "notIn": ["comment"] } 18 | ], 19 | "surroundingPairs": [ 20 | ["<<", ">>"], 21 | ["<", ">"], 22 | ["[", "]"], 23 | ["(", ")"], 24 | ["{", "}"] 25 | ] 26 | } 27 | -------------------------------------------------------------------------------- /media/d3-sankey.js: 
-------------------------------------------------------------------------------- 1 | // https://github.com/d3/d3-sankey v0.12.3 Copyright 2019 Mike Bostock 2 | !function(n,t){"object"==typeof exports&&"undefined"!=typeof module?t(exports,require("d3-array"),require("d3-shape")):"function"==typeof define&&define.amd?define(["exports","d3-array","d3-shape"],t):t((n=n||self).d3=n.d3||{},n.d3,n.d3)}(this,function(n,t,e){"use strict";function o(n){return n.target.depth}function r(n,t){return n.sourceLinks.length?n.depth:t-1}function i(n){return function(){return n}}function s(n,t){return u(n.source,t.source)||n.index-t.index}function f(n,t){return u(n.target,t.target)||n.index-t.index}function u(n,t){return n.y0-t.y0}function c(n){return n.value}function l(n){return n.index}function a(n){return n.nodes}function d(n){return n.links}function h(n,t){const e=n.get(t);if(!e)throw new Error("missing: "+t);return e}function g({nodes:n}){for(const t of n){let n=t.y0,e=n;for(const e of t.sourceLinks)e.y0=n+e.width/2,n+=e.width;for(const n of t.targetLinks)n.y1=e+n.width/2,e+=n.width}}function y(n){return[n.source.x1,n.y0]}function k(n){return[n.target.x0,n.y1]}n.sankey=function(){let n,e,o,y=0,k=0,L=1,p=1,w=24,x=8,m=l,v=r,M=a,b=d,S=6;function z(){const r={nodes:M.apply(null,arguments),links:b.apply(null,arguments)};return function({nodes:n,links:t}){for(const[t,e]of n.entries())e.index=t,e.sourceLinks=[],e.targetLinks=[];const e=new Map(n.map((t,e)=>[m(t,e,n),t]));for(const[n,o]of t.entries()){o.index=n;let{source:t,target:r}=o;"object"!=typeof t&&(t=o.source=h(e,t)),"object"!=typeof r&&(r=o.target=h(e,r)),t.sourceLinks.push(o),r.targetLinks.push(o)}if(null!=o)for(const{sourceLinks:t,targetLinks:e}of n)t.sort(o),e.sort(o)}(r),function({nodes:n}){for(const e of n)e.value=void 0===e.fixedValue?Math.max(t.sum(e.sourceLinks,c),t.sum(e.targetLinks,c)):e.fixedValue}(r),function({nodes:n}){const t=n.length;let e=new Set(n),o=new Set,r=0;for(;e.size;){for(const n of 
e){n.depth=r;for(const{target:t}of n.sourceLinks)o.add(t)}if(++r>t)throw new Error("circular link");e=o,o=new Set}}(r),function({nodes:n}){const t=n.length;let e=new Set(n),o=new Set,r=0;for(;e.size;){for(const n of e){n.height=r;for(const{source:t}of n.targetLinks)o.add(t)}if(++r>t)throw new Error("circular link");e=o,o=new Set}}(r),function(o){const r=function({nodes:n}){const o=t.max(n,n=>n.depth)+1,r=(L-y-w)/(o-1),i=new Array(o);for(const t of n){const n=Math.max(0,Math.min(o-1,Math.floor(v.call(null,t,o))));t.layer=n,t.x0=y+n*r,t.x1=t.x0+w,i[n]?i[n].push(t):i[n]=[t]}if(e)for(const n of i)n.sort(e);return i}(o);n=Math.min(x,(p-k)/(t.max(r,n=>n.length)-1)),function(e){const o=t.min(e,e=>(p-k-(e.length-1)*n)/t.sum(e,c));for(const t of e){let e=k;for(const r of t){r.y0=e,r.y1=e+r.value*o,e=r.y1+n;for(const n of r.sourceLinks)n.width=n.value*o}e=(p-e+n)/(t.length+1);for(let n=0;n0))continue;let r=(e/o-n.y0)*t;n.y0+=r,n.y1+=r,P(n)}void 0===e&&i.sort(u),q(i,o)}}function E(n,t,o){for(let r=n.length-2;r>=0;--r){const i=n[r];for(const n of i){let e=0,o=0;for(const{target:t,value:r}of n.sourceLinks){let i=r*(t.layer-n.layer);e+=C(n,t)*i,o+=i}if(!(o>0))continue;let r=(e/o-n.y0)*t;n.y0+=r,n.y1+=r,P(n)}void 0===e&&i.sort(u),q(i,o)}}function q(t,e){const o=t.length>>1,r=t[o];H(t,r.y0-n,o-1,e),A(t,r.y1+n,o+1,e),H(t,p,t.length-1,e),A(t,k,0,e)}function A(t,e,o,r){for(;o1e-6&&(i.y0+=s,i.y1+=s),e=i.y1+n}}function H(t,e,o,r){for(;o>=0;--o){const i=t[o],s=(i.y1-e)*r;s>1e-6&&(i.y0-=s,i.y1-=s),e=i.y0-n}}function P({sourceLinks:n,targetLinks:t}){if(void 0===o){for(const{source:{sourceLinks:n}}of t)n.sort(f);for(const{target:{targetLinks:t}}of n)t.sort(s)}}function V(n){if(void 0===o)for(const{sourceLinks:t,targetLinks:e}of n)t.sort(f),e.sort(s)}function _(t,e){let o=t.y0-(t.sourceLinks.length-1)*n/2;for(const{target:r,width:i}of t.sourceLinks){if(r===e)break;o+=i+n}for(const{source:n,width:r}of e.targetLinks){if(n===t)break;o-=r}return o}function C(t,e){let 
// Settings
let sankeyHeight = 600;
const width = 720;
let nodeWidth = 15;
let padding = 10;
let align; // cannot assign until D3 is fully loaded
let defaultColor = "#dddddd";
let sankeyCsvText; // the actual CSV data - join('\n')

/**
 * Wires up the diagram controls once the page (and therefore D3) has loaded.
 * Every control updates one module-level setting and re-renders the diagram.
 */
window.onload = function () {
  // D3 has loaded so initialize
  align = d3.sankeyLeft;

  // Attach `apply` to the control with the given id and re-render afterwards.
  const bindControl = (id, eventName, apply) => {
    document.getElementById(id).addEventListener(eventName, function (event) {
      apply(event.currentTarget);
      renderSankeyDiagram();
    });
  };

  // Numeric controls: ignore values that do not parse (e.g. a cleared field)
  // so that NaN never reaches the layout settings.
  const bindNumericControl = (id, assign) => {
    bindControl(id, "input", (input) => {
      const parsed = parseInt(input.value, 10);
      if (!Number.isNaN(parsed)) {
        assign(parsed);
      }
    });
  };

  // Alignment controls: element id -> D3 node alignment function
  const alignments = [
    ["align-left", d3.sankeyLeft],
    ["align-right", d3.sankeyRight],
    ["align-center", d3.sankeyCenter],
    ["align-justify", d3.sankeyJustify]
  ];
  for (const [id, alignFn] of alignments) {
    bindControl(id, "change", () => {
      align = alignFn;
    });
  }

  bindNumericControl("node-padding", (value) => {
    padding = value;
  });
  bindNumericControl("node-width", (value) => {
    nodeWidth = value;
  });
  bindControl("default-color", "input", (input) => {
    defaultColor = input.value;
  });
  bindNumericControl("height-selector", (value) => {
    sankeyHeight = value;
  });

  // cannot call renderSankeyDiagram(); until CSV message arrives!
};

let allLinks;
let allNodes;

/**
 * Rebuilds the whole Sankey SVG from `sankeyCsvText` using the current
 * settings (alignment, node width, padding, height, default color).
 * Each CSV row is `source,target[,value[,color]]`; rows without both a source
 * and a target are dropped, and a missing or non-numeric value counts as 1.
 * Does nothing until CSV data has been received.
 */
function renderSankeyDiagram() {
  // Controls can fire before the extension has posted any CSV data.
  if (sankeyCsvText == null) {
    return;
  }

  const sankey = d3.sankey()
    .nodeId(d => d.name)
    .nodeAlign(align)
    .nodeSort(null) // keep order as per CSV data!!
    .nodeWidth(nodeWidth)
    .nodePadding(padding)
    .extent([[0, 5], [width, sankeyHeight - 5]]);

  const links_ = d3.csvParseRows(
    sankeyCsvText,
    ([source, target, value, linkColor = defaultColor]) => {
      if (!source || !target) {
        return null; // csvParseRows omits rows mapped to null
      }
      const numericValue = +value;
      return {
        source,
        target,
        value: !value || Number.isNaN(numericValue) ? 1 : numericValue,
        color: linkColor
      };
    }
  );

  // Nodes are implied by the links: one node per distinct name, in first-seen order.
  const nodeByName = new Map();
  for (const link of links_) {
    if (!nodeByName.has(link.source)) nodeByName.set(link.source, {name: link.source});
    if (!nodeByName.has(link.target)) nodeByName.set(link.target, {name: link.target});
  }
  const data = {nodes: Array.from(nodeByName.values()), links: links_};

  d3.select("body").selectAll("svg").remove();
  const svg = d3.select("body").append("svg") // append the SVG to the HTML body
    .attr("width", width)
    .attr("height", sankeyHeight)
    .attr("preserveAspectRatio", "xMinYMin")
    .attr("viewBox", `0 0 ${width} ${sankeyHeight}`)
    .style("background", "#fff")
    .style("width", "100%")
    .style("height", "auto");

  // The layout mutates its input, so hand it shallow copies.
  const {nodes, links} = sankey({
    nodes: data.nodes.map(d => Object.assign({}, d)),
    links: data.links.map(d => Object.assign({}, d))
  });

  allNodes = svg.append("g")
    .selectAll("rect")
    .data(nodes)
    .join("rect")
    .attr("id", function (d, i) {
      d.id = i;
      return "node-" + d.id;
    })
    .attr("x", d => d.x0 + 1)
    .attr("y", d => d.y0)
    .attr("height", d => d.y1 - d.y0)
    .attr("width", d => d.x1 - d.x0 - 2)
    .attr("fill", d => {
      // Use the color shared by all outgoing links (falling back to incoming
      // links for nodes with no outgoing ones); mixed colors yield null,
      // which selects the default color below.
      let c;
      for (const link of d.sourceLinks) {
        if (c === undefined)
          c = link.color;
        else if (c !== link.color)
          c = null;
      }
      if (c === undefined) {
        for (const link of d.targetLinks) {
          if (c === undefined)
            c = link.color;
          else if (c !== link.color)
            c = null;
        }
      }
      return (d3.color(c) || d3.color(defaultColor)).darker(0.5);
    })
    .on("click", highlightLinks)
    .style("cursor", "pointer");

  allNodes.append("title")
    .text(d => `${d.name}\n${d.value.toLocaleString()}`);

  allLinks = svg.append("g")
    .attr("fill", "none")
    .style("pointer-events", "none")
    .selectAll("g")
    .data(links)
    .join("g")
    .attr("id", function (d, i) {
      d.id = i;
      return "link-" + d.id;
    })
    .attr("stroke", d => d3.color(d.color) || defaultColor)
    .style("mix-blend-mode", "multiply");

  allLinks.append("path")
    .attr("d", d3.sankeyLinkHorizontal())
    .attr("stroke-width", d => Math.max(1, d.width));

  allLinks.append("title")
    .text(d => `${d.source.name} → ${d.target.name}\n${d.value.toLocaleString()}`);

  // Node labels: placed to the right of nodes in the left half, to the left otherwise.
  svg.append("g")
    .style("font", "10px sans-serif")
    .style("text-shadow", "white 0 0 2px, white 0 0 1px")
    .selectAll("text")
    .data(nodes)
    .join("text")
    .attr("x", d => d.x0 < width / 2 ? d.x1 + 6 : d.x0 - 6)
    .attr("y", d => (d.y1 + d.y0) / 2)
    .attr("dy", "0.35em")
    .attr("text-anchor", d => d.x0 < width / 2 ? "start" : "end")
    .text(d => d.name)
    .append("tspan")
    .attr("fill-opacity", 0.7)
    .text(d => ` ${d.value.toLocaleString()}`);
}

let clickedNode = null;

/**
 * Click handler for a node: dims the whole diagram, then restores full
 * opacity on the clicked node and everything reachable from it, first
 * following outgoing links and then incoming links. Clicking the same node
 * again clears the highlight.
 */
function highlightLinks(event, node) {
  // Based on https://observablehq.com/@iashishsingh/sankey-diagram-path-highlighting
  if (node === clickedNode) {
    resetHighlight();
    clickedNode = null;
    return;
  }
  clickedNode = node;
  // fade everything out a bit
  allLinks.style("stroke-opacity", 0.4);
  allNodes.style("fill-opacity", 0.4);

  // Restore ancestors and descendants to normal opacity
  let currentLevel = [];
  let nextLevel;

  setNodeOpacity(node, 1.0);
  [
    {linkType: "sourceLinks", nodeType: "target"},
    {linkType: "targetLinks", nodeType: "source"}
  ].forEach(function ({linkType, nodeType}) {
    node[linkType].forEach(function (link) {
      currentLevel.push(link[nodeType]);
      setLinkOpacity(link, 1.0);
    });
    // Level-by-level walk in one direction until no further nodes are reached.
    while (currentLevel.length) {
      nextLevel = [];
      currentLevel.forEach(function (member) {
        setNodeOpacity(member, 1.0);
        member[linkType].forEach(function (link) {
          nextLevel.push(link[nodeType]);
          setLinkOpacity(link, 1.0);
        });
      });
      currentLevel = nextLevel;
    }
  });
}

// Sets the stroke opacity of one rendered link, looked up by its generated id.
function setLinkOpacity(link, opacity) {
  d3.select("#link-" + link.id).style("stroke-opacity", opacity);
}

// Sets the fill opacity of one rendered node, looked up by its generated id.
function setNodeOpacity(node, opacity) {
  d3.select("#node-" + node.id).style("fill-opacity", opacity);
}

// Restores every link and node to full opacity.
function resetHighlight() {
  allLinks.style("stroke-opacity", 1.0);
  allNodes.style("fill-opacity", 1.0);
}

// Install a handler to receive messages from the VSCode extension.
// The payload is read from `event.data.type` as `{ type, value }`; messages
// without such a payload are ignored rather than throwing.
const handleExtensionMessages = (event) => {
  const payload = event.data && event.data.type;
  if (!payload) {
    return;
  }
  switch (payload.type) {
    case "CSV-Data":
      // console.log(`Received CSV-Data`);
      sankeyCsvText = payload.value;
      renderSankeyDiagram();
      break;
  }
};

window.addEventListener("message", handleExtensionMessages);
| button.secondary { 68 | color: var(--vscode-button-secondaryForeground); 69 | background: var(--vscode-button-secondaryBackground); 70 | } 71 | 72 | button.secondary:hover { 73 | background: var(--vscode-button-secondaryHoverBackground); 74 | } 75 | 76 | label { 77 | display: inline-block; 78 | text-align: right; 79 | } 80 | 81 | input:not([type='checkbox']), 82 | textarea { 83 | display: inline-block; /* was block */ 84 | /* width: 100%; */ 85 | border: none; 86 | font-family: var(--vscode-font-family); 87 | padding: var(--input-padding-vertical) var(--input-padding-horizontal); 88 | color: var(--vscode-input-foreground); 89 | outline-color: var(--vscode-input-border); 90 | background-color: var(--vscode-input-background); 91 | } 92 | 93 | input::placeholder, 94 | textarea::placeholder { 95 | display: inline-block; /* was block */ 96 | color: var(--vscode-input-placeholderForeground); 97 | } 98 | -------------------------------------------------------------------------------- /package-lock.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "pdf-cos-syntax", 3 | "version": "0.1.6", 4 | "lockfileVersion": 3, 5 | "requires": true, 6 | "packages": { 7 | "": { 8 | "name": "pdf-cos-syntax", 9 | "version": "0.1.6", 10 | "hasInstallScript": true, 11 | "license": "Apache-2.0", 12 | "devDependencies": { 13 | "@types/node": "^22.13.9", 14 | "@types/vscode": "^1.84.2", 15 | "typescript": "^5.3.2" 16 | }, 17 | "engines": { 18 | "vscode": "^1.84.2" 19 | } 20 | }, 21 | "node_modules/@types/node": { 22 | "version": "22.13.9", 23 | "resolved": "https://registry.npmjs.org/@types/node/-/node-22.13.9.tgz", 24 | "integrity": "sha512-acBjXdRJ3A6Pb3tqnw9HZmyR3Fiol3aGxRCK1x3d+6CDAMjl7I649wpSd+yNURCjbOUGu9tqtLKnTGxmK6CyGw==", 25 | "dev": true, 26 | "license": "MIT", 27 | "dependencies": { 28 | "undici-types": "~6.20.0" 29 | } 30 | }, 31 | "node_modules/@types/vscode": { 32 | "version": "1.97.0", 33 | "resolved": 
"https://registry.npmjs.org/@types/vscode/-/vscode-1.97.0.tgz", 34 | "integrity": "sha512-ueE73loeOTe7olaVyqP9mrRI54kVPJifUPjblZo9fYcv1CuVLPOEKEkqW0GkqPC454+nCEoigLWnC2Pp7prZ9w==", 35 | "dev": true, 36 | "license": "MIT" 37 | }, 38 | "node_modules/typescript": { 39 | "version": "5.8.2", 40 | "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.8.2.tgz", 41 | "integrity": "sha512-aJn6wq13/afZp/jT9QZmwEjDqqvSGp1VT5GVg+f/t6/oVyrgXM6BY1h9BRh/O5p3PlUPAe+WuiEZOmb/49RqoQ==", 42 | "dev": true, 43 | "license": "Apache-2.0", 44 | "bin": { 45 | "tsc": "bin/tsc", 46 | "tsserver": "bin/tsserver" 47 | }, 48 | "engines": { 49 | "node": ">=14.17" 50 | } 51 | }, 52 | "node_modules/undici-types": { 53 | "version": "6.20.0", 54 | "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.20.0.tgz", 55 | "integrity": "sha512-Ny6QZ2Nju20vw1SRHe3d9jVu6gJ+4e3+MMpqu7pqE5HT6WsTSlce++GQmK5UXS8mzV8DSYHrQH+Xrf2jVcuKNg==", 56 | "dev": true, 57 | "license": "MIT" 58 | } 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /scripts/arlington-to-vscode.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | # Copyright 2023 PDF Association, Inc. https://www.pdfa.org 4 | # 5 | # This material is based upon work supported by the Defense Advanced 6 | # Research Projects Agency (DARPA) under Contract No. HR001119C0079. 7 | # Any opinions, findings and conclusions or recommendations expressed 8 | # in this material are those of the author(s) and do not necessarily 9 | # reflect the views of the Defense Advanced Research Projects Agency 10 | # (DARPA). Approved for public release. 11 | # 12 | # SPDX-License-Identifier: Apache-2.0 13 | # Contributors: Peter Wyatt, PDF Association 14 | # 15 | # Converts an Arlington PDF Model "pandas.tsv" monolithic model 16 | # into a JSON file for use with VSCode Code Completion. 
import pandas as pd
import sys
import csv
import os
import re
import json
import argparse
from pprint import pprint

rowCount = 1

# Each VSCode Code Completion item needs a unique ID number
# These IDs are now global for an Arlington model. VSCode does not require
# them to be sequential
def CreateVSCodeCompletionData(row):
    """Return the next ID from the module-level counter (1, 2, 3, ...).

    `row` is not inspected; the parameter exists so the function can be used
    as a row-wise `DataFrame.apply` callback.
    """
    global rowCount
    rowCount = rowCount + 1
    return rowCount - 1


# VSCode Code Completion MarkDown-styled documentation a-la PDF specifications
# e.g. documentation: {
#   kind: MarkupKind.Markdown,
#   value: "`array`;`boolean` _(PDF 1.2; Required; Deprecated in PDF 1.4)_ Must be indirect reference."
# }
def CreateVSCodeCompletionDocumentation(row):
    """Build the Markdown documentation string for one Arlington row.

    `row` must support `row["Type"]`, `row["SinceVersion"]`, `row["Required"]`,
    `row["DeprecatedIn"]` and `row["IndirectReference"]`, all as strings.
    Returns the back-quoted type list followed by an italic parenthesised
    summary and, when applicable, the indirect-reference note.
    """
    s = "`" + row["Type"] + "` _("
    # Multiple types are ';'-separated: back-quote each one individually
    s = re.sub(";", "`;`", s)
    # Only 3-character versions are emitted (presumably "x.y" - verify against the model data)
    if (len(row["SinceVersion"]) == 3):
        s = s + "PDF " + row["SinceVersion"]
    if (row["Required"] == "TRUE"):
        s = s + "; Required"
    elif (row["Required"] == "FALSE"):
        s = s + "; Optional"
    if (row["DeprecatedIn"] != ""):
        s = s + "; Deprecated in PDF " + row["DeprecatedIn"]
    s = s + ")_"
    if (row["IndirectReference"] == "TRUE"):
        s = s + " Must be indirect reference."
    # Tidy up the documentation: drop an empty "_()_" and a leading "; " inside
    # the parentheses. Raw strings keep "\(" a regex escape, not a string escape.
    s = re.sub(r"_\(\)_", "", s)
    s = re.sub(r"_\(; ", "_(", s)
    return s


# Convert pandas TSV to JSON, but also add some additional fields for VSCode Code Completion
def ArlingtonToTS(pandas_fname: str, json_fname: str):
    """Read the monolithic TSV `pandas_fname` and write `json_fname`.

    The "Note" column and every row whose "Object" contains "Array" or
    "ColorSpace" are dropped; "Data" (unique ID) and "Documentation"
    (Markdown) columns are added. Output is a JSON array of records.
    """
    df = pd.read_csv(pandas_fname, delimiter='\t', na_filter=False,
                     dtype={'Object':'string', 'Key':'string', 'Type':'string', 'SinceVersion':'string',
                            'DeprecatedIn':'string', 'Required':'string', 'IndirectReference':'string',
                            'Inheritable':'string', 'DefaultValue':'string', 'PossibleValues':'string',
                            'SpecialCase':'string', 'Link':'string', 'Note':'string'})

    # df is a pandas DataFrame of a full Arlington file set
    df = df.drop(columns='Note')

    # Drop all arrays - where "Object" contains "Array" or "ColorSpace"
    arr_obj = df[df["Object"].map(lambda name: "Array" in name or "ColorSpace" in name)].index
    df.drop(arr_obj, inplace=True)

    # Add new columns needed for VSCode Code Completion
    df["Data"] = df.apply(CreateVSCodeCompletionData, axis=1)
    df["Documentation"] = df.apply(CreateVSCodeCompletionDocumentation, axis=1)

    df.to_json(path_or_buf=json_fname, orient='records', indent=2)


# Attempt to load pandas created JSON file to see if it is valid
def ValidateJSONLoads(json_fname: str):
    """Parse `json_fname` with the `json` module; raises if it is not valid JSON."""
    with open(json_fname) as f:
        d = json.load(f)
    # pprint(d)


if __name__ == '__main__':
    cli_parser = argparse.ArgumentParser()
    cli_parser.add_argument('-p', '--pandas', dest="pandasTSV", default="pandas.tsv",
                            help='filename of a single Pandas-compatible TSV')
    cli_parser.add_argument('-j', '--json', dest="jsonFile", default="arlington.json",
                            help='filename of a JSON output file')
    cli = cli_parser.parse_args()

    if (cli.pandasTSV is None) or not os.path.isfile(cli.pandasTSV):
        print("'%s' is not a valid file" % cli.pandasTSV)
        cli_parser.print_help()
        # Non-zero status so callers and scripts can detect the failure
        sys.exit(1)

    print("Loading from '%s' --> '%s'" % (cli.pandasTSV, cli.jsonFile))
    ArlingtonToTS(cli.pandasTSV, cli.jsonFile)
    ValidateJSONLoads(cli.jsonFile)
    print("Done")
// This material is based upon work supported by the Defense Advanced
// Research Projects Agency (DARPA) under Contract No. HR001119C0079.
// Any opinions, findings and conclusions or recommendations expressed
// in this material are those of the author(s) and do not necessarily
// reflect the views of the Defense Advanced Research Projects Agency
// (DARPA). Approved for public release.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Original author: Ozzy Kirkby
// Author: Peter Wyatt
//
// Test online at: https://ohmjs.org/editor/
//
// https://ohmjs.org/docs/syntax-reference#syntactic-lexical:
//   lowercase = lexical rule - whitespace NOT skipped
//   uppercase = syntactic rule - whitespace IS skipped
//
// This grammar has to be VERY lenient to allow live editing of a PDF.
// Just define valid PDF objects and constructs but NOT overall file layout.
//
PDFObject {
  pdf = header binary_marker? revision*

  // File header
  header = "%PDF-" digit "." digit (~eol any)* eol

  // Binary file marker comment (4 bytes minimum)
  binary_marker = "%" "\x80".."\xFF" "\x80".."\xFF" "\x80".."\xFF" "\x80".."\xFF" (~eol any)* eol

  // Revision
  revision = body* xref* trailer? startxref eof

  // Body
  body = indirect_object_start | object | stream | indirect_object_end

  // Parts of indirect objects - allows editing so extra lenient
  indirect_object_start = ws_incl_eol* digit+ ws_incl_eol digit+ ws_incl_eol "obj" ws_incl_eol*
  indirect_object_end = ws_incl_eol* "endobj" ws_incl_eol
  stream = ws_incl_eol* "stream" ws_incl_eol (~"endstream" any)* ws_incl_eol? "endstream" ws_incl_eol

  // PDF objects NOT incl. streams (i.e. things allowed as array elements or dict key values)
  object = indirect_ref | dictionary | array | string | name | number | bool | null

  // PDF dictionary - of key/value pairs
  dictionary = "<<" key_value_pair* ws_incl_eol* ">>" ws_incl_eol*
  key_value_pair = (ws_incl_eol* | &"/") name (ws_incl_eol* | &start_delimiter) object

  // PDF arrays - empty arrays are valid
  array = "[" ((ws_incl_eol* | &start_delimiter) object)* ws_incl_eol* "]" (ws_incl_eol* | &delimiter)

  // PDF Name - empty name is valid. 2 digit hex codes preceded by '#'.
  // A name ends at whitespace or at the next delimiter (including another "/").
  name = "/" (~(&delimiter | ws_incl_eol) valid_name_char)*
  valid_name_char = name_hex_escape | (~("#" | &delimiter) any)
  name_hex_escape = "#" hexDigit hexDigit

  // PDF string literal - empty string is valid
  string = string_literal | hex_string
  string_literal = "(" (~")" (string_literal_char | any))* ")"
  string_literal_char = string_literal_escape | string_literal
  string_literal_escape = "\\n" | "\\r" | "\\b" | "\\f" | "\\(" | "\\)" | "\\\\" | octal | escaped_eol
  octal = "\\" octal_digit octal_digit octal_digit
  octal_digit = "0".."7"
  escaped_eol = "\\" eol

  // PDF hex string - empty hex string is valid
  hex_string = "<" (hexDigit | ws_incl_eol)* ">"

  // Indirect reference: object number, generation number, then `R` (e.g. 12 0 R)
  indirect_ref = ws_incl_eol* digit+ ws_incl_eol digit+ ws_incl_eol "R"

  // PDF Numeric object - integer or real
  number = integer | real
  integer = ("+" | "-")? digit+
  real = ("+" | "-")? ((digit* "."? digit+) | (digit+ "."? digit*))

  // PDF Boolean object
  bool = "true" | "false"

  // PDF null object
  null = "null"

  // Conventional cross reference tables
  xref = ws_incl_eol* "xref" ws_incl_eol xref_subsection+
  xref_subsection = xref_subsection_marker xref_entry*
  xref_subsection_marker = ws_no_eol? digit+ " " digit+ ws_incl_eol
  xref_10entry = digit digit digit digit digit digit digit digit digit digit
  xref_5entry = digit digit digit digit digit
  xref_entry = xref_10entry " " xref_5entry " " ("f" | "n") ws_incl_eol

  // trailer, startxref and %%EOF. %%EOF does not need EOL (as could be EOF)
  trailer = "trailer" ws_incl_eol dictionary ws_incl_eol*
  startxref = "startxref" ws_incl_eol digit+ ws_incl_eol*
  eof = "%%EOF" ws_incl_eol*

  // PDF comment - up to next EOL. Treat lexically as an object
  comment = "%" (~eol any)* eol

  // Specific PDF EOL sequences. No whitespace or delimiters.
  eol = "\r\n" | "\n" | "\r"

  // PDF delimiters NOT including whitespace, EOLs or comments.
  // "/" is a start delimiter too: it begins a name and so terminates whatever
  // precedes it, which lets back-to-back names such as `/Type/Catalog` split
  // into key and value (the tokenizer grammar lists it the same way).
  delimiter = start_delimiter | end_delimiter
  start_delimiter = "<<" | "<" | "[" | "(" | "/" | "%"
  end_delimiter = ">>" | ">" | "]" | ")"

  // PDF whitespace optionally including EOLs and comments. ISO 32000-2 Table 1. DO NOT USE Ohm.js's "space"!
  ws_incl_eol = (" " | "\t" | "\x0C" | "\x00" | "\r" | "\n" | comment)+
  ws_no_eol = (" " | "\t" | "\x0C" | "\x00")+
}
// File: server/src/grammar/grammar_pdfTokens.ohm
//
// This material is based upon work supported by the Defense Advanced
// Research Projects Agency (DARPA) under Contract No. HR001119C0079.
// Any opinions, findings and conclusions or recommendations expressed
// in this material are those of the author(s) and do not necessarily
// reflect the views of the Defense Advanced Research Projects Agency
// (DARPA). Approved for public release.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Author: Peter Wyatt
//
// Test online at: https://ohmjs.org/editor/
//
// https://ohmjs.org/docs/syntax-reference#syntactic-lexical:
//   lowercase = lexical rule - whitespace NOT skipped
//   uppercase = syntactic rule - whitespace IS skipped
//
// ONLY does token detection. THIS IS NOT A FULL PARSER!!!
// ONLY works if run line-by-line.
// DOES NOT SUPPORT multi-line tokens (such as literal and hex strings).
// MUST AVOID attempting to parse stream data after "stream", before "endstream"
//
PDFTokenizer {
  // One or more tokens. Alternatives are tried in the order listed (Ohm uses
  // ordered choice), so longer/more specific forms such as xref_entry,
  // indirect_object_start, indirect_ref and real come before integer.
  pdf_token = (header |
               eof |
               bool |
               null |
               xref |
               trailer |
               startxref |
               dict_start |
               dict_end |
               array_start |
               array_end |
               name |
               xref_entry |
               indirect_object_start |
               endobj |
               indirect_ref |
               string_literal |
               hex_string |
               real |
               integer |
               stream |
               endstream |
               comment |
               ws_incl_eol)+

  // File header (no trailing EOL required, unlike the comment rule)
  header = "%PDF-" digit "." digit (~eol any)*

  // Parts of indirect objects - allows editing so extra lenient
  indirect_object_start = digit+ ws_incl_eol digit+ ws_incl_eol "obj"
  endobj = "endobj"
  stream = "stream"
  endstream = "endstream"

  // Dictionary and array brackets are separate tokens; nesting is not tracked here
  dict_start = "<<"
  dict_end = ">>"
  array_start = "["
  array_end = "]"

  // PDF Name - empty name is valid. 2 digit hex codes preceded by '#'.
  name = "/" (~(&delimiter | ws_incl_eol) valid_name_char)*
  valid_name_char = name_hex_escape | (~("#" | &delimiter) any)
  name_hex_escape = "#" hexDigit hexDigit

  // PDF string literal - empty string is valid
  string_literal = "(" (~")" (string_literal_char | any))* ")"
  string_literal_char = string_literal_escape | string_literal
  string_literal_escape = "\\n" | "\\r" | "\\b" | "\\f" | "\\(" | "\\)" | "\\\\" | octal | escaped_eol
  octal = "\\" octal_digit octal_digit octal_digit
  octal_digit = "0".."7"
  escaped_eol = "\\" eol

  // PDF hex string - empty hex string is valid
  hex_string = "<" (hexDigit | ws_incl_eol)* ">"

  // Indirect reference: object number, generation number, then `R` (e.g. 12 0 R)
  indirect_ref = digit+ ws_incl_eol digit+ ws_incl_eol "R"

  // PDF Numeric objects - integer and real (a real always contains a ".")
  integer = ("+" | "-")? digit+
  real = ("+" | "-")? ((digit* "." digit+) | (digit+ "." digit*))

  // PDF Boolean object
  bool = "true" | "false"

  // PDF null object
  null = "null"

  // Conventional cross reference tables: 10 digits, space, 5 digits, space, "f" or "n"
  xref = "xref"
  xref_10entry = digit digit digit digit digit digit digit digit digit digit
  xref_5entry = digit digit digit digit digit
  xref_entry = xref_10entry " " xref_5entry " " ("f" | "n")

  // Footer keywords
  trailer = "trailer"
  startxref = "startxref"
  eof = "%%EOF"

  // PDF comment - up to next EOL.
  comment = "%" (~eol any)* eol

  // Specific PDF EOL sequences. No whitespace or delimiters.
  eol = "\r\n" | "\n" | "\r"

  // PDF delimiters NOT including whitespace, EOLs or comments.
  delimiter = start_delimiter | end_delimiter
  start_delimiter = "<<" | "<" | "[" | "(" | "/" | "%"
  end_delimiter = ">>" | ">" | "]" | ")"

  // PDF whitespace optionally including EOLs and comments. ISO 32000-2 Table 1. DO NOT USE Ohm.js's "space"!
  ws_incl_eol = (" " | "\t" | "\x0C" | "\x00" | "\r" | "\n" | comment)+
  ws_no_eol = (" " | "\t" | "\x0C" | "\x00")+
}
// File: server/src/grammar/grammar_pdfTokens.ohm-bundle.d.ts
// AUTOGENERATED FILE
// This file was generated from grammar_pdfTokens.ohm by `ohm generateBundles`.

import {
  BaseActionDict,
  Grammar,
  IterationNode,
  Node,
  NonterminalNode,
  Semantics,
  TerminalNode
} from 'ohm-js';

/**
 * Semantic actions for the PDFTokenizer grammar, one optional action per rule.
 * `T` is the result type of the operation or attribute the actions implement;
 * the type parameter is declared here so that the `=> T` return types resolve.
 */
export interface PDFTokenizerActionDict<T> extends BaseActionDict<T> {
  pdf_token?: (this: NonterminalNode, arg0: IterationNode) => T;
  header?: (this: NonterminalNode, arg0: TerminalNode, arg1: NonterminalNode, arg2: TerminalNode, arg3: NonterminalNode, arg4: IterationNode) => T;
  indirect_object_start?: (this: NonterminalNode, arg0: IterationNode, arg1: NonterminalNode, arg2: IterationNode, arg3: NonterminalNode, arg4: TerminalNode) => T;
  endobj?: (this: NonterminalNode, arg0: TerminalNode) => T;
  stream?: (this: NonterminalNode, arg0: TerminalNode) => T;
  endstream?: (this: NonterminalNode, arg0: TerminalNode) => T;
  dict_start?: (this: NonterminalNode, arg0: TerminalNode) => T;
  dict_end?: (this: NonterminalNode, arg0: TerminalNode) => T;
  array_start?: (this: NonterminalNode, arg0: TerminalNode) => T;
  array_end?: (this: NonterminalNode, arg0: TerminalNode) => T;
  name?: (this: NonterminalNode, arg0: TerminalNode, arg1: IterationNode) => T;
  valid_name_char?: (this: NonterminalNode, arg0: NonterminalNode) => T;
  name_hex_escape?: (this: NonterminalNode, arg0: TerminalNode, arg1: NonterminalNode, arg2: NonterminalNode) => T;
  string_literal?: (this: NonterminalNode, arg0: TerminalNode, arg1: IterationNode, arg2: TerminalNode) => T;
  string_literal_char?: (this: NonterminalNode, arg0: NonterminalNode) => T;
  string_literal_escape?: (this: NonterminalNode, arg0: NonterminalNode | TerminalNode) => T;
  octal?: (this: NonterminalNode, arg0: TerminalNode, arg1: NonterminalNode, arg2: NonterminalNode, arg3: NonterminalNode) => T;
  octal_digit?: (this: NonterminalNode, arg0: TerminalNode) => T;
  escaped_eol?: (this: NonterminalNode, arg0: TerminalNode, arg1: NonterminalNode) => T;
  hex_string?: (this: NonterminalNode, arg0: TerminalNode, arg1: IterationNode, arg2: TerminalNode) => T;
  indirect_ref?: (this: NonterminalNode, arg0: IterationNode, arg1: NonterminalNode, arg2: IterationNode, arg3: NonterminalNode, arg4: TerminalNode) => T;
  integer?: (this: NonterminalNode, arg0: IterationNode, arg1: IterationNode) => T;
  real?: (this: NonterminalNode, arg0: IterationNode, arg1: IterationNode, arg2: TerminalNode, arg3: IterationNode) => T;
  bool?: (this: NonterminalNode, arg0: TerminalNode) => T;
  null?: (this: NonterminalNode, arg0: TerminalNode) => T;
  xref?: (this: NonterminalNode, arg0: TerminalNode) => T;
  xref_10entry?: (this: NonterminalNode, arg0: NonterminalNode, arg1: NonterminalNode, arg2: NonterminalNode, arg3: NonterminalNode, arg4: NonterminalNode, arg5: NonterminalNode, arg6: NonterminalNode, arg7: NonterminalNode, arg8: NonterminalNode, arg9: NonterminalNode) => T;
  xref_5entry?: (this: NonterminalNode, arg0: NonterminalNode, arg1: NonterminalNode, arg2: NonterminalNode, arg3: NonterminalNode, arg4: NonterminalNode) => T;
  xref_entry?: (this: NonterminalNode, arg0: NonterminalNode, arg1: TerminalNode, arg2: NonterminalNode, arg3: TerminalNode, arg4: TerminalNode) => T;
  trailer?: (this: NonterminalNode, arg0: TerminalNode) => T;
  startxref?: (this: NonterminalNode, arg0: TerminalNode) => T;
  eof?: (this: NonterminalNode, arg0: TerminalNode) => T;
  comment?: (this: NonterminalNode, arg0: TerminalNode, arg1: IterationNode, arg2: NonterminalNode) => T;
  eol?: (this: NonterminalNode, arg0: TerminalNode) => T;
  delimiter?: (this: NonterminalNode, arg0: NonterminalNode) => T;
  start_delimiter?: (this: NonterminalNode, arg0: TerminalNode) => T;
  end_delimiter?: (this: NonterminalNode, arg0: TerminalNode) => T;
  ws_incl_eol?: (this: NonterminalNode, arg0: IterationNode) => T;
  ws_no_eol?: (this: NonterminalNode, arg0: IterationNode) => T;
}

/** Semantics object whose operations and attributes take PDFTokenizer action dictionaries. */
export interface PDFTokenizerSemantics extends Semantics {
  addOperation<T>(name: string, actionDict: PDFTokenizerActionDict<T>): this;
  extendOperation<T>(name: string, actionDict: PDFTokenizerActionDict<T>): this;
  addAttribute<T>(name: string, actionDict: PDFTokenizerActionDict<T>): this;
  extendAttribute<T>(name: string, actionDict: PDFTokenizerActionDict<T>): this;
}

/** The PDFTokenizer grammar, typed so that its semantics use the action dictionary above. */
export interface PDFTokenizerGrammar extends Grammar {
  createSemantics(): PDFTokenizerSemantics;
  extendSemantics(superSemantics: PDFTokenizerSemantics): PDFTokenizerSemantics;
}

declare const grammar: PDFTokenizerGrammar;
export default grammar;
// File: server/src/models/PdfObject.ts
/**
 * @brief Simple class representing metrics of a single PDF indirect object
 *
 * @copyright
 * Copyright 2023 PDF Association, Inc. https://www.pdfa.org
 * SPDX-License-Identifier: Apache-2.0
 *
 * @remark
 * This material is based upon work supported by the Defense Advanced
 * Research Projects Agency (DARPA) under Contract No. HR001119C0079.
 * Any opinions, findings and conclusions or recommendations expressed
 * in this material are those of the author(s) and do not necessarily
 * reflect the views of the Defense Advanced Research Projects Agency
 * (DARPA). Approved for public release.
 */

export default class PDFObject {
  private readonly _objectNumber: number;
  private readonly _generationNumber: number;
  private readonly _startOffset: number;
  private readonly _endOffset: number;
  private readonly _startStreamOffset: number;
  private readonly _endStreamOffset: number;

  /**
   * First line match for object identifier. Raw data can span lines.
   * Object starts with object number so no look-before is required.
   * Shared by all instances; it has no `g`/`y` flag, so it carries no state.
   */
  private static readonly _objectIdentifier: RegExp =
    /(\d+)[ \t\r\n\f\0]+(\d+)[ \t\r\n\f\0]+obj/;

  /**
   * @param obj - the full PDF object data (from `X Y obj` to `endobj`)
   * @param start - the byte offset of the start of `X Y obj`
   * @param stmStartOffset - the byte offset of the start of the stream or -1
   * @param stmEndOffset - the byte offset of the end of the stream or -1
   * @throws Error if `obj` contains no `X Y obj` identifier, or if a stream
   *   start offset is given that does not lie after `start`, before
   *   `stmEndOffset`, and with `stmEndOffset` inside the object.
   */
  constructor(obj: string, start: number, stmStartOffset: number, stmEndOffset: number) {
    this._startOffset = start;
    this._endOffset = start + obj.length;
    this._startStreamOffset = stmStartOffset;
    this._endStreamOffset = stmEndOffset;

    const match = obj.match(PDFObject._objectIdentifier);
    if (match && match.length === 3) {
      // Object and generation numbers are always decimal.
      this._objectNumber = parseInt(match[1], 10);
      this._generationNumber = parseInt(match[2], 10);
    } else {
      throw new Error(`Could not find object ID in ${obj.slice(0,10)}!`);
    }

    if (stmStartOffset !== -1) {
      const badOffsets =
        stmStartOffset >= stmEndOffset ||
        stmStartOffset <= start ||
        stmEndOffset > this._endOffset;
      if (badOffsets) {
        throw new Error(`Stream offsets are invalid for object ${this.getObjectID()}!`);
      }
    }
  }

  /** Returns a single line, nicely spaced string of the object ID */
  getObjectID(): string {
    return `${this._objectNumber} ${this._generationNumber} obj`;
  }

  /** Byte offset of the start of `X Y obj`. */
  getStartOffset(): number {
    return this._startOffset;
  }

  /** Start offset plus the length of the object data passed to the constructor. */
  getEndOffset(): number {
    return this._endOffset;
  }

  /** Byte offset of the start of the stream, or -1. */
  getStartStreamOffset(): number {
    return this._startStreamOffset;
  }

  /** Byte offset of the end of the stream, or -1. */
  getEndStreamOffset(): number {
    return this._endStreamOffset;
  }

  /** True when both stream offsets are set (neither is -1). */
  hasStream(): boolean {
    return (this._startStreamOffset !== -1) && (this._endStreamOffset !== -1);
  }
}
// File: server/src/ohmParser.ts
/**
 * @brief Ohm-based tokenizing parser running in LSP server.
 *
 * @copyright
 * Copyright 2023 PDF Association, Inc. https://www.pdfa.org
 * SPDX-License-Identifier: Apache-2.0
 *
 * @remark
 * This material is based upon work supported by the Defense Advanced
 * Research Projects Agency (DARPA) under Contract No. HR001119C0079.
 * Any opinions, findings and conclusions or recommendations expressed
 * in this material are those of the author(s) and do not necessarily
 * reflect the views of the Defense Advanced Research Projects Agency
 * (DARPA). Approved for public release.
 */
"use strict";

import * as ohm from "ohm-js";
import * as fs from "fs";
import { PDFToken, TOKEN_TYPES } from "./types";
import * as path from "path";

const grammarPath = path.join(
  __dirname,
  "../src/grammar/grammar_pdfTokens.ohm"
);
const grammarString = fs.readFileSync(grammarPath, "utf-8");
const grammar = ohm.grammar(grammarString);

/**
 * Start of an indirect object (`X Y obj`) at the beginning of a trimmed line.
 * The previous pattern escaped the backslashes (`\\d`), so it looked for a
 * literal backslash followed by "d" and could never match a real object header.
 */
const OBJECT_START = /^\d+[ \t\f\0\r\n]+\d+[ \t\f\0\r\n]+obj/;

/**
 * Main entry point to the Ohm parser, called by the LSP server.
 *
 * The text is split on "\n" and tokenized one line at a time. Lines after a
 * `stream` token are skipped until a line that starts with `endstream`,
 * `endobj` or `X Y obj` is seen. Lines the grammar cannot match (for example
 * multi-line literal or hex strings) are logged and produce no tokens.
 *
 * @param text - the full document text
 * @returns the tokens in document order; `line` is the 1-based line number and
 *   `start`/`end` are offsets within the line that was matched
 */
function getTokens(text: string): PDFToken[] {
  let lineNbr: number = 1;

  // Builds a token for the given parse node on the line currently being processed.
  const makeToken = (
    node: ohm.Node,
    type: string,
    extra: { [key: string]: any } = {}
  ): PDFToken => ({
    line: lineNbr,
    start: node.source.startIdx,
    end: node.source.endIdx,
    type,
    ...extra,
  });

  const semantics = grammar.createSemantics();

  semantics.addOperation("extract()", {
    _iter(...children) {
      const collected: PDFToken[] = [];
      for (const child of children) {
        collected.push(...(child.extract() as PDFToken[]));
      }
      return collected;
    },
    _terminal() {
      // ignore
      return [];
    },

    header(_1, majorVer, _3, minorVer, _5) {
      return [
        makeToken(this, "header", {
          content: majorVer.sourceString + "." + minorVer.sourceString,
        }),
      ];
    },
    endobj(_1) {
      return [makeToken(this, "endobj")];
    },
    indirect_object_start(objNum, _2, genNum, _4, _5) {
      return [
        makeToken(this, "indirect_object_start", {
          objNum: parseInt(objNum.sourceString, 10),
          genNum: parseInt(genNum.sourceString, 10),
        }),
      ];
    },
    stream(_1) {
      return [makeToken(this, "stream")];
    },
    endstream(_1) {
      return [makeToken(this, "endstream")];
    },
    dict_start(_1) {
      return [makeToken(this, "dict_start")];
    },
    dict_end(_1) {
      return [makeToken(this, "dict_end")];
    },
    array_start(_1) {
      return [makeToken(this, "array_start")];
    },
    array_end(_1) {
      return [makeToken(this, "array_end")];
    },
    name(_1, characters) {
      return [makeToken(this, "name", { content: characters.sourceString })];
    },
    string_literal(_1, content, _2) {
      return [makeToken(this, "string_literal", { content: content.sourceString })];
    },
    hex_string(_1, content, _2) {
      return [makeToken(this, "hex_string", { content: content.sourceString })];
    },
    indirect_ref(objNum, _2, genNum, _3, _4) {
      return [
        makeToken(this, "indirect_ref", {
          objNum: parseInt(objNum.sourceString, 10),
          genNum: parseInt(genNum.sourceString, 10),
        }),
      ];
    },
    integer(_sign, _digits) {
      // The rule is exactly sign? digits, so the whole match is the number.
      return [makeToken(this, "integer", { content: parseInt(this.sourceString, 10) })];
    },
    real(_sign, _part1, _dot, _part2) {
      // The rule is exactly sign? digits "." digits, so the whole match is the number.
      return [makeToken(this, "real", { content: parseFloat(this.sourceString) })];
    },
    bool(value) {
      return [makeToken(this, "bool", { content: value.sourceString === "true" })];
    },
    null(_1) {
      return [makeToken(this, "null")];
    },
    xref(_1) {
      return [makeToken(this, "xref")];
    },
    xref_10entry(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10) {
      // ignore - wait for xref_entry
      return [];
    },
    xref_5entry(_1, _2, _3, _4, _5) {
      // ignore - wait for xref_entry
      return [];
    },

    xref_entry(tenEntry, _1, fiveEntry, _2, status) {
      return [
        makeToken(this, "xref_entry", {
          // Zero-padded decimal fields: always parse as base 10.
          tenEntry: parseInt(tenEntry.sourceString, 10),
          fiveEntry: parseInt(fiveEntry.sourceString, 10),
          status: status.sourceString,
        }),
      ];
    },
    trailer(_1) {
      return [makeToken(this, "trailer")];
    },
    eof(_1) {
      return [makeToken(this, "eof")];
    },
    startxref(_1) {
      return [makeToken(this, "startxref")];
    },
    comment(_1, commentText, _2) {
      // no need to keep the comment text
      return [makeToken(this, "comment")];
    },
  });

  // Tokenize line-by-line
  const lines = text.split("\n");
  let insideStream: boolean = false;
  const tokenList: PDFToken[] = [];
  for (const line of lines) {
    const trimmed = line.trim();

    // be robust to live editing to re-start parser at end of a stream
    if (
      insideStream &&
      (trimmed.startsWith("endstream") ||
        trimmed.startsWith("endobj") ||
        OBJECT_START.test(trimmed))
    ) {
      insideStream = false; // fallthrough and let Ohm parse this line fully to get token locations
    }

    if (!insideStream && trimmed.length > 0) {
      const matchResult: ohm.MatchResult = grammar.match(line + "\n"); // restore '\n' so parser sees it
      if (matchResult.failed()) {
        // This will fail for multi-line tokens such as literal and hex strings
        /// @todo - Could retry by stitching a few lines together, but VSCode SemanticTokens
        ///  cannot span multiple lines: https://github.com/microsoft/vscode/blob/3be5ad240bd78db6892e285cb0c0de205ceab126/src/vs/workbench/api/common/extHostTypes.ts#L3261
        console.log(`Line ${lineNbr}: getTokens() failed! "${line.trim()}"`);
      } else {
        const lineTokens: PDFToken[] = semantics(matchResult).extract();
        // When encounter a "stream" token, skip until "endstream" (or "endobj" or "X Y obj")
        /// @todo - try a different Ohm grammar on the stream data! Content Stream, PSType4, CMap, etc.
        if (lineTokens.some((t: PDFToken) => t.type === "stream")) {
          insideStream = true;
        }
        // Append in place; re-allocating the whole list per line is quadratic.
        tokenList.push(...lineTokens);
      }
    }
    lineNbr += 1;
  }
  // console.log(`Finished tokenizing ${lineNbr} lines`);

  // DEBUG ONLY VALIDATION OF TOKENS
  //
  // tokenList.forEach((token) => {
  //   console.log(token);
  //   if (!TOKEN_TYPES.includes(token.type)) {
  //     console.error(`server-Missing token type: ${token.type}`);
  //   } else {
  //     console.log("all passing");
  //   }
  // });
  //
  // tokenList.forEach((token) => {
  //   if (token.type === undefined || token.type === null) {
  //     console.error(`Undefined or null token type: `, token);
  //   }
  // });

  /// @todo - How do we mark an error in syntax (what does Ohm do)???
  ///  - whole line vs after a few tokens and at end-of-a-line vs somewhere in the middle of a line

  /// @todo - processing tokenList array for
  ///  - basic file and syntax validation
  ///  - file layout and structure markers
  ///  - dictionary key modifier (so can know key or key-value)
  ///  - array element modifier
  ///  - streams (that were skipped above) - rely on dict key /Type, etc. and use other Ohm grammars
  ///  - folding??

  return tokenList;
}

export { getTokens };
// File: server/src/types/constants.ts
/**
 * @brief VSCode semantic tokens and modifiers. Shared between client and server.
 *
 * @copyright
 * Copyright 2023 PDF Association, Inc. https://www.pdfa.org
 * SPDX-License-Identifier: Apache-2.0
 *
 * @remark
 * This material is based upon work supported by the Defense Advanced
 * Research Projects Agency (DARPA) under Contract No. HR001119C0079.
 * Any opinions, findings and conclusions or recommendations expressed
 * in this material are those of the author(s) and do not necessarily
 * reflect the views of the Defense Advanced Research Projects Agency
 * (DARPA). Approved for public release.
 */
'use strict';

/**
 * Token type names, taken from rule names of the PDFTokenizer grammar.
 * Commented-out entries are grammar rules that are not listed as token types.
 * 'xref_10entry' and 'xref_5entry' are listed although the tokenizer's
 * extract() operation returns no token for them (only 'xref_entry' is emitted).
 *
 * NOTE(review): if the client builds its semantic-token legend from this
 * array, the entry order is significant - confirm before reordering.
 */
export const TOKEN_TYPES = [
  'header',
  'endobj',
  //   'pdf_token',
  'indirect_object_start',
  'stream',
  'endstream',
  'dict_start',
  'dict_end',
  'array_start',
  'array_end',
  'name',
  //   'valid_name_char',
  //   'name_hex_escape',

  'string_literal',

  //   'string_literal_char',
  //   'string_literal_escape',
  //   'octal',
  //   'octal_digit',
  //   'escaped_eol',

  'hex_string',
  'indirect_ref',
  'integer',
  'real',
  'bool',
  'null',
  'xref',
  'xref_10entry',
  'xref_5entry',
  'xref_entry',
  'trailer',
  'eof',
  'startxref',
  'comment',

  //   'eol',
  //   'delimiter',
  //   'start_delimiter',
  //   'end_delimiter',
  //   'ws_incl_eol',
  //   'ws_no_eol',
];

/** Modifier names that can qualify a token type. */
export const TOKEN_MODIFIERS = [
  'isDictKey', // only applies to 'name' objects
  'isArrayElement'
];
/**
 * Settings record carried in `PDFDocumentData.settings`.
 */
export interface PDSCOSSyntaxSettings {
  // NOTE(review): presumably an upper bound on the number of reported
  // diagnostics - confirm against the server implementation.
  maxNumberOfProblems: number;
}

/**
 * Data kept for a PDF document: its settings and, when available,
 * the cross reference information matrix.
 */
export type PDFDocumentData = {
  settings: PDSCOSSyntaxSettings;
  // Optional: may be absent (e.g. not yet built - TODO confirm when it is populated).
  xrefMatrix?: XrefInfoMatrix;
};
/**
 * A single token produced by the Ohm-based tokenizing parser.
 * Besides the four fixed fields, a token may carry arbitrary extra properties.
 */
export interface PDFToken {
  // Line the token was found on.
  line: number,
  // NOTE(review): presumably the start/end character positions of the token
  // within its line - confirm against the tokenizer.
  start: number;
  end: number;
  // Token type name (e.g. "stream"); presumably one of TOKEN_TYPES - TODO confirm.
  type: string;
  // Any additional, token-specific properties.
  [key: string]: any;
}
/**
 * Returns a list of all key names from the Arlington PDF Model (except for wildcards).
 * A key that occurs in exactly one object keeps its full detail and documentation.
 * A key shared by several objects has its documentation cleared and its detail
 * replaced by the comma-separated object names, or by "Many..." once that list
 * grows beyond 90 characters.
 *
 * @todo support deprecation tags in
 * @todo support filtering by PDF version from header / Catalog::Version
 *
 * @returns {CompletionItem[]} one completion item per distinct key name, in model order
 */
export function DictKeyCodeCompletion() : CompletionItem[] {
  const dictKeys: CompletionItem[] = [];
  // Label -> item index so duplicates are found in O(1) instead of
  // re-scanning dictKeys for every model entry.
  const itemsByLabel = new Map<string, CompletionItem>();

  let k: AlringtonItem;
  for (k of ArlingtonPDFModel) {
    if (k.Key.includes("*")) continue; // skip wildcards

    const alreadyExist = itemsByLabel.get(k.Key);
    if (alreadyExist === undefined) {
      const item: CompletionItem = {
        kind: CompletionItemKind.Variable,
        // data: k.Data, // not needed
        label: k.Key,
        detail: k.Object,
        documentation: { kind: MarkupKind.Markdown, value: k.Documentation },
        // tags: [ CompletionItemTag.Deprecated ]
      };
      itemsByLabel.set(k.Key, item);
      dictKeys.push(item);
      continue;
    }

    // Multiple objects have a key with this name. Clean out the specifics.
    alreadyExist.documentation = "";
    if (alreadyExist.detail != "Many...") {
      alreadyExist.detail = alreadyExist.detail + ", " + k.Object;
      // Too many objects so don't list them all
      if (alreadyExist.detail.length > 90) alreadyExist.detail = "Many...";
    }
    alreadyExist.data = 0;
  }
  return dictKeys;
}
/**
 * Builds the completion items for the name values a dictionary key may take.
 * Only non-wildcard Arlington entries whose key equals `dictKey` and whose
 * type is exactly "name" contribute; each comma-separated entry of their
 * PossibleValues (first and last character stripped) becomes one item.
 *
 * @param {string} dictKey - the dictionary key to look up
 * @returns {CompletionItem[]} completion items, possibly empty
 */
export function DictKeyValueCodeCompletion(dictKey: string) : CompletionItem[] {
  const completions: CompletionItem[] = [];

  let entry: AlringtonItem;
  for (entry of ArlingtonPDFModel) {
    // skip wildcards
    if (entry.Key.includes("*")) continue;

    /** @todo support multi-typed keys: k.Type.includes("name") */
    if ((entry.Key != dictKey) || (entry.Type != "name")) continue;

    // Drop the first and last character of the list, then split on commas.
    const inner = entry.PossibleValues.slice(1, entry.PossibleValues.length - 1);
    const detail = entry.Object + "::" + entry.Key;
    for (const value of inner.split(',')) {
      completions.push({
        kind: CompletionItemKind.Variable,
        label: value,
        detail: detail,
        documentation: { kind: MarkupKind.Markdown, value: entry.Documentation },
      });
    }
  }
  return completions;
}
/**
 * Takes a number, assumed to be a 32 bit signed integer, and
 * converts it to groups of 8 bits for display as a PDF bitmask.
 *
 * Negative values are rendered in two's complement (e.g. -1 is all ones),
 * which is how a signed 32 bit flags integer maps onto individual bits.
 *
 * @param {number} num - the flags value; it is coerced to 32 bits
 *
 * @returns {string} "Bitmask: " followed by four space-separated groups of
 *   8 binary digits, most significant byte first
 */
export function flags32_to_binary(num: number): string {
  // ">>> 0" reinterprets the value as an unsigned 32 bit integer, so the sign
  // bit and bits 28-30 are preserved rather than being masked away.
  const bits = (num >>> 0).toString(2).padStart(32, "0");

  const groups: string[] = [];
  for (let i = 0; i < 32; i += 8) {
    groups.push(bits.slice(i, i + 8));
  }
  return "Bitmask: " + groups.join(" ");
}
/**
 * Builds an XrefInfoMatrix from raw document text.
 *
 * The text is wrapped in a minimal stand-in TextDocument (fixed URI "mockURI",
 * language "pdf", version 1) whose position/offset conversions treat "\n" as
 * the only line separator, and every xref table found is merged into the matrix.
 *
 * @param {string} content - the full document text
 * @returns {XrefInfoMatrix} the populated matrix
 */
export function buildXrefMatrix(content: string): XrefInfoMatrix {
  const lines = content.split("\n");

  // Offset -> position: walk the lines, counting one extra character per "\n".
  const positionAt = (offset: number) => {
    let consumed = 0;
    for (const [lineIndex, lineText] of lines.entries()) {
      if (consumed + lineText.length >= offset) {
        return { line: lineIndex, character: offset - consumed };
      }
      consumed += lineText.length + 1;
    }
    // Offset is past the end of the text: clamp to the end of the last line.
    const lastLine = lines.length - 1;
    return { line: lastLine, character: lines[lastLine].length };
  };

  // Position -> offset: sum the lengths (plus "\n") of all preceding lines.
  const offsetAt = (position: Position) => {
    let total = position.character;
    let lineIndex = 0;
    while (lineIndex < position.line) {
      total += lines[lineIndex].length + 1;
      lineIndex += 1;
    }
    return total;
  };

  const mockPDFDocument: TextDocument = {
    getText: () => content,
    uri: "mockURI",
    languageId: "pdf",
    version: 1, // mock version
    positionAt,
    offsetAt,
    lineCount: lines.length,
  };

  // Merge all xref tables found in the document into the matrix
  const xrefMatrix = new XrefInfoMatrix();
  xrefMatrix.mergeAllXrefTables(mockPDFDocument);
  return xrefMatrix;
}
/**
 * Construct a PDF hover for Date objects.
 *
 * Fields are validated left to right. The first out-of-range field is replaced
 * by its default and every later field is ignored (defaults are shown instead).
 * The day is checked against the real length of the month, including leap years.
 *
 * @param d PDF date string (literal or hex string)
 * @returns Human-readable date for the valid parts of the PDF date string,
 *          or an empty string if `d` does not look like a PDF date
 */
function parsePDFDateString(d: string): string {
  /// @todo - hex strings!

  // Parse a PDF Date string into constituent fields:
  //   1=year 2=month 3=day 4=hour 5=minute 6=second 7=[-+Z] 8=UTC hours 9=' 10=UTC minutes 11='
  const PDFDateRegex = /^D:(\d{4})(\d{2})?(\d{2})?(\d{2})?(\d{2})?(\d{2})?([-+Z])?(\d{2})?(')?(\d{2})?(')?/m;

  const m = PDFDateRegex.exec(d);
  if (m == null) return '';

  const MonthNames: string[] = [ 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'June', 'July', 'Aug', 'Sept', 'Oct', 'Nov', 'Dec' ];
  const DaysInMonth: number[] = [ 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 ];

  let errorInFormat: boolean = false;

  // Reads one optional 2-digit field. Returns the fallback if the field is
  // missing, if an earlier field was already invalid, or if the value is out
  // of range (which also marks the format as invalid for later fields).
  const field = (text: string | undefined, min: number, max: number, fallback: number): number => {
    if ((text == null) || errorInFormat) return fallback;
    const value = parseInt(text, 10);
    if ((value < min) || (value > max)) {
      errorInFormat = true;
      return fallback;
    }
    return value;
  };

  const two = (n: number): string => n.toString().padStart(2, '0');

  const year = parseInt(m[1], 10); // always 4 digits, so never negative
  const month = field(m[2], 1, 12, 1);

  // February has 29 days only in leap years.
  const isLeapYear = ((year % 4 === 0) && (year % 100 !== 0)) || (year % 400 === 0);
  const lastDay = DaysInMonth[month - 1] + (((month === 2) && isLeapYear) ? 1 : 0);
  const day = field(m[3], 1, lastDay, 1);

  const hour = field(m[4], 0, 23, 0);
  const minute = field(m[5], 0, 59, 0);
  const second = field(m[6], 0, 59, 0);

  let s = `${day} ${MonthNames[month - 1]} ${year.toString().padStart(4, '0')}`;
  s = `${s}, ${two(hour)}:${two(minute)}:${two(second)}`;

  if ((m[7] != null) && !errorInFormat) {
    const utc_char = m[7]; // Z, + or -
    const utc_hour = field(m[8], 0, 23, 0);
    // m[9] is the apostrophe; minutes only count when hours were given
    const utc_minute = (m[8] != null) ? field(m[10], 0, 59, 0) : 0;

    if (utc_char === 'Z')
      s = s + ' UTC';
    else // + or -
      s = s + ' UTC' + utc_char + two(utc_hour) + ':' + two(utc_minute);
  }
  else {
    s = s + ' GMT'; // Default as per PDF specification
  }

  return s;
}
/server/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "es2020", 4 | "lib": ["es2020"], 5 | "module": "commonjs", 6 | "moduleResolution": "node", 7 | "sourceMap": true, 8 | "strict": true, 9 | "outDir": "out", 10 | "rootDir": "src" 11 | }, 12 | "include": ["src"], 13 | "exclude": ["node_modules", ".vscode-test"] 14 | } 15 | -------------------------------------------------------------------------------- /server/tsconfig.tsbuildinfo: -------------------------------------------------------------------------------- 1 | {"root":["./src/ohmparser.ts","./src/server.ts","./src/grammar/grammar_pdftokens.ohm-bundle.d.ts","./src/models/arlingtonpdfmodel.ts","./src/models/pdfobject.ts","./src/parser/pdfparser.ts","./src/parser/xrefinfomatrix.ts","./src/types/constants.ts","./src/types/documenttypes.ts","./src/types/index.ts","./src/types/tokentypes.ts","./src/utils/arlingtonutils.ts","./src/utils/pdfutils.ts"],"version":"5.8.2"} -------------------------------------------------------------------------------- /snippets/fdf-snippets.json: -------------------------------------------------------------------------------- 1 | { 2 | "Create a new FDF dictionary object": { 3 | "description": "Insert an empty FDF dictionary object", 4 | "prefix": "obj", 5 | "body": [ 6 | "0 obj", 7 | "<<", 8 | " /Type ${1:/}", 9 | ">>", 10 | "endobj", 11 | "" 12 | ] 13 | }, 14 | "Create a new FDF stream object": { 15 | "description": "Insert an empty FDF stream object", 16 | "prefix": "stream", 17 | "body": [ 18 | "0 obj", 19 | "<<", 20 | " /Length ${1:0}", 21 | " /Filter null", 22 | ">>", 23 | "stream", 24 | "", 25 | "endstream", 26 | "endobj", 27 | "" 28 | ] 29 | }, 30 | "Create a minimal empty FDF file" : { 31 | "description": "A minimal empty FDF file", 32 | "prefix": "FDF-", 33 | "body":[ 34 | "%FDF-1.2", 35 | "1 0 obj", 36 | "<<", 37 | " /Type /Catalog", 38 | " /FDF << /Annots [", 39 | " ] >>", 40 | 
">>", 41 | "endobj", 42 | "trailer", 43 | "<< /Root 1 0 R >>", 44 | "%%EOF", 45 | "" 46 | ] 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /snippets/pdf-snippets.json: -------------------------------------------------------------------------------- 1 | { 2 | "Create a new PDF dictionary object": { 3 | "description": "Insert an empty PDF dictionary object", 4 | "prefix": "obj", 5 | "body": [ 6 | "0 obj", 7 | "<<", 8 | " /Type ${1:/}", 9 | ">>", 10 | "endobj", 11 | "" 12 | ] 13 | }, 14 | "Create a new PDF stream object": { 15 | "description": "Insert an empty PDF stream object", 16 | "prefix": "stream", 17 | "body": [ 18 | "0 obj", 19 | "<<", 20 | " /Length ${1:0}", 21 | " /Filter null", 22 | ">>", 23 | "stream", 24 | "", 25 | "endstream", 26 | "endobj", 27 | "" 28 | ] 29 | }, 30 | "Create a minimal empty PDF file" : { 31 | "description": "A minimal empty PDF file", 32 | "prefix": "PDF-", 33 | "body":[ 34 | "%PDF-1.7", 35 | "%©© ", 36 | "", 37 | "1 0 obj", 38 | "<<", 39 | " /Type /Catalog", 40 | " /Outlines 2 0 R", 41 | " /Pages 3 0 R", 42 | ">>", 43 | "endobj", 44 | "", 45 | "2 0 obj", 46 | "<<", 47 | " /Type /Outlines", 48 | " /Count 0", 49 | ">>", 50 | "endobj", 51 | "", 52 | "3 0 obj % Page Tree Root", 53 | "<<", 54 | " /Type /Pages", 55 | " /Kids [ 4 0 R ]", 56 | " /Count 1", 57 | ">>", 58 | "endobj", 59 | "", 60 | "4 0 obj % Single Page", 61 | "<<", 62 | " /Type /Page", 63 | " /Parent 3 0 R", 64 | " /MediaBox [ 0 0 1000 1000 ]", 65 | " /Contents 5 0 R", 66 | " /Resources <<", 67 | " >>", 68 | ">>", 69 | "endobj", 70 | "", 71 | "5 0 obj % Page content stream", 72 | "<<", 73 | " /Length 6", 74 | ">>", 75 | "stream", 76 | "q", 77 | "Q", 78 | "endstream", 79 | "endobj", 80 | "xref", 81 | "0 6", 82 | "0000000000 65535 f", 83 | "0000000021 00000 n", 84 | "0000000107 00000 n", 85 | "0000000169 00000 n", 86 | "0000000266 00000 n", 87 | "0000000424 00000 n", 88 | "trailer", 89 | "<< /Root 1 0 R /Size 6 >>", 90 | 
"startxref", 91 | "511", 92 | "%%EOF", 93 | "" 94 | ] 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /syntaxes/fdf.tmLanguage.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "FDF", 3 | "scopeName": "source.fdf", 4 | "patterns": [ 5 | { 6 | "name": "markup.underline.link.pdf", 7 | "match": "\\b(http|https|ftp|ftps)://[a-zA-Z0-9\\-.]+(/[a-zA-Z0-9\\-._?,'+&%$#=~]*)*\\b" 8 | }, 9 | { 10 | "name": "comment.line.percent.pdf", 11 | "match": "%.*" 12 | }, 13 | { 14 | "name": "string.quoted.hex.pdf", 15 | "match": "<[0-9a-fA-F \\t\\r\\n\\f\\0]*>" 16 | }, 17 | { 18 | "name": "keyword.control.pdf", 19 | "match": "\\b(?:\\d+ \\d+ R|startxref|null|true|false|(^|(\\r?\\n)[ \\t\\r\\n\\f\\0]*)\\d+ \\d+ obj|endobj)\\b" 20 | }, 21 | { 22 | "name": "variable.other.name.pdf", 23 | "match": "/[^ \\t\\r\\n\\f\\0<>\\[\\]\\(\\)\\/%]*" 24 | }, 25 | { 26 | "name": "constant.numeric.real.pdf", 27 | "match": "\\b[-+]?\\d*\\.\\d+\\b" 28 | }, 29 | { 30 | "name": "constant.numeric.integer.pdf", 31 | "match": "\\b[+-]?\\d+\\b" 32 | }, 33 | { 34 | "name": "string.quoted.literal.pdf", 35 | "begin": "\\(", 36 | "end": "(?<=[^\\\\])\\)", 37 | "patterns": [ 38 | { 39 | "name": "constant.character.escape.linefeed.pdf", 40 | "match": "\\\\n" 41 | }, 42 | { 43 | "name": "constant.character.escape.return.pdf", 44 | "match": "\\\\r" 45 | }, 46 | { 47 | "name": "constant.character.escape.tab.pdf", 48 | "match": "\\\\t" 49 | }, 50 | { 51 | "name": "constant.character.escape.backspace.pdf", 52 | "match": "\\\\b" 53 | }, 54 | { 55 | "name": "constant.character.escape.formfeed.pdf", 56 | "match": "\\\\f" 57 | }, 58 | { 59 | "name": "constant.character.escape.backslash.pdf", 60 | "match": "\\\\\\\\" 61 | }, 62 | { 63 | "name": "constant.character.escape.eol.pdf", 64 | "match": "\\\\$" 65 | }, 66 | { 67 | "name": "constant.character.escape.octal.pdf", 68 | "match": "\\\\[0-7]{3}" 69 | } 70 | ] 71 | }, 72 
| { 73 | "name": "punctuation.definition.dictionary.pdf", 74 | "begin": "<<", 75 | "end": "(>>|(?=stream|endstream|endobj|xref|trailer))", 76 | "patterns": [ 77 | { 78 | "name": "comment.line.percent.pdf", 79 | "match": "%.*" 80 | }, 81 | { 82 | "name": "keyword.control.pdf", 83 | "match": "\\b(?:true|false|null)\\b" 84 | }, 85 | { 86 | "name": "keyword.control.reference.pdf", 87 | "match": "\\b\\d+ \\d+ R\\b" 88 | }, 89 | { 90 | "name": "variable.other.name.pdf", 91 | "match": "/[^ \\t\\r\\n\\f\\0<>\\[\\]\\(\\)\\/%]*" 92 | }, 93 | { 94 | "name": "constant.numeric.real.pdf", 95 | "match": "\\b[-+]?\\d*\\.\\d+\\b" 96 | }, 97 | { 98 | "name": "constant.numeric.integer.pdf", 99 | "match": "\\b[+-]?\\d+\\b" 100 | }, 101 | { 102 | "name": "string.quoted.hex.pdf", 103 | "match": "<[0-9a-fA-F \\t\\r\\n\\f\\0]*>" 104 | }, 105 | { 106 | "name": "string.quoted.literal.pdf", 107 | "begin": "\\(", 108 | "end": "(?<=[^\\\\])\\)", 109 | "patterns": [ 110 | { 111 | "name": "constant.character.escape.linefeed.pdf", 112 | "match": "\\\\n" 113 | }, 114 | { 115 | "name": "constant.character.escape.return.pdf", 116 | "match": "\\\\r" 117 | }, 118 | { 119 | "name": "constant.character.escape.tab.pdf", 120 | "match": "\\\\t" 121 | }, 122 | { 123 | "name": "constant.character.escape.backspace.pdf", 124 | "match": "\\\\b" 125 | }, 126 | { 127 | "name": "constant.character.escape.formfeed.pdf", 128 | "match": "\\\\f" 129 | }, 130 | { 131 | "name": "constant.character.escape.backslash.pdf", 132 | "match": "\\\\\\\\" 133 | }, 134 | { 135 | "name": "constant.character.escape.eol.pdf", 136 | "match": "\\\\$" 137 | }, 138 | { 139 | "name": "constant.character.escape.octal.pdf", 140 | "match": "\\\\[0-7]{3}" 141 | } 142 | ] 143 | } 144 | ] 145 | }, 146 | { 147 | "name": "punctuation.definition.array.pdf", 148 | "begin": "\\[", 149 | "end": "(\\]|(?=stream|endstream|endobj|xref|trailer))", 150 | "patterns": [ 151 | { 152 | "name": "comment.line.percent.pdf", 153 | "match": "%.*" 154 | }, 155 | { 
156 | "name": "keyword.control.pdf", 157 | "match": "\\b(?:true|false|null)\\b" 158 | }, 159 | { 160 | "name": "keyword.control.reference.pdf", 161 | "match": "\\b\\d+ \\d+ R\\b" 162 | }, 163 | { 164 | "name": "variable.other.name.pdf", 165 | "match": "/[^ \\t\\r\\n\\f\\0<>\\[\\]\\(\\)\\/%]*" 166 | }, 167 | { 168 | "name": "constant.numeric.real.pdf", 169 | "match": "\\b[-+]?\\d*\\.\\d+\\b" 170 | }, 171 | { 172 | "name": "constant.numeric.integer.pdf", 173 | "match": "\\b[+-]?\\d+\\b" 174 | }, 175 | { 176 | "name": "string.quoted.hex.pdf", 177 | "match": "<[0-9a-fA-F \\t\\r\\n\\f\\0]*>" 178 | }, 179 | { 180 | "name": "string.quoted.literal.pdf", 181 | "begin": "\\(", 182 | "end": "(?<=[^\\\\])\\)", 183 | "patterns": [ 184 | { 185 | "name": "constant.character.escape.linefeed.pdf", 186 | "match": "\\\\n" 187 | }, 188 | { 189 | "name": "constant.character.escape.return.pdf", 190 | "match": "\\\\r" 191 | }, 192 | { 193 | "name": "constant.character.escape.tab.pdf", 194 | "match": "\\\\t" 195 | }, 196 | { 197 | "name": "constant.character.escape.backspace.pdf", 198 | "match": "\\\\b" 199 | }, 200 | { 201 | "name": "constant.character.escape.formfeed.pdf", 202 | "match": "\\\\f" 203 | }, 204 | { 205 | "name": "constant.character.escape.backslash.pdf", 206 | "match": "\\\\\\\\" 207 | }, 208 | { 209 | "name": "constant.character.escape.eol.pdf", 210 | "match": "\\\\$" 211 | }, 212 | { 213 | "name": "constant.character.escape.octal.pdf", 214 | "match": "\\\\[0-7]{3}" 215 | } 216 | ] 217 | } 218 | ] 219 | }, 220 | { 221 | "name": "keyword.section.content-stream.pdf", 222 | "begin": "\\bstream\\b", 223 | "end": "\\b(endstream|(?=endobj|xref|trailer))\\b", 224 | "patterns": [ 225 | { 226 | "name": "comment.line.percent.pdf", 227 | "match": "%.*" 228 | }, 229 | { 230 | "name": "keyword.control.pdf", 231 | "match": "\\b(?:true|false|null)\\b" 232 | }, 233 | { 234 | "name": "variable.other.name.pdf", 235 | "match": "/[^ \\t\\r\\n\\f\\0<>\\[\\]\\(\\)\\/%]*" 236 | }, 237 | { 238 | 
"name": "constant.numeric.real.pdf", 239 | "match": "\\b[-+]?\\d*\\.\\d+\\b" 240 | }, 241 | { 242 | "name": "constant.numeric.integer.pdf", 243 | "match": "\\b[+-]?\\d+\\b" 244 | }, 245 | { 246 | "name": "string.quoted.hex.pdf", 247 | "match": "<[0-9a-fA-F \\t\\r\\n\\f\\0]*>" 248 | }, 249 | { 250 | "name": "string.quoted.literal.pdf", 251 | "begin": "\\(", 252 | "end": "(?<=[^\\\\])\\)", 253 | "patterns": [ 254 | { 255 | "name": "constant.character.escape.linefeed.pdf", 256 | "match": "\\\\n" 257 | }, 258 | { 259 | "name": "constant.character.escape.return.pdf", 260 | "match": "\\\\r" 261 | }, 262 | { 263 | "name": "constant.character.escape.tab.pdf", 264 | "match": "\\\\t" 265 | }, 266 | { 267 | "name": "constant.character.escape.backspace.pdf", 268 | "match": "\\\\b" 269 | }, 270 | { 271 | "name": "constant.character.escape.formfeed.pdf", 272 | "match": "\\\\f" 273 | }, 274 | { 275 | "name": "constant.character.escape.backslash.pdf", 276 | "match": "\\\\\\\\" 277 | }, 278 | { 279 | "name": "constant.character.escape.eol.pdf", 280 | "match": "\\\\$" 281 | }, 282 | { 283 | "name": "constant.character.escape.octal.pdf", 284 | "match": "\\\\[0-7]{3}" 285 | } 286 | ] 287 | }, 288 | { 289 | "name": "punctuation.definition.dictionary.pdf", 290 | "begin": "<<", 291 | "end": "(>>|(?=stream|endstream|endobj|xref|trailer))", 292 | "patterns": [ 293 | { 294 | "name": "comment.line.percent.pdf", 295 | "match": "%.*" 296 | }, 297 | { 298 | "name": "keyword.control.pdf", 299 | "match": "\\b(?:true|false|null)\\b" 300 | }, 301 | { 302 | "name": "keyword.control.reference.pdf", 303 | "match": "\\b\\d+ \\d+ R\\b" 304 | }, 305 | { 306 | "name": "variable.other.name.pdf", 307 | "match": "/[^ \\t\\r\\n\\f\\0<>\\[\\]\\(\\)\\/%]*" 308 | }, 309 | { 310 | "name": "constant.numeric.real.pdf", 311 | "match": "\\b[-+]?\\d*\\.\\d+\\b" 312 | }, 313 | { 314 | "name": "constant.numeric.integer.pdf", 315 | "match": "\\b[+-]?\\d+\\b" 316 | }, 317 | { 318 | "name": "string.quoted.hex.pdf", 319 | 
"match": "<[0-9a-fA-F \\t\\r\\n\\f\\0]*>" 320 | }, 321 | { 322 | "name": "string.quoted.literal.pdf", 323 | "begin": "\\(", 324 | "end": "(?<=[^\\\\])\\)", 325 | "patterns": [ 326 | { 327 | "name": "constant.character.escape.linefeed.pdf", 328 | "match": "\\\\n" 329 | }, 330 | { 331 | "name": "constant.character.escape.return.pdf", 332 | "match": "\\\\r" 333 | }, 334 | { 335 | "name": "constant.character.escape.tab.pdf", 336 | "match": "\\\\t" 337 | }, 338 | { 339 | "name": "constant.character.escape.backspace.pdf", 340 | "match": "\\\\b" 341 | }, 342 | { 343 | "name": "constant.character.escape.formfeed.pdf", 344 | "match": "\\\\f" 345 | }, 346 | { 347 | "name": "constant.character.escape.backslash.pdf", 348 | "match": "\\\\\\\\" 349 | }, 350 | { 351 | "name": "constant.character.escape.eol.pdf", 352 | "match": "\\\\$" 353 | }, 354 | { 355 | "name": "constant.character.escape.octal.pdf", 356 | "match": "\\\\[0-7]{3}" 357 | } 358 | ] 359 | } 360 | ] 361 | }, 362 | { 363 | "name": "punctuation.definition.array.pdf", 364 | "begin": "\\[", 365 | "end": "(\\]|(?=stream|endstream|endobj|xref|trailer))", 366 | "patterns": [ 367 | { 368 | "name": "comment.line.percent.pdf", 369 | "match": "%.*" 370 | }, 371 | { 372 | "name": "keyword.control.pdf", 373 | "match": "\\b(?:true|false|null)\\b" 374 | }, 375 | { 376 | "name": "variable.other.name.pdf", 377 | "match": "/[^ \\t\\r\\n\\f\\0<>\\[\\]\\(\\)\\/%]*" 378 | }, 379 | { 380 | "name": "constant.numeric.real.pdf", 381 | "match": "\\b[-+]?\\d*\\.\\d+\\b" 382 | }, 383 | { 384 | "name": "constant.numeric.integer.pdf", 385 | "match": "\\b[+-]?\\d+\\b" 386 | }, 387 | { 388 | "name": "string.quoted.hex.pdf", 389 | "match": "<[0-9a-fA-F \\t\\r\\n\\f\\0]*>" 390 | }, 391 | { 392 | "name": "string.quoted.literal.pdf", 393 | "begin": "\\(", 394 | "end": "(?<=[^\\\\])\\)", 395 | "patterns": [ 396 | { 397 | "name": "constant.character.escape.linefeed.pdf", 398 | "match": "\\\\n" 399 | }, 400 | { 401 | "name": 
"constant.character.escape.return.pdf", 402 | "match": "\\\\r" 403 | }, 404 | { 405 | "name": "constant.character.escape.tab.pdf", 406 | "match": "\\\\t" 407 | }, 408 | { 409 | "name": "constant.character.escape.backspace.pdf", 410 | "match": "\\\\b" 411 | }, 412 | { 413 | "name": "constant.character.escape.formfeed.pdf", 414 | "match": "\\\\f" 415 | }, 416 | { 417 | "name": "constant.character.escape.backslash.pdf", 418 | "match": "\\\\\\\\" 419 | }, 420 | { 421 | "name": "constant.character.escape.eol.pdf", 422 | "match": "\\\\$" 423 | }, 424 | { 425 | "name": "constant.character.escape.octal.pdf", 426 | "match": "\\\\[0-7]{3}" 427 | } 428 | ] 429 | } 430 | ] 431 | }, 432 | { 433 | "name": "binary.data.inlineimage.pdf", 434 | "begin": "\\bID\\b", 435 | "end": "((EI\\b)|(?=endstream|endobj|xref|trailer))", 436 | "contentName": "text.plain" 437 | }, 438 | { 439 | "name": "keyword.operator.content-stream.pdf", 440 | "match": "\\b(w|J|j|M|d|ri|i|gs|q|Q|cm|cs|CS|SC|SCN|G|g|RG|rg|K|k|m|l|c|v|y|h|re|S|s|f|F|f\\*|B|B\\*|b|b\\*|n|W|W\\*|sh|BI|ID|EI|Do|Tc|Tw|Tz|TL|Tf|Tr|Ts|Td|TD|Tm|T\\*|do|d1|MP|DP|BMC|BDC|EMC|BX|EX|BT|ET|Tj|TJ|Tf|Tc|Tw|Td|cm|')\\b" 441 | } 442 | ] 443 | }, 444 | { 445 | "name": "keyword.section.xref-trailer.pdf", 446 | "begin": "\\bxref\\b", 447 | "end": "\\btrailer\\b", 448 | "patterns": [ 449 | { 450 | "name": "keyword.control.free-object.pdf", 451 | "match": "\\b\\d{10} \\d{5} f\\b" 452 | }, 453 | { 454 | "name": "keyword.control.inuse-object.pdf", 455 | "match": "\\b\\d{10} \\d{5} n\\b" 456 | }, 457 | { 458 | "name": "keyword.control.xref-subsection.pdf", 459 | "match": "\\b\\d+ \\d+\\b" 460 | } 461 | ] 462 | } 463 | ], 464 | "repository": { 465 | "main": { 466 | "patterns": [ 467 | { 468 | "include": "#main" 469 | } 470 | ] 471 | } 472 | } 473 | } 474 | -------------------------------------------------------------------------------- /syntaxes/pdf.tmLanguage.json: -------------------------------------------------------------------------------- 1 | { 2 
| "name": "PDF", 3 | "scopeName": "source.pdf", 4 | "patterns": [ 5 | { 6 | "name": "markup.underline.link.pdf", 7 | "match": "\\b(http|https|ftp|ftps)://[a-zA-Z0-9\\-.]+(/[a-zA-Z0-9\\-._?,'+&%$#=~]*)*\\b" 8 | }, 9 | { 10 | "name": "comment.line.percent.pdf", 11 | "match": "%.*" 12 | }, 13 | { 14 | "name": "string.quoted.hex.pdf", 15 | "match": "<[0-9a-fA-F \\t\\r\\n\\f\\0]*>" 16 | }, 17 | { 18 | "name": "keyword.control.pdf", 19 | "match": "\\b(?:\\d+ \\d+ R|startxref|null|true|false|(^|(\\r?\\n)[ \\t\\r\\n\\f\\0]*)\\d+ \\d+ obj|endobj)\\b" 20 | }, 21 | { 22 | "name": "variable.other.name.pdf", 23 | "match": "/[^ \\t\\r\\n\\f\\0<>\\[\\]\\(\\)\\/%]*" 24 | }, 25 | { 26 | "name": "constant.numeric.real.pdf", 27 | "match": "\\b[-+]?\\d*\\.\\d+\\b" 28 | }, 29 | { 30 | "name": "constant.numeric.integer.pdf", 31 | "match": "\\b[+-]?\\d+\\b" 32 | }, 33 | { 34 | "name": "string.quoted.literal.pdf", 35 | "begin": "\\(", 36 | "end": "(?<=[^\\\\])\\)", 37 | "patterns": [ 38 | { 39 | "name": "constant.character.escape.linefeed.pdf", 40 | "match": "\\\\n" 41 | }, 42 | { 43 | "name": "constant.character.escape.return.pdf", 44 | "match": "\\\\r" 45 | }, 46 | { 47 | "name": "constant.character.escape.tab.pdf", 48 | "match": "\\\\t" 49 | }, 50 | { 51 | "name": "constant.character.escape.backspace.pdf", 52 | "match": "\\\\b" 53 | }, 54 | { 55 | "name": "constant.character.escape.formfeed.pdf", 56 | "match": "\\\\f" 57 | }, 58 | { 59 | "name": "constant.character.escape.backslash.pdf", 60 | "match": "\\\\\\\\" 61 | }, 62 | { 63 | "name": "constant.character.escape.eol.pdf", 64 | "match": "\\\\$" 65 | }, 66 | { 67 | "name": "constant.character.escape.octal.pdf", 68 | "match": "\\\\[0-7]{3}" 69 | } 70 | ] 71 | }, 72 | { 73 | "name": "punctuation.definition.dictionary.pdf", 74 | "begin": "<<", 75 | "end": "(>>|(?=stream|endstream|endobj|xref|trailer))", 76 | "patterns": [ 77 | { 78 | "name": "comment.line.percent.pdf", 79 | "match": "%.*" 80 | }, 81 | { 82 | "name": 
"keyword.control.pdf", 83 | "match": "\\b(?:true|false|null)\\b" 84 | }, 85 | { 86 | "name": "keyword.control.reference.pdf", 87 | "match": "\\b\\d+ \\d+ R\\b" 88 | }, 89 | { 90 | "name": "variable.other.name.pdf", 91 | "match": "/[^ \\t\\r\\n\\f\\0<>\\[\\]\\(\\)\\/%]*" 92 | }, 93 | { 94 | "name": "constant.numeric.real.pdf", 95 | "match": "\\b[-+]?\\d*\\.\\d+\\b" 96 | }, 97 | { 98 | "name": "constant.numeric.integer.pdf", 99 | "match": "\\b[+-]?\\d+\\b" 100 | }, 101 | { 102 | "name": "string.quoted.hex.pdf", 103 | "match": "<[0-9a-fA-F \\t\\r\\n\\f\\0]*>" 104 | }, 105 | { 106 | "name": "string.quoted.literal.pdf", 107 | "begin": "\\(", 108 | "end": "(?<=[^\\\\])\\)", 109 | "patterns": [ 110 | { 111 | "name": "constant.character.escape.linefeed.pdf", 112 | "match": "\\\\n" 113 | }, 114 | { 115 | "name": "constant.character.escape.return.pdf", 116 | "match": "\\\\r" 117 | }, 118 | { 119 | "name": "constant.character.escape.tab.pdf", 120 | "match": "\\\\t" 121 | }, 122 | { 123 | "name": "constant.character.escape.backspace.pdf", 124 | "match": "\\\\b" 125 | }, 126 | { 127 | "name": "constant.character.escape.formfeed.pdf", 128 | "match": "\\\\f" 129 | }, 130 | { 131 | "name": "constant.character.escape.backslash.pdf", 132 | "match": "\\\\\\\\" 133 | }, 134 | { 135 | "name": "constant.character.escape.eol.pdf", 136 | "match": "\\\\$" 137 | }, 138 | { 139 | "name": "constant.character.escape.octal.pdf", 140 | "match": "\\\\[0-7]{3}" 141 | } 142 | ] 143 | } 144 | ] 145 | }, 146 | { 147 | "name": "punctuation.definition.array.pdf", 148 | "begin": "\\[", 149 | "end": "(\\]|(?=stream|endstream|endobj|xref|trailer))", 150 | "patterns": [ 151 | { 152 | "name": "comment.line.percent.pdf", 153 | "match": "%.*" 154 | }, 155 | { 156 | "name": "keyword.control.pdf", 157 | "match": "\\b(?:true|false|null)\\b" 158 | }, 159 | { 160 | "name": "keyword.control.reference.pdf", 161 | "match": "\\b\\d+ \\d+ R\\b" 162 | }, 163 | { 164 | "name": "variable.other.name.pdf", 165 | "match": 
"/[^ \\t\\r\\n\\f\\0<>\\[\\]\\(\\)\\/%]*" 166 | }, 167 | { 168 | "name": "constant.numeric.real.pdf", 169 | "match": "\\b[-+]?\\d*\\.\\d+\\b" 170 | }, 171 | { 172 | "name": "constant.numeric.integer.pdf", 173 | "match": "\\b[+-]?\\d+\\b" 174 | }, 175 | { 176 | "name": "string.quoted.hex.pdf", 177 | "match": "<[0-9a-fA-F \\t\\r\\n\\f\\0]*>" 178 | }, 179 | { 180 | "name": "string.quoted.literal.pdf", 181 | "begin": "\\(", 182 | "end": "(?<=[^\\\\])\\)", 183 | "patterns": [ 184 | { 185 | "name": "constant.character.escape.linefeed.pdf", 186 | "match": "\\\\n" 187 | }, 188 | { 189 | "name": "constant.character.escape.return.pdf", 190 | "match": "\\\\r" 191 | }, 192 | { 193 | "name": "constant.character.escape.tab.pdf", 194 | "match": "\\\\t" 195 | }, 196 | { 197 | "name": "constant.character.escape.backspace.pdf", 198 | "match": "\\\\b" 199 | }, 200 | { 201 | "name": "constant.character.escape.formfeed.pdf", 202 | "match": "\\\\f" 203 | }, 204 | { 205 | "name": "constant.character.escape.backslash.pdf", 206 | "match": "\\\\\\\\" 207 | }, 208 | { 209 | "name": "constant.character.escape.eol.pdf", 210 | "match": "\\\\$" 211 | }, 212 | { 213 | "name": "constant.character.escape.octal.pdf", 214 | "match": "\\\\[0-7]{3}" 215 | } 216 | ] 217 | } 218 | ] 219 | }, 220 | { 221 | "name": "keyword.section.content-stream.pdf", 222 | "begin": "\\bstream\\b", 223 | "end": "\\b(endstream|(?=endobj|xref|trailer))\\b", 224 | "patterns": [ 225 | { 226 | "name": "comment.line.percent.pdf", 227 | "match": "%.*" 228 | }, 229 | { 230 | "name": "keyword.control.pdf", 231 | "match": "\\b(?:true|false|null)\\b" 232 | }, 233 | { 234 | "name": "variable.other.name.pdf", 235 | "match": "/[^ \\t\\r\\n\\f\\0<>\\[\\]\\(\\)\\/%]*" 236 | }, 237 | { 238 | "name": "constant.numeric.real.pdf", 239 | "match": "\\b[-+]?\\d*\\.\\d+\\b" 240 | }, 241 | { 242 | "name": "constant.numeric.integer.pdf", 243 | "match": "\\b[+-]?\\d+\\b" 244 | }, 245 | { 246 | "name": "string.quoted.hex.pdf", 247 | "match": 
"<[0-9a-fA-F \\t\\r\\n\\f\\0]*>" 248 | }, 249 | { 250 | "name": "string.quoted.literal.pdf", 251 | "begin": "\\(", 252 | "end": "(?<=[^\\\\])\\)", 253 | "patterns": [ 254 | { 255 | "name": "constant.character.escape.linefeed.pdf", 256 | "match": "\\\\n" 257 | }, 258 | { 259 | "name": "constant.character.escape.return.pdf", 260 | "match": "\\\\r" 261 | }, 262 | { 263 | "name": "constant.character.escape.tab.pdf", 264 | "match": "\\\\t" 265 | }, 266 | { 267 | "name": "constant.character.escape.backspace.pdf", 268 | "match": "\\\\b" 269 | }, 270 | { 271 | "name": "constant.character.escape.formfeed.pdf", 272 | "match": "\\\\f" 273 | }, 274 | { 275 | "name": "constant.character.escape.backslash.pdf", 276 | "match": "\\\\\\\\" 277 | }, 278 | { 279 | "name": "constant.character.escape.eol.pdf", 280 | "match": "\\\\$" 281 | }, 282 | { 283 | "name": "constant.character.escape.octal.pdf", 284 | "match": "\\\\[0-7]{3}" 285 | } 286 | ] 287 | }, 288 | { 289 | "name": "punctuation.definition.dictionary.pdf", 290 | "begin": "<<", 291 | "end": "(>>|(?=stream|endstream|endobj|xref|trailer))", 292 | "patterns": [ 293 | { 294 | "name": "comment.line.percent.pdf", 295 | "match": "%.*" 296 | }, 297 | { 298 | "name": "keyword.control.pdf", 299 | "match": "\\b(?:true|false|null)\\b" 300 | }, 301 | { 302 | "name": "keyword.control.reference.pdf", 303 | "match": "\\b\\d+ \\d+ R\\b" 304 | }, 305 | { 306 | "name": "variable.other.name.pdf", 307 | "match": "/[^ \\t\\r\\n\\f\\0<>\\[\\]\\(\\)\\/%]*" 308 | }, 309 | { 310 | "name": "constant.numeric.real.pdf", 311 | "match": "\\b[-+]?\\d*\\.\\d+\\b" 312 | }, 313 | { 314 | "name": "constant.numeric.integer.pdf", 315 | "match": "\\b[+-]?\\d+\\b" 316 | }, 317 | { 318 | "name": "string.quoted.hex.pdf", 319 | "match": "<[0-9a-fA-F \\t\\r\\n\\f\\0]*>" 320 | }, 321 | { 322 | "name": "string.quoted.literal.pdf", 323 | "begin": "\\(", 324 | "end": "(?<=[^\\\\])\\)", 325 | "patterns": [ 326 | { 327 | "name": "constant.character.escape.linefeed.pdf", 328 | 
"match": "\\\\n" 329 | }, 330 | { 331 | "name": "constant.character.escape.return.pdf", 332 | "match": "\\\\r" 333 | }, 334 | { 335 | "name": "constant.character.escape.tab.pdf", 336 | "match": "\\\\t" 337 | }, 338 | { 339 | "name": "constant.character.escape.backspace.pdf", 340 | "match": "\\\\b" 341 | }, 342 | { 343 | "name": "constant.character.escape.formfeed.pdf", 344 | "match": "\\\\f" 345 | }, 346 | { 347 | "name": "constant.character.escape.backslash.pdf", 348 | "match": "\\\\\\\\" 349 | }, 350 | { 351 | "name": "constant.character.escape.eol.pdf", 352 | "match": "\\\\$" 353 | }, 354 | { 355 | "name": "constant.character.escape.octal.pdf", 356 | "match": "\\\\[0-7]{3}" 357 | } 358 | ] 359 | } 360 | ] 361 | }, 362 | { 363 | "name": "punctuation.definition.array.pdf", 364 | "begin": "\\[", 365 | "end": "(\\]|(?=stream|endstream|endobj|xref|trailer))", 366 | "patterns": [ 367 | { 368 | "name": "comment.line.percent.pdf", 369 | "match": "%.*" 370 | }, 371 | { 372 | "name": "keyword.control.pdf", 373 | "match": "\\b(?:true|false|null)\\b" 374 | }, 375 | { 376 | "name": "variable.other.name.pdf", 377 | "match": "/[^ \\t\\r\\n\\f\\0<>\\[\\]\\(\\)\\/%]*" 378 | }, 379 | { 380 | "name": "constant.numeric.real.pdf", 381 | "match": "\\b[-+]?\\d*\\.\\d+\\b" 382 | }, 383 | { 384 | "name": "constant.numeric.integer.pdf", 385 | "match": "\\b[+-]?\\d+\\b" 386 | }, 387 | { 388 | "name": "string.quoted.hex.pdf", 389 | "match": "<[0-9a-fA-F \\t\\r\\n\\f\\0]*>" 390 | }, 391 | { 392 | "name": "string.quoted.literal.pdf", 393 | "begin": "\\(", 394 | "end": "(?<=[^\\\\])\\)", 395 | "patterns": [ 396 | { 397 | "name": "constant.character.escape.linefeed.pdf", 398 | "match": "\\\\n" 399 | }, 400 | { 401 | "name": "constant.character.escape.return.pdf", 402 | "match": "\\\\r" 403 | }, 404 | { 405 | "name": "constant.character.escape.tab.pdf", 406 | "match": "\\\\t" 407 | }, 408 | { 409 | "name": "constant.character.escape.backspace.pdf", 410 | "match": "\\\\b" 411 | }, 412 | { 413 | 
"name": "constant.character.escape.formfeed.pdf", 414 | "match": "\\\\f" 415 | }, 416 | { 417 | "name": "constant.character.escape.backslash.pdf", 418 | "match": "\\\\\\\\" 419 | }, 420 | { 421 | "name": "constant.character.escape.eol.pdf", 422 | "match": "\\\\$" 423 | }, 424 | { 425 | "name": "constant.character.escape.octal.pdf", 426 | "match": "\\\\[0-7]{3}" 427 | } 428 | ] 429 | } 430 | ] 431 | }, 432 | { 433 | "name": "binary.data.inlineimage.pdf", 434 | "begin": "\\bID\\b", 435 | "end": "((EI\\b)|(?=endstream|endobj|xref|trailer))", 436 | "contentName": "text.plain" 437 | }, 438 | { 439 | "name": "keyword.operator.content-stream.pdf", 440 | "match": "\\b(w|J|j|M|d|ri|i|gs|q|Q|cm|cs|CS|SC|SCN|G|g|RG|rg|K|k|m|l|c|v|y|h|re|S|s|f|F|f\\*|B|B\\*|b|b\\*|n|W|W\\*|sh|BI|ID|EI|Do|Tc|Tw|Tz|TL|Tf|Tr|Ts|Td|TD|Tm|T\\*|do|d1|MP|DP|BMC|BDC|EMC|BX|EX|BT|ET|Tj|TJ|'|\")\\b" 441 | } 442 | ] 443 | }, 444 | { 445 | "name": "keyword.section.xref-trailer.pdf", 446 | "begin": "\\bxref\\b", 447 | "end": "\\btrailer\\b", 448 | "patterns": [ 449 | { 450 | "name": "keyword.control.free-object.pdf", 451 | "match": "\\b\\d{10} \\d{5} f\\b" 452 | }, 453 | { 454 | "name": "keyword.control.inuse-object.pdf", 455 | "match": "\\b\\d{10} \\d{5} n\\b" 456 | }, 457 | { 458 | "name": "keyword.control.xref-subsection.pdf", 459 | "match": "\\b\\d+ \\d+\\b" 460 | } 461 | ] 462 | } 463 | ], 464 | "repository": { 465 | "main": { 466 | "patterns": [ 467 | { 468 | "include": "#main" 469 | } 470 | ] 471 | } 472 | } 473 | } 474 | -------------------------------------------------------------------------------- /syntaxes/pdfstreams.tmLanguage.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "PDF streams", 3 | "scopeName": "source.pdfstream", 4 | "patterns": [ 5 | { 6 | "name": "markup.underline.link.pdf", 7 | "match": "\\b(http|https|ftp|ftps)://[a-zA-Z0-9\\-.]+(/[a-zA-Z0-9\\-._?,'+&%$#=~]*)*\\b" 8 | }, 9 | { 10 | "name": "comment.line.percent.pdf", 11 | 
"match": "%.*" 12 | }, 13 | { 14 | "name": "string.quoted.hex.pdf", 15 | "match": "<[0-9a-fA-F \\t\\r\\n\\f\\0]*>" 16 | }, 17 | { 18 | "name": "variable.other.name.pdf", 19 | "match": "/[^ \\t\\r\\n\\f\\0<>\\[\\]\\(\\)\\/%]*" 20 | }, 21 | { 22 | "name": "constant.numeric.real.pdf", 23 | "match": "\\b[-+]?\\d*\\.\\d+\\b" 24 | }, 25 | { 26 | "name": "constant.numeric.integer.pdf", 27 | "match": "\\b[+-]?\\d+\\b" 28 | }, 29 | { 30 | "name": "string.quoted.literal.pdf", 31 | "begin": "\\(", 32 | "end": "(?<=[^\\\\])\\)", 33 | "patterns": [ 34 | { 35 | "name": "constant.character.escape.linefeed.pdf", 36 | "match": "\\\\n" 37 | }, 38 | { 39 | "name": "constant.character.escape.return.pdf", 40 | "match": "\\\\r" 41 | }, 42 | { 43 | "name": "constant.character.escape.tab.pdf", 44 | "match": "\\\\t" 45 | }, 46 | { 47 | "name": "constant.character.escape.backspace.pdf", 48 | "match": "\\\\b" 49 | }, 50 | { 51 | "name": "constant.character.escape.formfeed.pdf", 52 | "match": "\\\\f" 53 | }, 54 | { 55 | "name": "constant.character.escape.backslash.pdf", 56 | "match": "\\\\\\\\" 57 | }, 58 | { 59 | "name": "constant.character.escape.eol.pdf", 60 | "match": "\\\\$" 61 | }, 62 | { 63 | "name": "constant.character.escape.octal.pdf", 64 | "match": "\\\\[0-7]{3}" 65 | } 66 | ] 67 | }, 68 | { 69 | "name": "punctuation.definition.dictionary.pdf", 70 | "begin": "<<", 71 | "end": ">>", 72 | "patterns": [ 73 | { 74 | "name": "comment.line.percent.pdf", 75 | "match": "%.*" 76 | }, 77 | { 78 | "name": "keyword.control.pdf", 79 | "match": "\\b(?:true|false|null)\\b" 80 | }, 81 | { 82 | "name": "variable.other.name.pdf", 83 | "match": "/[^ \\t\\r\\n\\f\\0<>\\[\\]\\(\\)\\/%]*" 84 | }, 85 | { 86 | "name": "constant.numeric.real.pdf", 87 | "match": "\\b[-+]?\\d*\\.\\d+\\b" 88 | }, 89 | { 90 | "name": "constant.numeric.integer.pdf", 91 | "match": "\\b[+-]?\\d+\\b" 92 | }, 93 | { 94 | "name": "string.quoted.hex.pdf", 95 | "match": "<[0-9a-fA-F \\t\\r\\n\\f\\0]*>" 96 | }, 97 | { 98 | "name": 
"string.quoted.literal.pdf", 99 | "begin": "\\(", 100 | "end": "(?<=[^\\\\])\\)", 101 | "patterns": [ 102 | { 103 | "name": "constant.character.escape.linefeed.pdf", 104 | "match": "\\\\n" 105 | }, 106 | { 107 | "name": "constant.character.escape.return.pdf", 108 | "match": "\\\\r" 109 | }, 110 | { 111 | "name": "constant.character.escape.tab.pdf", 112 | "match": "\\\\t" 113 | }, 114 | { 115 | "name": "constant.character.escape.backspace.pdf", 116 | "match": "\\\\b" 117 | }, 118 | { 119 | "name": "constant.character.escape.formfeed.pdf", 120 | "match": "\\\\f" 121 | }, 122 | { 123 | "name": "constant.character.escape.backslash.pdf", 124 | "match": "\\\\\\\\" 125 | }, 126 | { 127 | "name": "constant.character.escape.eol.pdf", 128 | "match": "\\\\$" 129 | }, 130 | { 131 | "name": "constant.character.escape.octal.pdf", 132 | "match": "\\\\[0-7]{3}" 133 | } 134 | ] 135 | } 136 | ] 137 | }, 138 | { 139 | "name": "punctuation.definition.array.pdf", 140 | "begin": "\\[", 141 | "end": "\\]", 142 | "patterns": [ 143 | { 144 | "name": "comment.line.percent.pdf", 145 | "match": "%.*" 146 | }, 147 | { 148 | "name": "keyword.control.pdf", 149 | "match": "\\b(?:true|false|null)\\b" 150 | }, 151 | { 152 | "name": "variable.other.name.pdf", 153 | "match": "/[^ \\t\\r\\n\\f\\0<>\\[\\]\\(\\)\\/%]*" 154 | }, 155 | { 156 | "name": "constant.numeric.real.pdf", 157 | "match": "\\b[-+]?\\d*\\.\\d+\\b" 158 | }, 159 | { 160 | "name": "constant.numeric.integer.pdf", 161 | "match": "\\b[+-]?\\d+\\b" 162 | }, 163 | { 164 | "name": "string.quoted.hex.pdf", 165 | "match": "<[0-9a-fA-F \\t\\r\\n\\f\\0]*>" 166 | }, 167 | { 168 | "name": "string.quoted.literal.pdf", 169 | "begin": "\\(", 170 | "end": "(?<=[^\\\\])\\)", 171 | "patterns": [ 172 | { 173 | "name": "constant.character.escape.linefeed.pdf", 174 | "match": "\\\\n" 175 | }, 176 | { 177 | "name": "constant.character.escape.return.pdf", 178 | "match": "\\\\r" 179 | }, 180 | { 181 | "name": "constant.character.escape.tab.pdf", 182 | "match": 
"\\\\t" 183 | }, 184 | { 185 | "name": "constant.character.escape.backspace.pdf", 186 | "match": "\\\\b" 187 | }, 188 | { 189 | "name": "constant.character.escape.formfeed.pdf", 190 | "match": "\\\\f" 191 | }, 192 | { 193 | "name": "constant.character.escape.backslash.pdf", 194 | "match": "\\\\\\\\" 195 | }, 196 | { 197 | "name": "constant.character.escape.eol.pdf", 198 | "match": "\\\\$" 199 | }, 200 | { 201 | "name": "constant.character.escape.octal.pdf", 202 | "match": "\\\\[0-7]{3}" 203 | } 204 | ] 205 | } 206 | ] 207 | }, 208 | { 209 | "name": "keyword.operator.graphics.pdf.inline-image.begin", 210 | "match": "\\bBI\\b" 211 | }, 212 | { 213 | "name": "binary.data.inlineimage.pdf", 214 | "begin": "\\bID\\b", 215 | "end": "EI\\b", 216 | "contentName": "text.plain" 217 | }, 218 | { 219 | "name": "keyword.operator.graphics.pdf.matrix", 220 | "match": "\\b([-+]?\\d*\\.?\\d+[ \\t\\r\\n\\f\\0]){6}cm\\b" 221 | }, 222 | { 223 | "name": "keyword.operator.graphics.pdf.path.dash", 224 | "match": "\\\\[([-+]?\\d*\\.?\\d+[ \\t\\r\\n\\f\\0])*\\]b[-+]?\\d*\\.?\\d+[ \\t\\r\\n\\f\\0]d\\b" 225 | }, 226 | { 227 | "name": "keyword.operator.graphics.pdf.path.line-width", 228 | "match": "\\b[-+]?\\d*\\.?\\d+[ \\t\\r\\n\\f\\0]w\\b" 229 | }, 230 | { 231 | "name": "keyword.operator.graphics.pdf.path.line-join", 232 | "match": "\\b(0|1|2)[ \\t\\r\\n\\f\\0]j\\b" 233 | }, 234 | { 235 | "name": "keyword.operator.graphics.pdf.path.line-cap", 236 | "match": "\\b(0|1|2)[ \\t\\r\\n\\f\\0]j\\b" 237 | }, 238 | { 239 | "name": "keyword.operator.graphics.pdf.path.miter-limit-ratio", 240 | "match": "\\b[-+]?\\d*\\.?\\d+[ \\t\\r\\n\\f\\0]M\\b" 241 | }, 242 | { 243 | "name": "keyword.operator.graphics.pdf.color.space.stroking", 244 | "match": "\\b/[^ \\t\\r\\n\\f\\0><[\\]()/]*[ \\t\\r\\n\\f\\0]CS\\b" 245 | }, 246 | { 247 | "name": "keyword.operator.graphics.pdf.color.space.non-stroking", 248 | "match": "\\b/[^ \\t\\r\\n\\f\\0><[\\]()/]*[ \\t\\r\\n\\f\\0]cs\\b" 249 | }, 250 | { 251 | "name": 
"keyword.operator.graphics.pdf.color.gray.stroking", 252 | "match": "\\b[+]?\\d*\\.?\\d+[ \\t\\r\\n\\f\\0]G\\b" 253 | }, 254 | { 255 | "name": "keyword.operator.graphics.pdf.color.gray.non-stroking", 256 | "match": "\\b[+]?\\d*\\.?\\d+[ \\t\\r\\n\\f\\0]g\\b" 257 | }, 258 | { 259 | "name": "keyword.operator.graphics.pdf.color.rgb.stroking", 260 | "match": "\\b([+]?\\d*\\.?\\d+[ \\t\\r\\n\\f\\0]){3}RG\\b" 261 | }, 262 | { 263 | "name": "keyword.operator.graphics.pdf.color.rgb.non-stroking", 264 | "match": "\\b([+]?\\d*\\.?\\d+[ \\t\\r\\n\\f\\0]){3}rg\\b" 265 | }, 266 | { 267 | "name": "keyword.operator.graphics.pdf.color.cmyk.stroking", 268 | "match": "\\b([+]?\\d*\\.?\\d+[ \\t\\r\\n\\f\\0]){4}K\\b" 269 | }, 270 | { 271 | "name": "keyword.operator.graphics.pdf.color.cmyk.non-stroking", 272 | "match": "\\b([+]?\\d*\\.?\\d+[ \\t\\r\\n\\f\\0]){4}k\\b" 273 | }, 274 | { 275 | "name": "keyword.operator.graphics.pdf.color.cmyk.stroking", 276 | "match": "\\b([+]?\\d*\\.?\\d+[ \\t\\r\\n\\f\\0])+(/[^ \\t\\r\\n\\f\\0><[\\]()/]*)?+SCN\\b" 277 | }, 278 | { 279 | "name": "keyword.operator.graphics.pdf.color.cmyk.non-stroking", 280 | "match": "\\b([+]?\\d*\\.?\\d+[ \\t\\r\\n\\f\\0])+(/[^ \\t\\r\\n\\f\\0><[\\]()/]*)?+scn\\b" 281 | }, 282 | { 283 | "name": "keyword.operator.graphics.pdf.color.render-intent", 284 | "match": "\\b(/AbsoluteColorimetric|/RelativeColorimetric|/Perceptual|/Saturation)[ \\t\\r\\n\\f\\0]ri\\b" 285 | }, 286 | { 287 | "name": "keyword.operator.graphics.pdf.path.close", 288 | "match": "\\bh\\b" 289 | }, 290 | { 291 | "name": "keyword.operator.graphics.pdf.path.flatness-tolerance", 292 | "match": "\\b[-+]?\\d*\\.?\\d+[ \\t\\r\\n\\f\\0]i\\b" 293 | }, 294 | { 295 | "name": "keyword.operator.graphics.pdf.path.close-fill-stroke-non-zero", 296 | "match": "\\bb\\b" 297 | }, 298 | { 299 | "name": "keyword.operator.graphics.pdf.path.fill-stroke-non-zero", 300 | "match": "\\bB\\b" 301 | }, 302 | { 303 | "name": 
"keyword.operator.graphics.pdf.path.close-fill-stroke-even-odd", 304 | "match": "\\bb\\*\\b" 305 | }, 306 | { 307 | "name": "keyword.operator.graphics.pdf.path.fill-stroke-even-odd", 308 | "match": "\\bB\\*\\b" 309 | }, 310 | { 311 | "name": "keyword.operator.graphics.pdf.path.close-fill-non-zero", 312 | "match": "\\b(f|F)\\b" 313 | }, 314 | { 315 | "name": "keyword.operator.graphics.pdf.path.fill-even-odd", 316 | "match": "\\bf\\*\\b" 317 | }, 318 | { 319 | "name": "keyword.operator.graphics.pdf.path.noop", 320 | "match": "\\bn\\b" 321 | }, 322 | { 323 | "name": "keyword.operator.graphics.pdf.path.close-stroke", 324 | "match": "\\bs\\b" 325 | }, 326 | { 327 | "name": "keyword.operator.graphics.pdf.path.stroke", 328 | "match": "\\bS\\b" 329 | }, 330 | { 331 | "name": "keyword.operator.graphics.pdf.path.clip-non-zero", 332 | "match": "\\bW\\b" 333 | }, 334 | { 335 | "name": "keyword.operator.graphics.pdf.path.clip-even-odd", 336 | "match": "\\bW\\*\\b" 337 | }, 338 | { 339 | "name": "keyword.operator.graphics.pdf.path.moveto", 340 | "match": "\\b([-+]?\\d*\\.?\\d+[ \\t\\r\\n\\f\\0]){2}m\\b" 341 | }, 342 | { 343 | "name": "keyword.operator.graphics.pdf.path.lineto", 344 | "match": "\\b([-+]?\\d*\\.?\\d+[ \\t\\r\\n\\f\\0]){2}l\\b" 345 | }, 346 | { 347 | "name": "keyword.operator.graphics.pdf.path.rectangle", 348 | "match": "\\b([-+]?\\d*\\.?\\d+[ \\t\\r\\n\\f\\0]){4}re\\b" 349 | }, 350 | { 351 | "name": "keyword.operator.graphics.pdf.path.bezier", 352 | "match": "\\b([-+]?\\d*\\.?\\d+[ \\t\\r\\n\\f\\0]){6}c\\b" 353 | }, 354 | { 355 | "name": "keyword.operator.graphics.pdf.path.bezier.current-point1", 356 | "match": "\\b([-+]?\\d*\\.?\\d+[ \\t\\r\\n\\f\\0]){4}v\\b" 357 | }, 358 | { 359 | "name": "keyword.operator.graphics.pdf.path.bezier.current-point2", 360 | "match": "\\b([-+]?\\d*\\.?\\d+[ \\t\\r\\n\\f\\0]){4}y\\b" 361 | }, 362 | { 363 | "name": "keyword.operator.graphics.pdf.graphics-state.push", 364 | "match": "\\bq\\b" 365 | }, 366 | { 367 | "name": 
"keyword.operator.graphics.pdf.graphics-state.pop", 368 | "match": "\\bQ\\b" 369 | }, 370 | { 371 | "name": "keyword.operator.graphics.pdf.text.begin", 372 | "match": "\\bBT\\b" 373 | }, 374 | { 375 | "name": "keyword.operator.graphics.pdf.text.position.next-line", 376 | "match": "\\bT\\*\\b" 377 | }, 378 | { 379 | "name": "keyword.operator.graphics.pdf.text.position.next-line-offset", 380 | "match": "\\b([-+]?\\d*\\.?\\d+[ \\t\\r\\n\\f\\0]){2}Td\\b" 381 | }, 382 | { 383 | "name": "keyword.operator.graphics.pdf.text.position.next-line-offset-leading", 384 | "match": "\\b([-+]?\\d*\\.?\\d+[ \\t\\r\\n\\f\\0]){2}TD\\b" 385 | }, 386 | { 387 | "name": "keyword.operator.graphics.pdf.text.paint.string", 388 | "match": "\\b(\\([^)]\\)|<[0-9a-fA-F \\t\\r\\n\\f\\0]*>)[ \\t\\r\\n\\f\\0]Tj\\b" 389 | }, 390 | { 391 | "name": "keyword.operator.graphics.pdf.text.paint.string-kern", 392 | "match": "\\b\\[((\\([^)]\\)|<[0-9a-fA-F \\t\\r\\n\\f\\0]*>)|[-+]?\\d*\\.?\\d+|[ \\t\\r\\n\\f\\0])\\][ \\t\\r\\n\\f\\0]?TJ\\b" 393 | }, 394 | { 395 | "name": "keyword.operator.graphics.pdf.text.paint.string-next-line", 396 | "match": "\\b(\\([^)]\\)|<[0-9a-fA-F \\t\\r\\n\\f\\0]*>)[ \\t\\r\\n\\f\\0]'\\b" 397 | }, 398 | { 399 | "name": "keyword.operator.graphics.pdf.text.paint.string-move", 400 | "match": "\\b([-+]?\\d*\\.?\\d+[ \\t\\r\\n\\f\\0]){2}(\\([^)]\\)|<[0-9a-fA-F \\t\\r\\n\\f\\0]*>)[ \\t\\r\\n\\f\\0]\"\\b" 401 | }, 402 | { 403 | "name": "keyword.operator.graphics.pdf.text.character-spacing", 404 | "match": "\\b[-+]?\\d*\\.?\\d+[ \\t\\r\\n\\f\\0]Tc\\b" 405 | }, 406 | { 407 | "name": "keyword.operator.graphics.pdf.text.font-size", 408 | "match": "\\b/[^ \\t\\r\\n\\f\\0><[\\]()/]*[ \\t\\r\\n\\f\\0][-+]?\\d*\\.?\\d+[ \\t\\r\\n\\f\\0]Tf\\b" 409 | }, 410 | { 411 | "name": "keyword.operator.graphics.pdf.text.leading", 412 | "match": "\\b[-+]?\\d*\\.?\\d+[ \\t\\r\\n\\f\\0]TL\\b" 413 | }, 414 | { 415 | "name": "keyword.operator.graphics.pdf.text.rendering-mode", 416 | "match": "\\b[0-7][ 
\\t\\r\\n\\f\\0]Tr\\b" 417 | }, 418 | { 419 | "name": "keyword.operator.graphics.pdf.text.rise", 420 | "match": "\\b[-+]?\\d*\\.?\\d+[ \\t\\r\\n\\f\\0]Ts\\b" 421 | }, 422 | { 423 | "name": "keyword.operator.graphics.pdf.text.word-spacing", 424 | "match": "\\b[-+]?\\d*\\.?\\d+[ \\t\\r\\n\\f\\0]Tw\\b" 425 | }, 426 | { 427 | "name": "keyword.operator.graphics.pdf.text.horizontal-text-scaling", 428 | "match": "\\b[+]?\\d*\\.?\\d+[ \\t\\r\\n\\f\\0]Tz\\b" 429 | }, 430 | { 431 | "name": "keyword.operator.graphics.pdf.text.matrix", 432 | "match": "\\b([-+]?\\d*\\.?\\d+[ \\t\\r\\n\\f\\0]){6}Tm\\b" 433 | }, 434 | { 435 | "name": "keyword.operator.graphics.pdf.type3.shape-color", 436 | "match": "\\b([-+]?\\d*\\.?\\d+[ \\t\\r\\n\\f\\0]){2}d0\\b" 437 | }, 438 | { 439 | "name": "keyword.operator.graphics.pdf.type3.shape", 440 | "match": "\\b([-+]?\\d*\\.?\\d+[ \\t\\r\\n\\f\\0]){6}d1\\b" 441 | }, 442 | { 443 | "name": "keyword.operator.graphics.pdf.text.end", 444 | "match": "\\bET\\b" 445 | }, 446 | { 447 | "name": "keyword.operator.graphics.pdf.compatibility.begin", 448 | "match": "\\bBX\\b" 449 | }, 450 | { 451 | "name": "keyword.operator.graphics.pdf.compatibility.end", 452 | "match": "\\bEX\\b" 453 | }, 454 | { 455 | "name": "keyword.operator.graphics.pdf.marked-content.point.begin", 456 | "match": "\\b/[^ \\t\\r\\n\\f\\0><[\\]()/]*[ \\t\\r\\n\\f\\0](/[^ \\t\\r\\n\\f\\0><[\\]()/]*|<<.*>>)[ \\t\\r\\n\\f\\0]BMC\\b" 457 | }, 458 | { 459 | "name": "keyword.operator.graphics.pdf.marked-content.end", 460 | "match": "\\bEMC\\b" 461 | }, 462 | { 463 | "name": "keyword.operator.graphics.pdf.marked-content.begin", 464 | "match": "\\b/[^ \\t\\r\\n\\f\\0><[\\]()/]*[ \\t\\r\\n\\f\\0]BMC\\b" 465 | }, 466 | { 467 | "name": "keyword.operator.graphics.pdf.marked-content.point.property", 468 | "match": "\\b/[^ \\t\\r\\n\\f\\0><[\\]()/]*[ \\t\\r\\n\\f\\0](/[^ \\t\\r\\n\\f\\0><[\\]()/]*|<<.*>>)[ \\t\\r\\n\\f\\0]DP\\b" 469 | }, 470 | { 471 | "name": 
"keyword.operator.graphics.pdf.marked-content.point", 472 | "match": "\\b/[^ \\t\\r\\n\\f\\0><[\\]()/]*[ \\t\\r\\n\\f\\0]MP\\b" 473 | }, 474 | { 475 | "name": "keyword.operator.graphics.pdf.do", 476 | "match": "\\b/[^ \\t\\r\\n\\f\\0><[\\]()/]*[ \\t\\r\\n\\f\\0]Do\\b" 477 | }, 478 | { 479 | "name": "keyword.operator.graphics.pdf.shading", 480 | "match": "\\b/[^ \\t\\r\\n\\f\\0><[\\]()/]*[ \\t\\r\\n\\f\\0]sh\\b" 481 | }, 482 | { 483 | "name": "keyword.operator.graphics.pdf.graphics-state.set", 484 | "match": "\\b/[^ \\t\\r\\n\\f\\0><[\\]()/]*[ \\t\\r\\n\\f\\0]gs\\b" 485 | }, 486 | { 487 | "name": "keyword.operator.postscript-type4-fn.pdf", 488 | "match": "\\b(abs|cvi|floor|mod|sin|add|cvr|idiv|mul|sqrt|atan|div|ln|neg|sub|ceiling|exp|log|round|truncate|cos|and|false|le|not|true|bitshift|ge|lt|or|xor|eq|gt|ne|If|ifelse|copy|exch|pop|dup|index|roll)\\b" 489 | }, 490 | { 491 | "name": "keyword.operator.cmaps.pdf", 492 | "match": "\\b(dict|begin|end|def|findresource|usefont|defineresource|pop|begincodespacerange|endcodespacerange|beginnotdefchar|endnotdefchar|beginnotdefrange|endnotdefrange|begincidrange|endcidrange|endcmap)\\b" 493 | } 494 | ], 495 | "repository": { 496 | "main": { 497 | "patterns": [ 498 | { 499 | "include": "#main" 500 | } 501 | ] 502 | } 503 | } 504 | } 505 | -------------------------------------------------------------------------------- /testing-resources/CDQ_WhitePaper_Accessibility.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pdf-association/pdf-cos-syntax/8adcc72046ad40579add080379161273315fdbb2/testing-resources/CDQ_WhitePaper_Accessibility.pdf -------------------------------------------------------------------------------- /testing-resources/CompactedPDFSyntaxTest.pdf: -------------------------------------------------------------------------------- 1 | %PDF-1.7 2 | %���� 3 | % 4 | % (c) PDF Association, 2020 5 | % Peter Wyatt, Principal Scientist - for SafeDocs 6 | % 7 
| % This PDF has been hand-written to cover as many combinations of compacted PDF syntax as possible 8 | % (meaning not using whitespace between tokens and delimiters). This file will get RUINED if it gets 9 | % "repaired" by tools such as mutool clean!!!! 10 | % 11 | % Note also that this PDF file is NOT checked by rendering as many constructs are 12 | % necessarily artificial and will not relate to graphics. It is designed to check a parser's ability to 13 | % read compacted syntax without error, such as with the Trail Of Bit's PolyTracker/PolyFile tools. 14 | % However this is still a 100% valid PDF and should render in every viewer. 15 | % 16 | % See also the corresponding "Compacted PDF Syntax Matrix" document for the 121 possible combinations. 17 | % 18 | % This material is based upon work supported by the Defense Advanced 19 | % Research Projects Agency (DARPA) under Contract No. HR001119C0079. 20 | % Any opinions, findings and conclusions or recommendations expressed 21 | % in this material are those of the author(s) and do not necessarily 22 | % reflect the views of the Defense Advanced Research Projects Agency 23 | % (DARPA). Approved for public release. 
24 | % 25 | 1 0 obj 26 | <>/StructTreeRoot null/AA<>>>/Pages 3 0 R>>%comment after dictionary close token 30 | endobj 31 | 2 0 obj 32 | endobj 33 | 3 0 obj 34 | <null<686932>null[/Dummy](hi3)[(hi4)(hi5)true(hi6)null(hi7)12(hi8)]-1.<>[](hi99)[]null[]<>true<>[<>]<686933>1 0 R[1 2 3]6 0 R<686934>4 0 R(hi9)2 0 R<>[true]<><686935><>3 0 R<>(hi10)<>null<686936>true(hi11)<686937>(hi12)+.0<686938>] 35 | /Type/Pages/Count 1/Kids[4 0 R%comment after indirect ref 36 | ]>>endobj 37 | 4 0 obj 38 | <>/ProcSet[null]/ExtGState<>/Font<>>>>>>> 43 | endobj 44 | 5 0 obj 45 | <> 46 | stream 47 | /BreakMyParser <null<686932>null[/Dummy](hi3)[(hi4)(hi5)true(hi6)null(hi7)12(hi8)]-1.<>[](hi99)[]null[]<>true<>[<>]<686933>[1 2 3]<686934>(hi9)<>[true]<><686935><><>(hi10)<>null<686936>true(hi11)<686937>(hi12)+.0<686938>]>> DP 48 | % Visible file marker 49 | BT/F1 30 Tf 0 Tr 1 0 0 1 10 950 Tm(PDF compacted syntax sequences according to ISO 32000)Tj 1 0 0 1 10 900 Tm 50 | (This file must NOT be resaved or modified by any tool!! v3.0) Tj ET 51 | % 3 colored vector graphic squares that are clipped 52 | / gs q 40 w 75 75 400 400 re W S % stroke then clip a path with a wide black border 53 | 1 0. 
.0 rg 75 75 200 200 re f 0 1 0 rg 275 75 200 200 re f .0 0 1 rg 275 275 200 200 re f Q 54 | endstream 55 | endobj 56 | 6 0 obj 57 | <>endobj 59 | 7 0 obj 60 | <%comment after hex string end 62 | /Keywords(PDF,Compacted,Syntax,ISO 32000-2:2020)/CreationDate(D:20200317)/Author(Peter Wyatt)/Creator<48616e642d65646974>/Producer<48616e642d65646974>>> 63 | endobj 64 | xref 65 | 0 8 66 | 0000000000 65535 f 67 | 0000001236 00000 n 68 | 0000001563 00000 n 69 | 0000001580 00000 n 70 | 0000002039 00000 n 71 | 0000002436 00000 n 72 | 0000003302 00000 n 73 | 0000003377 00000 n 74 | trailer 75 | <<6264992C92074533A46A019C7CF9BFB6>]/Size 8>> 77 | startxref 78 | 3687 79 | %%EOF -------------------------------------------------------------------------------- /testing-resources/ISO_32000-2-2020_Amd1.fdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pdf-association/pdf-cos-syntax/8adcc72046ad40579add080379161273315fdbb2/testing-resources/ISO_32000-2-2020_Amd1.fdf -------------------------------------------------------------------------------- /testing-resources/SyntaxChecker-INVALID.pdf: -------------------------------------------------------------------------------- 1 | %PDF-1.7 2 | %©© 3 | 4 | % DO NOT ATTEMPT TO OPEN THIS PDF WITH A PDF VIEWER!!! 5 | % 6 | % An intentionally HIGHLY invalid PDF designed to test syntax highlighting of the VSCode Language Server 7 | % 8 | % This material is based upon work supported by the Defense Advanced 9 | % Research Projects Agency (DARPA) under Contract No. HR001119C0079. 10 | % Any opinions, findings and conclusions or recommendations expressed 11 | % in this material are those of the author(s) and do not necessarily 12 | % reflect the views of the Defense Advanced Research Projects Agency 13 | % (DARPA). Approved for public release. 14 | % 15 | % Copyright (c) 2023, PDF Association Inc. Peter Wyatt. 
16 | 1 0 obj 17 | << 18 | /Type /Catalog 19 | /Pages 2 0 R 20 | /PageLayout /An#20Undefined#20OptionWithHex % unknown value with hex 21 | /CatalogCustomKey 123 % unknown key 22 | /Version /1.4 % less than the header 23 | /MarkInfo << /Marked true /Suspects 1 >> % fake a Tagged PDF, also with a wrong type for Suspects 24 | /Lang /en-US %% wrong type (should be a string) 25 | /OpenAction << 26 | /Type /Action 27 | /S /URI 28 | /URI (\003\006Bad ASCII()) % unprintables in an ASCII string and nested brackets 29 | >>>> 30 | endobj 31 | 32 | 2 0 obj 33 | << 34 | /Type /Pages 35 | /Kids 36 | [ 37 | 3 0 R 38 | ] 39 | /Count 1 40 | 41 | >> 42 | endobj 43 | 3 0 obj 44 | << 45 | /Type /Page 46 | /PieceInfo << 47 | /Blah << 48 | /LastModified (is not a Date!\t\000) % with some escape sequences 49 | /LastModified (is a duplicate key for direct dictionary) 50 | /Data 123 51 | >> >> 52 | /Parent 2 0 R 53 | /Resources << 54 | /Font << /F1 5 0 R/F2 10 0 R >> 55 | /ExtGState << /GS1 <> >> %compacted names 56 | >> 57 | /MediaBox [ 841.890(string) -0 .0 +0 ] % various kinds of integer and a string which shouldn't be here 58 | /Contents 4 0 R 59 | /Rotate -32 % fails [fn:Eval((@Rotate mod 90)==0)] 60 | /StructParents -1 %% bad value 61 | /Annots [9 0 R] 62 | /NestedArray [ null [/A/B [true(String)false<>null]1.23 +0 -.45 [-123 +.3 +000987 <>]]] 63 | >> 64 | endobj 65 | 4 0 obj 66 | << 67 | /Length 41 68 | /String1FromSpec (These \ 69 | two strings \ 70 | are the same.) 71 | /String2FromSpec (These two strings are the same.) 72 | /NamesFromTable4 [ /A;Name_With-Various***Characters? /1.2 /$$ /@pattern /.notdef /Lime#20Green /paired#28#29parentheses /The_Key_of_F#23_Minor ] 73 | >> 74 | stream 75 | 5 w 76 | 1 0 0 rg 0 1 0 RG 77 | 10 10 500 500 re B 78 | endstream 79 | endobj 80 | 81 | 5 0 obj 82 | << 83 | /Type /Font 84 | /Type /Font %% duplicate key 85 | /Subtype /Type3 86 | /FontBBox [-36 -36. 
786 786] 87 | /FontMatrix [0.001 0 0 0.001 0 0 1 2 4 ] %% Too many elements in a matrix 88 | % /FontDescriptor missing so will fail fn:IsRequired(fn:IsPDFTagged()) 89 | /CharProcs 6 0 R 90 | /Encoding 10 0 R 91 | /FirstChar 92 92 | /LastChar 106 93 | /Widths [1000 1000] %% fails [fn:Eval(fn:ArrayLength(Widths)==(@LastChar - @FirstChar+1))] 94 | >> 95 | endobj 96 | 97 | 6 0 obj % CharProcs but not enough content streams so fail predicate on /Font /Widths key 98 | << 99 | /Glyph1 7 0 R 100 | /Glyph2 8 0 R 101 | >> 102 | endobj 103 | 104 | 7 0 obj 105 | <> 106 | stream 107 | q % push graphics state 108 | [] 0 d % solid line (dash array and phase) 109 | 20 w % thick line width for stroking 110 | 1 0 0 rg % red fill 111 | 0 1 0 RG % green stroke 112 | 2 j % Bevel line join 113 | 10 M % Mitre limit 114 | 1 j % Round line join 115 | 116 | % Star-shape (self-intersecting shape) 117 | 100 100 m 118 | 550 550 l 119 | 20 550 l 120 | 540 50 l 121 | 270 700 l 122 | b* % close, fill and stroke using Odd/Even Rule 123 | Q % pop graphics state 124 | BX EX % compatibility operators 125 | 0 g % Black text 126 | BT 127 | 1 0 0 1 12 12 Tm 128 | /Helv 10 Tf 129 | 12 TL 130 | (Hello ) ' 131 | (World) ' 132 | ET 133 | endstream 134 | endobj 135 | 136 | 8 0 obj 137 | << /Length 1441 /XX_ThirdClassKey 12.34 /AAPL_SecondClass (2nd class name) >> 138 | stream 139 | q 140 | BI % inline image "Begin Image" 141 | /Width 20 142 | /Height 10 143 | /BitsPerComponent 8 144 | /ColorSpace /DeviceRGB 145 | /Filter [/ASCIIHexDecode] 146 | /Length 1276 147 | ID 148 | ff0000ff0000ff0000ff0000ff0000ff 149 | 0000ff0000ff0000ff0000ff0000ff00 150 | 00ff0000ff0000ff0000ff0000ff0000 151 | ff0000ff0000ff0000ff0000ff0000ff 152 | ff00ffff00ffff00ffff00ffff00ffff 153 | 00ffff00ffff00ffff00ffff00ffff00 154 | ffff00ffff00ffff00ffff00ffff00ff 155 | ff00ffff00ff0000ff0000ffff000000 156 | ff0000ff0000ffffff000000ffffff00 157 | ffff00ffff000000ffffff00ffff00ff 158 | ff000000ff0000ffffff00ffff00ffff 159 | 
00ff0000ff0000ffff00ffff000000ff 160 | ffff00ffff000000ff0000ffffff0000 161 | 00ff0000ffffff00ffff000000ffffff 162 | 00ffff000000ffffff00ffff00ff0000 163 | ff0000ffff00ffff000000ffffff00ff 164 | ff000000ffffff000000ffffff000000 165 | ffffff000000ffffff00ffff00ffff00 166 | ffff00ffff00ffff00ff0000ff0000ff 167 | ff00ffff000000ffffff00ffff000000 168 | ffffff000000ffffff000000ffffff00 169 | 0000ffffff00ffff000000ff0000ffff 170 | ff00ffff00ff0000ff0000ffff00ffff 171 | 000000ffffff00ffff000000ffffff00 172 | ffff00ffff000000ffffff00ffff0000 173 | 00ffffff00ffff000000ffffff00ffff 174 | 00ff0000ff0000ffff000000ff0000ff 175 | 0000ffffff000000ffffff00ffff00ff 176 | ff000000ffffff00ffff00ffff000000 177 | ff0000ff0000ffffff00ffff00ff0000 178 | ff0000ffff00ffff00ffff00ffff00ff 179 | ff00ffff00ffff00ffff00ffff00ffff 180 | 00ffff00ffff00ffff00ffff00ffff00 181 | ffff00ffff00ffff00ff0000ff0000ff 182 | 0000ff0000ff0000ff0000ff0000ff00 183 | 00ff0000ff0000ff0000ff0000ff0000 184 | ff0000ff0000ff0000ff0000ff0000ff 185 | 0000ff0000ff0000 > 186 | EI 187 | Q 188 | endstream 189 | endobj 190 | 191 | 9 0 obj 192 | <> 197 | endobj 198 | 199 | 10 0 obj 200 | << 201 | /Type /Font 202 | /Subtype /CIDFontType0 203 | /BaseFont /AnExceedinglyLongBaseFontNameThatExceedsThe127LimitThatWasSpecifiedInEarlierVersionsOfPDFAndThatKeepsGoingAndGoingAndGoingSoItTriggersTheWarning 204 | /FontDescriptor << 205 | /Style << /Panose (Not 12 characters long) >> %% fails [fn:Eval(fn:StringLength(Panose)==12)] 206 | /Flags 458863 % SHOULD PASS 32 bit check! 
(bits 1-4,6-7,17-19 --> as zero-based: 0-3,5-6,16-18) 207 | /FontWeight 250 % not a predefined value 208 | /Descent 123 % not <= 0 209 | /FontName /BaseFontName 210 | >> 211 | >> 212 | endobj 213 | xref 214 | 0 11 215 | 0000000000 65535 f 216 | 0000000626 00000 n 217 | 0000001096 00000 n 218 | 0000001160 00000 n 219 | 0000001835 00000 n 220 | 0000002190 00000 n 221 | 0000002597 00000 n 222 | 0000002729 00000 n 223 | 0000003342 00000 n 224 | 0000004894 00000 n 225 | 0000005067 00000 n 226 | trailer 227 | << 228 | /Size 11 229 | /Root 1 0 R 230 | /Info <<%Info is not an indirect reference 231 | /CreationDate (D:20220714283724+10'00') %% bad date string (hours is > 24) 232 | /Subject(\377\376UTF-16LE BOM!) %% UTF-16LE BOM string 233 | /SomeHexString < 65 45 234 | 6c6C6F >>>%% hex string with whitespace 235 | >> 236 | startxref 237 | 5589 238 | %%EOF -------------------------------------------------------------------------------- /testing-resources/keyboard-shortcuts-macos-human-readable.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pdf-association/pdf-cos-syntax/8adcc72046ad40579add080379161273315fdbb2/testing-resources/keyboard-shortcuts-macos-human-readable.pdf -------------------------------------------------------------------------------- /testing-resources/keyboard-shortcuts-macos-qdf.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pdf-association/pdf-cos-syntax/8adcc72046ad40579add080379161273315fdbb2/testing-resources/keyboard-shortcuts-macos-qdf.pdf -------------------------------------------------------------------------------- /testing-resources/keyboard-shortcuts-macos.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pdf-association/pdf-cos-syntax/8adcc72046ad40579add080379161273315fdbb2/testing-resources/keyboard-shortcuts-macos.pdf 
-------------------------------------------------------------------------------- /testing-resources/significant-properties-human-readable.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pdf-association/pdf-cos-syntax/8adcc72046ad40579add080379161273315fdbb2/testing-resources/significant-properties-human-readable.pdf -------------------------------------------------------------------------------- /testing-resources/significant-properties-qdf.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pdf-association/pdf-cos-syntax/8adcc72046ad40579add080379161273315fdbb2/testing-resources/significant-properties-qdf.pdf -------------------------------------------------------------------------------- /testing-resources/significant-properties.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pdf-association/pdf-cos-syntax/8adcc72046ad40579add080379161273315fdbb2/testing-resources/significant-properties.pdf -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "module": "commonjs", 4 | "target": "es2020", 5 | "lib": ["es2020"], 6 | "outDir": "out", 7 | "rootDir": "src", 8 | "sourceMap": true, 9 | "strict": true 10 | }, 11 | "include": [ 12 | "src" 13 | ], 14 | "exclude": [ 15 | "node_modules", 16 | ".vscode-test" 17 | ], 18 | "references": [ 19 | { "path": "./client" }, 20 | { "path": "./server" } 21 | ] 22 | } --------------------------------------------------------------------------------