├── .Rbuildignore
├── .github
    ├── .gitignore
    ├── CODEOWNERS
    ├── CODE_OF_CONDUCT.md
    └── workflows
    │   ├── R-CMD-check.yaml
    │   ├── pkgdown.yaml
    │   ├── pr-commands.yaml
    │   ├── rhub.yaml
    │   └── test-coverage.yaml
├── .gitignore
├── .vscode
    ├── extensions.json
    └── settings.json
├── DESCRIPTION
├── LICENSE
├── LICENSE.md
├── Makefile
├── NAMESPACE
├── NEWS.md
├── R
    ├── expr_as_xml.R
    ├── package.R
    ├── utils.R
    └── xmlparsedata-package.R
├── README.Rmd
├── README.md
├── _pkgdown.yml
├── air.toml
├── codecov.yml
├── man
    ├── expr_as_xml.Rd
    ├── xml_parse_data.Rd
    ├── xml_parse_token_map.Rd
    ├── xmlparsedata-package.Rd
    └── xmlparsedata.Rd
└── tests
    ├── testthat.R
    └── testthat
        ├── test-expr_as_xml.R
        └── test-xml_parse_data.R


/.Rbuildignore:
--------------------------------------------------------------------------------
 1 | ^.*\.Rproj$
 2 | ^\.Rproj\.user$
 3 | ^Makefile$
 4 | ^README.Rmd$
 5 | ^README\.html$
 6 | ^.travis.yml$
 7 | ^appveyor.yml$
 8 | ^\.github$
 9 | ^revdep$
10 | ^_pkgdown\.yml$
11 | ^docs$
12 | ^pkgdown$
13 | ^codecov\.yml$
14 | ^LICENSE\.md$
15 | ^[\.]?air\.toml$
16 | ^\.vscode$
17 | 


--------------------------------------------------------------------------------
/.github/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | 


--------------------------------------------------------------------------------
/.github/CODEOWNERS:
--------------------------------------------------------------------------------
1 | # CODEOWNERS for xmlparsedata
2 | # https://www.tidyverse.org/development/understudies
3 | * @gaborcsardi @jimhester
4 | 


--------------------------------------------------------------------------------
/.github/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
  1 | # Contributor Covenant Code of Conduct
  2 | 
  3 | ## Our Pledge
  4 | 
  5 | We as members, contributors, and leaders pledge to make participation in our
  6 | community a harassment-free experience for everyone, regardless of age, body
  7 | size, visible or invisible disability, ethnicity, sex characteristics, gender
  8 | identity and expression, level of experience, education, socio-economic status,
  9 | nationality, personal appearance, race, caste, color, religion, or sexual
 10 | identity and orientation.
 11 | 
 12 | We pledge to act and interact in ways that contribute to an open, welcoming,
 13 | diverse, inclusive, and healthy community.
 14 | 
 15 | ## Our Standards
 16 | 
 17 | Examples of behavior that contributes to a positive environment for our
 18 | community include:
 19 | 
 20 | * Demonstrating empathy and kindness toward other people
 21 | * Being respectful of differing opinions, viewpoints, and experiences
 22 | * Giving and gracefully accepting constructive feedback
 23 | * Accepting responsibility and apologizing to those affected by our mistakes,
 24 |   and learning from the experience
 25 | * Focusing on what is best not just for us as individuals, but for the overall
 26 |   community
 27 | 
 28 | Examples of unacceptable behavior include:
 29 | 
 30 | * The use of sexualized language or imagery, and sexual attention or advances of
 31 |   any kind
 32 | * Trolling, insulting or derogatory comments, and personal or political attacks
 33 | * Public or private harassment
 34 | * Publishing others' private information, such as a physical or email address,
 35 |   without their explicit permission
 36 | * Other conduct which could reasonably be considered inappropriate in a
 37 |   professional setting
 38 | 
 39 | ## Enforcement Responsibilities
 40 | 
 41 | Community leaders are responsible for clarifying and enforcing our standards of
 42 | acceptable behavior and will take appropriate and fair corrective action in
 43 | response to any behavior that they deem inappropriate, threatening, offensive,
 44 | or harmful.
 45 | 
 46 | Community leaders have the right and responsibility to remove, edit, or reject
 47 | comments, commits, code, wiki edits, issues, and other contributions that are
 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation
 49 | decisions when appropriate.
 50 | 
 51 | ## Scope
 52 | 
 53 | This Code of Conduct applies within all community spaces, and also applies when
 54 | an individual is officially representing the community in public spaces.
 55 | Examples of representing our community include using an official e-mail address,
 56 | posting via an official social media account, or acting as an appointed
 57 | representative at an online or offline event.
 58 | 
 59 | ## Enforcement
 60 | 
 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
 62 | reported to the community leaders responsible for enforcement at codeofconduct@posit.co. 
 63 | All complaints will be reviewed and investigated promptly and fairly.
 64 | 
 65 | All community leaders are obligated to respect the privacy and security of the
 66 | reporter of any incident.
 67 | 
 68 | ## Enforcement Guidelines
 69 | 
 70 | Community leaders will follow these Community Impact Guidelines in determining
 71 | the consequences for any action they deem in violation of this Code of Conduct:
 72 | 
 73 | ### 1. Correction
 74 | 
 75 | **Community Impact**: Use of inappropriate language or other behavior deemed
 76 | unprofessional or unwelcome in the community.
 77 | 
 78 | **Consequence**: A private, written warning from community leaders, providing
 79 | clarity around the nature of the violation and an explanation of why the
 80 | behavior was inappropriate. A public apology may be requested.
 81 | 
 82 | ### 2. Warning
 83 | 
 84 | **Community Impact**: A violation through a single incident or series of
 85 | actions.
 86 | 
 87 | **Consequence**: A warning with consequences for continued behavior. No
 88 | interaction with the people involved, including unsolicited interaction with
 89 | those enforcing the Code of Conduct, for a specified period of time. This
 90 | includes avoiding interactions in community spaces as well as external channels
 91 | like social media. Violating these terms may lead to a temporary or permanent
 92 | ban.
 93 | 
 94 | ### 3. Temporary Ban
 95 | 
 96 | **Community Impact**: A serious violation of community standards, including
 97 | sustained inappropriate behavior.
 98 | 
 99 | **Consequence**: A temporary ban from any sort of interaction or public
100 | communication with the community for a specified period of time. No public or
101 | private interaction with the people involved, including unsolicited interaction
102 | with those enforcing the Code of Conduct, is allowed during this period.
103 | Violating these terms may lead to a permanent ban.
104 | 
105 | ### 4. Permanent Ban
106 | 
107 | **Community Impact**: Demonstrating a pattern of violation of community
108 | standards, including sustained inappropriate behavior, harassment of an
109 | individual, or aggression toward or disparagement of classes of individuals.
110 | 
111 | **Consequence**: A permanent ban from any sort of public interaction within the
112 | community.
113 | 
114 | ## Attribution
115 | 
116 | This Code of Conduct is adapted from the [Contributor Covenant][homepage],
117 | version 2.1, available at
118 | <https://www.contributor-covenant.org/version/2/1/code_of_conduct.html>.
119 | 
120 | Community Impact Guidelines were inspired by
121 | [Mozilla's code of conduct enforcement ladder](https://github.com/mozilla/inclusion).
122 | 
123 | For answers to common questions about this code of conduct, see the FAQ at
124 | <https://www.contributor-covenant.org/faq>. Translations are available at <https://www.contributor-covenant.org/translations>.
125 | 
126 | [homepage]: https://www.contributor-covenant.org
127 | 


--------------------------------------------------------------------------------
/.github/workflows/R-CMD-check.yaml:
--------------------------------------------------------------------------------
 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 3 | #
 4 | # NOTE: This workflow is overkill for most R packages and
 5 | # check-standard.yaml is likely a better choice.
 6 | # usethis::use_github_action("check-standard") will install it.
 7 | on:
 8 |   push:
 9 |     branches: [main, master]
10 |   pull_request:
11 | 
12 | name: R-CMD-check.yaml
13 | 
14 | permissions: read-all
15 | 
16 | jobs:
17 |   R-CMD-check:
18 |     runs-on: ${{ matrix.config.os }}
19 | 
20 |     name: ${{ matrix.config.os }} (${{ matrix.config.r }})
21 | 
22 |     strategy:
23 |       fail-fast: false
24 |       matrix:
25 |         config:
26 |           - {os: macos-latest,   r: 'release'}
27 | 
28 |           - {os: windows-latest, r: 'release'}
29 |           # use 4.0 or 4.1 to check with rtools40's older compiler
30 |           - {os: windows-latest, r: 'oldrel-4'}
31 | 
32 |           - {os: ubuntu-latest,  r: 'devel', http-user-agent: 'release'}
33 |           - {os: ubuntu-latest,  r: 'release'}
34 |           - {os: ubuntu-latest,  r: 'oldrel-1'}
35 |           - {os: ubuntu-latest,  r: 'oldrel-2'}
36 |           - {os: ubuntu-latest,  r: 'oldrel-3'}
37 |           - {os: ubuntu-latest,  r: 'oldrel-4'}
38 | 
39 |     env:
40 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
41 |       R_KEEP_PKG_SOURCE: yes
42 | 
43 |     steps:
44 |       - uses: actions/checkout@v4
45 | 
46 |       - uses: r-lib/actions/setup-pandoc@v2
47 | 
48 |       - uses: r-lib/actions/setup-r@v2
49 |         with:
50 |           r-version: ${{ matrix.config.r }}
51 |           http-user-agent: ${{ matrix.config.http-user-agent }}
52 |           use-public-rspm: true
53 | 
54 |       - uses: r-lib/actions/setup-r-dependencies@v2
55 |         with:
56 |           extra-packages: any::rcmdcheck
57 |           needs: check
58 | 
59 |       - uses: r-lib/actions/check-r-package@v2
60 |         with:
61 |           upload-snapshots: true
62 |           build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")'
63 | 


--------------------------------------------------------------------------------
/.github/workflows/pkgdown.yaml:
--------------------------------------------------------------------------------
 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 3 | on:
 4 |   push:
 5 |     branches: [main, master]
 6 |   pull_request:
 7 |   release:
 8 |     types: [published]
 9 |   workflow_dispatch:
10 | 
11 | name: pkgdown.yaml
12 | 
13 | permissions: read-all
14 | 
15 | jobs:
16 |   pkgdown:
17 |     runs-on: ubuntu-latest
18 |     # Only restrict concurrency for non-PR jobs
19 |     concurrency:
20 |       group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }}
21 |     env:
22 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
23 |     permissions:
24 |       contents: write
25 |     steps:
26 |       - uses: actions/checkout@v4
27 | 
28 |       - uses: r-lib/actions/setup-pandoc@v2
29 | 
30 |       - uses: r-lib/actions/setup-r@v2
31 |         with:
32 |           use-public-rspm: true
33 | 
34 |       - uses: r-lib/actions/setup-r-dependencies@v2
35 |         with:
36 |           extra-packages: any::pkgdown, local::.
37 |           needs: website
38 | 
39 |       - name: Build site
40 |         run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE)
41 |         shell: Rscript {0}
42 | 
43 |       - name: Deploy to GitHub pages 🚀
44 |         if: github.event_name != 'pull_request'
45 |         uses: JamesIves/github-pages-deploy-action@v4.5.0
46 |         with:
47 |           clean: false
48 |           branch: gh-pages
49 |           folder: docs
50 | 


--------------------------------------------------------------------------------
/.github/workflows/pr-commands.yaml:
--------------------------------------------------------------------------------
 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 3 | on:
 4 |   issue_comment:
 5 |     types: [created]
 6 | 
 7 | name: pr-commands.yaml
 8 | 
 9 | permissions: read-all
10 | 
11 | jobs:
12 |   document:
13 |     if: ${{ github.event.issue.pull_request && (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') && startsWith(github.event.comment.body, '/document') }}
14 |     name: document
15 |     runs-on: ubuntu-latest
16 |     env:
17 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
18 |     permissions:
19 |       contents: write
20 |     steps:
21 |       - uses: actions/checkout@v4
22 | 
23 |       - uses: r-lib/actions/pr-fetch@v2
24 |         with:
25 |           repo-token: ${{ secrets.GITHUB_TOKEN }}
26 | 
27 |       - uses: r-lib/actions/setup-r@v2
28 |         with:
29 |           use-public-rspm: true
30 | 
31 |       - uses: r-lib/actions/setup-r-dependencies@v2
32 |         with:
33 |           extra-packages: any::roxygen2
34 |           needs: pr-document
35 | 
36 |       - name: Document
37 |         run: roxygen2::roxygenise()
38 |         shell: Rscript {0}
39 | 
40 |       - name: commit
41 |         run: |
42 |           git config --local user.name "$GITHUB_ACTOR"
43 |           git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com"
44 |           git add man/\* NAMESPACE
45 |           git commit -m 'Document'
46 | 
47 |       - uses: r-lib/actions/pr-push@v2
48 |         with:
49 |           repo-token: ${{ secrets.GITHUB_TOKEN }}
50 | 
51 |   style:
52 |     if: ${{ github.event.issue.pull_request && (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') && startsWith(github.event.comment.body, '/style') }}
53 |     name: style
54 |     runs-on: ubuntu-latest
55 |     env:
56 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
57 |     permissions:
58 |       contents: write
59 |     steps:
60 |       - uses: actions/checkout@v4
61 | 
62 |       - uses: r-lib/actions/pr-fetch@v2
63 |         with:
64 |           repo-token: ${{ secrets.GITHUB_TOKEN }}
65 | 
66 |       - uses: r-lib/actions/setup-r@v2
67 | 
68 |       - name: Install dependencies
69 |         run: install.packages("styler")
70 |         shell: Rscript {0}
71 | 
72 |       - name: Style
73 |         run: styler::style_pkg()
74 |         shell: Rscript {0}
75 | 
76 |       - name: commit
77 |         run: |
78 |           git config --local user.name "$GITHUB_ACTOR"
79 |           git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com"
80 |           git add \*.R
81 |           git commit -m 'Style'
82 | 
83 |       - uses: r-lib/actions/pr-push@v2
84 |         with:
85 |           repo-token: ${{ secrets.GITHUB_TOKEN }}
86 | 


--------------------------------------------------------------------------------
/.github/workflows/rhub.yaml:
--------------------------------------------------------------------------------
 1 | # R-hub's generic GitHub Actions workflow file. Its canonical location is at
 2 | # https://github.com/r-hub/rhub2/blob/v1/inst/workflow/rhub.yaml
 3 | # You can update this file to a newer version using the rhub2 package:
 4 | #
 5 | # rhub2::rhub_setup()
 6 | #
 7 | # It is unlikely that you need to modify this file manually.
 8 | 
 9 | name: R-hub
10 | run-name: "${{ github.event.inputs.id }}: ${{ github.event.inputs.name || format('Manually run by {0}', github.triggering_actor) }}"
11 | 
12 | on:
13 |   workflow_dispatch:
14 |     inputs:
15 |       config:
16 |         description: 'A comma separated list of R-hub platforms to use.'
17 |         type: string
18 |         default: 'linux,windows,macos'
19 |       name:
20 |         description: 'Run name. You can leave this empty now.'
21 |         type: string
22 |       id:
23 |         description: 'Unique ID. You can leave this empty now.'
24 |         type: string
25 | 
26 | jobs:
27 | 
28 |   setup:
29 |     runs-on: ubuntu-latest
30 |     outputs:
31 |       containers: ${{ steps.rhub-setup.outputs.containers }}
32 |       platforms: ${{ steps.rhub-setup.outputs.platforms }}
33 | 
34 |     steps:
35 |     # NO NEED TO CHECKOUT HERE
36 |     - uses: r-hub/rhub2/actions/rhub-setup@v1
37 |       with:
38 |         config: ${{ github.event.inputs.config }}
39 |       id: rhub-setup
40 | 
41 |   linux-containers:
42 |     needs: setup
43 |     if: ${{ needs.setup.outputs.containers != '[]' }}
44 |     runs-on: ubuntu-latest
45 |     name: ${{ matrix.config.label }}
46 |     strategy:
47 |       fail-fast: false
48 |       matrix:
49 |         config: ${{ fromJson(needs.setup.outputs.containers) }}
50 |     container:
51 |       image: ${{ matrix.config.container }}
52 | 
53 |     steps:
54 |       - uses: r-hub/rhub2/actions/rhub-checkout@v1
55 |       - uses: r-hub/rhub2/actions/rhub-check@v1
56 |         with:
57 |           token: ${{ secrets.RHUB_TOKEN }}
58 |           job-config: ${{ matrix.config.job-config }}
59 | 
60 |   other-platforms:
61 |     needs: setup
62 |     if: ${{ needs.setup.outputs.platforms != '[]' }}
63 |     runs-on: ${{ matrix.config.os }}
64 |     name: ${{ matrix.config.label }}
65 |     strategy:
66 |       fail-fast: false
67 |       matrix:
68 |         config: ${{ fromJson(needs.setup.outputs.platforms) }}
69 | 
70 |     steps:
71 |       - uses: r-hub/rhub2/actions/rhub-checkout@v1
72 |       - uses: r-hub/rhub2/actions/rhub-setup-r@v1
73 |         with:
74 |           job-config: ${{ matrix.config.job-config }}
75 |           token: ${{ secrets.RHUB_TOKEN }}
76 |       - uses: r-hub/rhub2/actions/rhub-check@v1
77 |         with:
78 |           job-config: ${{ matrix.config.job-config }}
79 |           token: ${{ secrets.RHUB_TOKEN }}
80 | 


--------------------------------------------------------------------------------
/.github/workflows/test-coverage.yaml:
--------------------------------------------------------------------------------
 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 3 | on:
 4 |   push:
 5 |     branches: [main, master]
 6 |   pull_request:
 7 | 
 8 | name: test-coverage.yaml
 9 | 
10 | permissions: read-all
11 | 
12 | jobs:
13 |   test-coverage:
14 |     runs-on: ubuntu-latest
15 |     env:
16 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
17 | 
18 |     steps:
19 |       - uses: actions/checkout@v4
20 | 
21 |       - uses: r-lib/actions/setup-r@v2
22 |         with:
23 |           use-public-rspm: true
24 | 
25 |       - uses: r-lib/actions/setup-r-dependencies@v2
26 |         with:
27 |           extra-packages: any::covr, any::xml2
28 |           needs: coverage
29 | 
30 |       - name: Test coverage
31 |         run: |
32 |           cov <- covr::package_coverage(
33 |             quiet = FALSE,
34 |             clean = FALSE,
35 |             install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package")
36 |           )
37 |           print(cov)
38 |           covr::to_cobertura(cov)
39 |         shell: Rscript {0}
40 | 
41 |       - uses: codecov/codecov-action@v5
42 |         with:
43 |           # Fail CI on error if not on a PR, or if on a PR and a token is given
44 |           fail_ci_if_error: ${{ github.event_name != 'pull_request' || secrets.CODECOV_TOKEN }}
45 |           files: ./cobertura.xml
46 |           plugins: noop
47 |           disable_search: true
48 |           token: ${{ secrets.CODECOV_TOKEN }}
49 | 
50 |       - name: Show testthat output
51 |         if: always()
52 |         run: |
53 |           ## --------------------------------------------------------------------
54 |           find '${{ runner.temp }}/package' -name 'testthat.Rout*' -exec cat '{}' \; || true
55 |         shell: bash
56 | 
57 |       - name: Upload test results
58 |         if: failure()
59 |         uses: actions/upload-artifact@v4
60 |         with:
61 |           name: coverage-test-failures
62 |           path: ${{ runner.temp }}/package
63 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | /revdep
5 | docs
6 | 


--------------------------------------------------------------------------------
/.vscode/extensions.json:
--------------------------------------------------------------------------------
1 | {
2 |     "recommendations": [
3 |         "Posit.air-vscode"
4 |     ]
5 | }
6 | 


--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 |     "[r]": {
3 |         "editor.formatOnSave": true,
4 |         "editor.defaultFormatter": "Posit.air-vscode"
5 |     }
6 | }
7 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: xmlparsedata
 2 | Title: Parse Data of 'R' Code as an 'XML' Tree
 3 | Version: 1.0.5.9000
 4 | Authors@R: c(
 5 |     person("Gábor", "Csárdi", , "csardi.gabor@gmail.com", role = c("aut", "cre")),
 6 |     person("Posit Software, PBC", role = c("cph", "fnd"),
 7 |            comment = c(ROR = "03wc8by49")),
 8 |     person("Mango Solutions", role = c("cph", "fnd"))
 9 |   )
10 | Description: Convert the output of 'utils::getParseData()' to an 'XML'
11 |     tree that one can search via 'XPath' and that is easier to manipulate
12 |     in general.
13 | License: MIT + file LICENSE
14 | URL: https://github.com/r-lib/xmlparsedata#readme,
15 |     https://r-lib.github.io/xmlparsedata/
16 | BugReports: https://github.com/r-lib/xmlparsedata/issues
17 | Depends: 
18 |     R (>= 3.0.0)
19 | Suggests:
20 |     covr,
21 |     testthat (>= 3.0.0),
22 |     xml2
23 | Config/Needs/website: tidyverse/tidytemplate
24 | Config/testthat/edition: 3
25 | Config/usethis/last-upkeep: 2025-05-07
26 | Encoding: UTF-8
27 | LazyData: true
28 | Roxygen: list(markdown = TRUE)
29 | RoxygenNote: 7.2.3
30 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | YEAR: 2025
2 | COPYRIGHT HOLDER: xmlparsedata authors
3 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | # MIT License
 2 | 
 3 | Copyright (c) 2025 xmlparsedata authors
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | 
2 | all: README.md
3 | 
4 | README.md: README.Rmd
5 | 	Rscript -e "library(knitr); knit('$<', output = '$@', quiet = TRUE)"
6 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
1 | # Generated by roxygen2: do not edit by hand
2 | 
3 | export(expr_as_xml)
4 | export(xml_parse_data)
5 | export(xml_parse_token_map)
6 | importFrom(utils,getParseData)
7 | 


--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
 1 | 
 2 | # xmlparsedata Development version
 3 | 
 4 | * Re-parse character literals with octal-escaped expressions of width 1 or 2,
 5 |   e.g. `"\1"`, to work around a bug (in R<4.3.0) in `utils::getParseData()`
 6 |   (#25, @michaelchirico).
 7 | 
 8 | * New `expr_as_xml()` to get an XML representation of R expressions (#27 @MichaelChirico).
 9 | 
10 | # xmlparsedata 1.0.5
11 | 
12 | * Translate `\` in lambda expression to `OP-LAMBDA` (#18 @renkun-ken).
13 | 
14 | * Drop all control characters, except horizontal tab and newline (#19).
15 | 
16 | # xmlparsedata 1.0.4
17 | 
18 | * Translate ] tokens to `OP-RIGHT-BRACKET` instead of
19 |   `OP-RIGHT-BRACE` (#11 @AshesITR).
20 | 
21 | * `xml_parse_data()` now works if `includeText = FALSE`
22 |   (#14 @renkun-ken).
23 | 
24 | # xmlparsedata 1.0.3
25 | 
26 | * Ensure that closing xml-tags for code expressions that end at the same
27 |   position in a file respect start-first-end-last ordering in the produced xml.
28 |   Ensures that the new `equal_assign` token in `getParseData()` for R-3.6 is
29 |   handled appropriately. #5 @russHyde
30 | 
31 | # xmlparsedata 1.0.2
32 | 
33 | * Remove control characters `\003`, `\007`, `\010`, `\027`, as they are
34 |   not allowed in XML 1.0, #1 @GregoireGauriot
35 | 
36 | * Always convert parsed text to UTF-8
37 | 
38 | # xmlparsedata 1.0.1
39 | 
40 | * Fix a bug when the input is already a `getParseData()` data frame.
41 |   https://github.com/jimhester/lintr filters the parsed data to include
42 |   individual functions only, but only filters the data frame, not the
43 |   underlying srcrefs, so when we call `getParseData()` on the data frame
44 |   again, we get the data for the whole source file. This is fixed now by
45 |   noticing that the input is already a data frame
46 | 
47 | # xmlparsedata 1.0.0
48 | 
49 | First public release.
50 | 


--------------------------------------------------------------------------------
/R/expr_as_xml.R:
--------------------------------------------------------------------------------
 1 | #' Get an XML representation of an expression
 2 | #'
 3 | #' @param expr An expression. It is captured unevaluated via `substitute()`.
 4 | #' @export
 5 | expr_as_xml <- function(expr) {
 6 |   if (!requireNamespace("xml2", quietly = TRUE)) {
 7 |     stop("'xml2' is required to return an XML object")
 8 |   }
 9 |   tmp_source <- tempfile()
10 |   on.exit(unlink(tmp_source)) # remove the temporary file when the function exits
11 |   # Write the unevaluated expression to a temp file, then re-parse it from there.
12 |   # NB: a deparse()-based approach struggles with `{` expressions, hence dput().
13 |   dput(substitute(expr), file = tmp_source)
14 |   parsed_expr <- parse(tmp_source, keep.source = TRUE)
15 |   # TODO(#28): Strip the line/column metadata, which is technically
16 |   #   'missing' for this case (it refers to the temp file, not user code).
17 |   xml2::read_xml(xml_parse_data(parsed_expr)) # result of xml2::read_xml()
18 | }
19 | 


--------------------------------------------------------------------------------
/R/package.R:
--------------------------------------------------------------------------------
  1 | #' Parse Data of R Code as an 'XML' Tree
  2 | #'
  3 | #' Convert the output of 'utils::getParseData()' to an 'XML' tree, that is
  4 | #' searchable and easier to manipulate in general.
  5 | #'
  6 | #' @docType package
  7 | #' @name xmlparsedata
  8 | NULL
  9 | 
 10 | #' Convert R parse data to XML
 11 | #'
 12 | #' In recent R versions the parser can attach source code location
 13 | #' information to the parsed expressions. This information is often
 14 | #' useful for static analysis, e.g. code linting. It can be accessed
 15 | #' via the [utils::getParseData()] function.
 16 | #'
 17 | #' `xml_parse_data()` converts this information to an XML tree.
 18 | #' The R parser's token names are preserved in the XML as much as
 19 | #' possible, but some of them are not valid XML tag names, so they are
 20 | #' renamed, see the [xml_parse_token_map] vector for the
 21 | #' mapping.
 22 | #'
 23 | #' The top XML tag is `<exprlist>`, which is a list of
 24 | #' expressions, each expression is an `<expr>` tag. Each tag
 25 | #' has attributes that define the location: `line1`, `col1`,
 26 | #' `line2`, `col2`. These are from the [getParseData()]
 27 | #' data frame column names. Next, there are two attributes,
 28 | #' `start` and `end`, which can be used as an ordering of
 29 | #' expressions in the document. Note that while the values
 30 | #' are correlated with (and in some cases may match exactly)
 31 | #' positions in the document, this cannot be relied upon.
 32 | #'
 33 | #' See an example below. See also the README at
 34 | #' <https://github.com/r-lib/xmlparsedata#readme>
 35 | #' for examples on how to search the XML tree with the `xml2` package
 36 | #' and XPath expressions.
 37 | #'
 38 | #' Note that `xml_parse_data()` silently drops all control characters
 39 | #' (0x01-0x1f) from the input, except horizontal tab (0x09) and newline
 40 | #' (0x0a), because they are invalid in XML 1.0.
 41 | #'
 42 | #' @param pretty Whether to pretty-indent the XML output. It has a small
 43 | #'   overhead which probably only matters for very large source files.
 44 | #' @inheritParams utils::getParseData
 45 | #' @return An XML string representing the parse data. See details below.
 46 | #'
 47 | #' @export
 48 | #' @importFrom utils getParseData
 49 | #' @seealso [xml_parse_token_map] for the token names.
 50 | #' <https://github.com/r-lib/xmlparsedata#readme> for more
 51 | #' information and use cases.
 52 | #' @examples
 53 | #' code <- "function(a = 1, b = 2) {\n  a + b\n}\n"
 54 | #' expr <- parse(text = code, keep.source = TRUE)
 55 | #'
 56 | #' # The base R way:
 57 | #' getParseData(expr)
 58 | #'
 59 | #' cat(xml_parse_data(expr, pretty = TRUE))
 60 | xml_parse_data <- function(x, includeText = NA, pretty = FALSE) {
 61 |   xml_header <- paste0(
 62 |     "<?xml version=\"1.0\" encoding=\"UTF-8\" ",
 63 |     "standalone=\"yes\" ?>\n<exprlist>\n"
 64 |   )
 65 |   xml_footer <- "\n</exprlist>\n"
 66 | 
 67 |   ## Set when `x` has no parse data of its own: the positions then come from
 68 |   ## a temporary re-parse and are used for ordering only, never emitted.
 69 |   no_srcref <- FALSE
 70 | 
 71 |   ## Maybe it is already a data frame, e.g. when used in lintr
 72 |   if (is.data.frame(x)) {
 73 |     pd <- x
 74 |   } else {
 75 |     pd <- getParseData(x, includeText = includeText)
 76 |     if (is.null(pd)) {
 77 |       ## No parse data attached: deparse into a temporary file and parse that
 78 |       tmp_source <- tempfile()
 79 |       on.exit(unlink(tmp_source), add = TRUE)
 80 |       dput(x, file = tmp_source)
 81 | 
 82 |       x <- parse(tmp_source, keep.source = TRUE)
 83 |       pd <- getParseData(x, includeText = includeText)
 84 |       ## Keep the re-parsed positions for now. Blanking them to NA here
 85 |       ## would leave order() below with all-NA position keys and would turn
 86 |       ## the STR_CONST check into `if (NA)`.
 87 |       no_srcref <- TRUE
 88 |     }
 89 |   }
 90 | 
 91 |   ## Nothing to convert: return an empty <exprlist>
 92 |   if (!nrow(pd)) {
 93 |     return(paste0(xml_header, xml_footer))
 94 |   }
 95 | 
 96 |   pd <- fix_comments(pd)
 97 | 
 98 |   if (!is.data.frame(x)) {
 99 |     # workaround for R parser bug #18323; see #25
100 |     str_const_mismatch <- pd$token == "STR_CONST" &
101 |       pd$col2 - pd$col1 != nchar(pd$text) - 1L &
102 |       # skip if there are tabs, which would require complicating the logic a lot
103 |       !grepl("\t", pd$text, fixed = TRUE)
104 |     if (any(str_const_mismatch)) {
105 |       pd$text[str_const_mismatch] <- reparse_octal(
106 |         pd[str_const_mismatch, ],
107 |         attr(x, "srcfile")$lines
108 |       )
109 |     }
110 |   }
111 | 
112 |   ## The XML header declares UTF-8, so convert the token text to match
113 |   if (!is.null(pd$text)) {
114 |     pd$text <- enc2utf8(pd$text)
115 |   }
116 | 
117 |   ## Tags for all nodes, terminal nodes have end tags as well
118 |   pd$token <- map_token(pd$token)
119 | 
120 |   ## Positions, to make it easy to compare what comes first
121 |   maxcol <- max(pd$col1, pd$col2) + 1L
122 |   pd$start <- pd$line1 * maxcol + pd$col1
123 |   pd$end <- pd$line2 * maxcol + pd$col2
124 | 
125 |   terminal_tag <- character(nrow(pd))
126 |   terminal_tag[pd$terminal] <- paste0("</", pd$token[pd$terminal], ">")
127 |   node_text <- if (!is.null(pd$text)) xml_encode(pd$text) else ""
128 |   if (no_srcref || anyNA(pd$line1)) {
129 |     ## No meaningful positions: emit bare tags without location attributes
130 |     pd$tag <- paste0("<", pd$token, ">", node_text, terminal_tag)
131 |   } else {
132 |     pd$tag <- paste0(
133 |       "<", pd$token,
134 |       " line1=\"", pd$line1,
135 |       "\" col1=\"", pd$col1,
136 |       "\" line2=\"", pd$line2,
137 |       "\" col2=\"", pd$col2,
138 |       "\" start=\"", pd$start,
139 |       "\" end=\"", pd$end,
140 |       "\">", node_text, terminal_tag
141 |     )
142 |   }
143 | 
144 |   ## Add an extra terminal tag for each non-terminal one; it is placed at the
145 |   ## end of its node, sorting after a terminal that ends at the same position
146 |   pd2 <- pd[!pd$terminal, ]
147 |   if (nrow(pd2)) {
148 |     pd2$terminal <- TRUE
149 |     pd2$parent <- -1
150 |     pd2$line1 <- pd2$line2
151 |     pd2$col1 <- pd2$col2
152 |     pd2$line2 <- pd2$line2 - 1L
153 |     pd2$col2 <- pd2$col2 - 1L
154 |     pd2$tag <- paste0("</", pd2$token, ">")
155 |     pd <- rbind(pd, pd2, make.row.names = FALSE)
156 |   }
157 | 
158 |   ## Order the nodes properly
159 |   ## - the terminal nodes from pd2 may be nested inside each other, when
160 |   ##   this happens they will have the same line1, col1, line2, col2 and
161 |   ##   terminal status; and 'start' is used to break ties
162 |   ord <- order(pd$line1, pd$col1, -pd$line2, -pd$col2, pd$terminal, -pd$start)
163 |   pd <- pd[ord, ]
164 | 
165 |   if (pretty) {
166 |     ## Each opening non-terminal tag indents what follows by two spaces; each
167 |     ## synthetic closing tag (parent == -1) removes that indent again
168 |     str <- !pd$terminal
169 |     end <- pd$parent == -1
170 |     ind <- 2L + cumsum(str * 2L + end * (-2L)) - str * 2L
171 |     xml <- paste0(strrep(" ", ind), pd$tag, collapse = "\n")
172 |   } else {
173 |     xml <- paste(pd$tag, collapse = "\n")
174 |   }
175 | 
176 |   paste0(xml_header, xml, xml_footer)
177 | }
178 | 
179 | fix_comments <- function(pd) {
180 |   pd$parent <- replace(pd$parent, pd$parent < 0, 0) # negative ids become 0
181 |   pd
182 | }
183 | 
184 | map_token <- function(token) {
185 |   idx <- match(token, names(xml_parse_token_map)) # NA: token is kept as is
186 |   token[!is.na(idx)] <- xml_parse_token_map[idx[!is.na(idx)]]
187 |   token
188 | }
189 | 
190 | #' Map token names of the R parser to token names in
191 | #' [xml_parse_data()]
192 | #'
193 | #' Some of the R token names (e.g. `'+'`) are not valid XML tag names,
194 | #' so [xml_parse_data()] replaces them with the values of this named
195 | #' character vector; its names are the original R token names.
196 | #'
197 | #' @export
198 | #' @seealso [xml_parse_data()]
199 | 
200 | xml_parse_token_map <- c(
201 |   "'?'" = "OP-QUESTION",
202 |   "'~'" = "OP-TILDE",
203 |   "'+'" = "OP-PLUS",
204 |   "'-'" = "OP-MINUS",
205 |   "'*'" = "OP-STAR",
206 |   "'/'" = "OP-SLASH",
207 |   "':'" = "OP-COLON",
208 |   "'^'" = "OP-CARET",
209 |   "'$'" = "OP-DOLLAR",
210 |   "'@'" = "OP-AT",
211 |   "'('" = "OP-LEFT-PAREN",
212 |   "'['" = "OP-LEFT-BRACKET",
213 |   "';'" = "OP-SEMICOLON",
214 |   "'{'" = "OP-LEFT-BRACE",
215 |   "'}'" = "OP-RIGHT-BRACE",
216 |   "')'" = "OP-RIGHT-PAREN",
217 |   "'!'" = "OP-EXCLAMATION",
218 |   "']'" = "OP-RIGHT-BRACKET",
219 |   "','" = "OP-COMMA",
220 |   "'\\\\'" = "OP-LAMBDA"
221 | )
222 | 
223 | xml_encode <- function(x) {
224 |   # `&` is escaped first so the entities added for `<` and `>` stay intact
225 |   escaped <- gsub("&", "&amp;", x, fixed = TRUE)
226 |   escaped <- gsub("<", "&lt;", escaped, fixed = TRUE)
227 |   escaped <- gsub(">", "&gt;", escaped, fixed = TRUE)
228 |   # drop control characters, keeping only tab (0x09) and newline (0x0a)
229 |   gsub("[\x01-\x08\x0b-\x1f]", "", escaped, useBytes = TRUE)
230 | }
231 | 


--------------------------------------------------------------------------------
/R/utils.R:
--------------------------------------------------------------------------------
 1 | reparse_octal <- function(pd, lines) {
 2 |   # Re-read the text of each token in `pd` straight from the source `lines`,
 3 |   # using its line1/col1 .. line2/col2 span
 4 |   read_span <- function(ii) {
 5 |     first <- pd$line1[ii]
 6 |     last <- pd$line2[ii]
 7 |     if (first == last) {
 8 |       return(substr(lines[first], pd$col1[ii], pd$col2[ii]))
 9 |     }
10 |     # Multi-line token: tail of the first line, any whole lines in between,
11 |     # then the head of the last line, joined by newlines
12 |     pieces <- c(
13 |       substring(lines[first], pd$col1[ii]),
14 |       if (first < last - 1L) lines[(first + 1L):(last - 1L)],
15 |       substr(lines[last], 1L, pd$col2[ii])
16 |     )
17 |     paste(pieces, collapse = "\n")
18 |   }
19 | 
20 |   vapply(seq_len(nrow(pd)), read_span, character(1L))
21 | }
22 | 


--------------------------------------------------------------------------------
/R/xmlparsedata-package.R:
--------------------------------------------------------------------------------
1 | #' @keywords internal
2 | #' @aliases xmlparsedata-package
3 | "_PACKAGE"
4 | 
5 | ## usethis namespace: start
6 | ## usethis namespace: end
7 | NULL
8 | 


--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | output: github_document
  3 | ---
  4 | 
  5 | ```{r}
  6 | #| label: setup
  7 | #| echo: false
  8 | #| message: false
  9 | knitr::opts_chunk$set(
 10 |   comment = "#>",
 11 |   tidy = FALSE,
 12 |   error = FALSE
 13 | )
 14 | ```
 15 | 
 16 | # xmlparsedata
 17 | 
 18 | > Parse Data of R Code as an 'XML' Tree
 19 | 
 20 | <!-- badges: start -->
 21 | [![R-CMD-check](https://github.com/r-lib/xmlparsedata/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/r-lib/xmlparsedata/actions/workflows/R-CMD-check.yaml)
 22 | [![](https://www.r-pkg.org/badges/version/xmlparsedata)](https://www.r-pkg.org/pkg/xmlparsedata)
 23 | [![CRAN RStudio mirror downloads](https://cranlogs.r-pkg.org/badges/xmlparsedata)](https://www.r-pkg.org/pkg/xmlparsedata)
 24 | [![Codecov test coverage](https://codecov.io/gh/r-lib/xmlparsedata/graph/badge.svg)](https://app.codecov.io/gh/r-lib/xmlparsedata)
 25 | <!-- badges: end -->
 26 | 
 27 | Convert the output of 'utils::getParseData()' to an 'XML' tree, that is
 28 | searchable and easier to manipulate in general.
 29 | 
 30 | ---
 31 | 
 32 |   - [Installation](#installation)
 33 |   - [Usage](#usage)
 34 |     - [Introduction](#introduction)
 35 |     - [`utils::getParseData()`](#utilsgetparsedata)
 36 |     - [`xml_parse_data()`](#xml_parse_data)
 37 |     - [Renaming some tokens](#renaming-some-tokens)
 38 |     - [Search the parse tree with `xml2`](#search-the-parse-tree-with-xml2)
 39 |   - [License](#license)
 40 | 
 41 | ## Installation
 42 | 
 43 | Stable version:
 44 | 
 45 | ```{r}
 46 | #| eval: false
 47 | install.packages("xmlparsedata")
 48 | ```
 49 | 
 50 | Development version:
 51 | 
 52 | ```{r}
 53 | #| eval: false
 54 | pak::pak("r-lib/xmlparsedata")
 55 | ```
 56 | 
 57 | ## Usage
 58 | 
 59 | ### Introduction
 60 | 
 61 | In recent R versions the parser can attach source code location
 62 | information to the parsed expressions. This information is often
 63 | useful for static analysis, e.g. code linting. It can be accessed
 64 | via the `utils::getParseData()` function.
 65 | 
 66 | `xmlparsedata` converts this information to an XML tree.
 67 | The R parser's token names are preserved in the XML as much as
 68 | possible, but some of them are not valid XML tag names, so they are
 69 | renamed, see below.
 70 | 
 71 | ### `utils::getParseData()`
 72 | 
 73 | `utils::getParseData()` summarizes the parse information in a data
 74 | frame. The data frame has one row per expression tree node, and each
 75 | node points to its parent. Here is a small example:
 76 | 
 77 | ```{r}
 78 | p <- parse(
 79 |   text = "function(a = 1, b = 2) { \n  a + b\n}\n",
 80 |   keep.source = TRUE
 81 |   )
 82 | getParseData(p)
 83 | ```
 84 | 
 85 | ### `xml_parse_data()`
 86 | 
 87 | `xmlparsedata::xml_parse_data()` converts the parse information to
 88 | an XML document. It works similarly to `getParseData()`. Specify the
 89 | `pretty = TRUE` option to pretty-indent the XML output. Note that this
 90 | has a small overhead, so if you are parsing large files, I suggest you
 91 | omit it.
 92 | 
 93 | ```{r}
 94 | library(xmlparsedata)
 95 | xml <- xml_parse_data(p, pretty = TRUE)
 96 | cat(xml)
 97 | ```
 98 | 
 99 | The top XML tag is `<exprlist>`, which is a list of
100 | expressions, each expression is an `<expr>` tag. Each tag
101 | has attributes that define the location: `line1`, `col1`,
102 | `line2`, `col2`. These are from the `getParseData()`
103 | data frame column names.
104 | 
105 | ### Renaming some tokens
106 | 
107 | The R parser's token names are preserved in the XML as much as
108 | possible, but some of them are not valid XML tag names, so they are
109 | renamed, see the `xml_parse_token_map` vector for the mapping:
110 | 
111 | ```{r}
112 | xml_parse_token_map
113 | ```
114 | 
115 | ### Search the parse tree with `xml2`
116 | 
117 | The `xml2` package can search XML documents using
118 | [XPath](https://en.wikipedia.org/wiki/XPath) expressions. This is often
119 | useful to search for specific code patterns.
120 | 
121 | As an example we search a source file from base R for `1:nrow(<expr>)`
122 | expressions, which are usually unsafe, as `nrow()` might be zero,
123 | and then the expression is equivalent to `1:0`, i.e. `c(1, 0)`, which
124 | is usually not the intended behavior.
125 | 
126 | We load and parse the file directly from the R source code mirror
127 | at https://github.com/wch/r-source:
128 | 
129 | ```{r}
130 | url <- paste0(
131 |   "https://raw.githubusercontent.com/wch/r-source/",
132 |   "4fc93819fc7401b8695ce57a948fe163d4188f47/src/library/tools/R/xgettext.R"
133 | )
134 | src <- readLines(url)
135 | p <- parse(text = src, keep.source = TRUE)
136 | ```
137 | 
138 | and we convert it to an XML tree:
139 | 
140 | ```{r}
141 | library(xml2)
142 | xml <- read_xml(xml_parse_data(p))
143 | ```
144 | 
145 | The `1:nrow(<expr>)` expression corresponds to the following
146 | tree in R:
147 | 
148 | ```
149 | <expr>
150 |   +-- <expr>
151 |     +-- NUM_CONST: 1
152 |   +-- ':'
153 |   +-- <expr>
154 |     +-- <expr>
155 |       +-- SYMBOL_FUNCTION_CALL nrow
156 |     +-- '('
157 |     +-- <expr>
158 |     +-- ')'
159 | ```
160 | 
161 | ```{r}
162 | bad <- xml_parse_data(
163 |   parse(text = "1:nrow(expr)", keep.source = TRUE),
164 |   pretty = TRUE
165 | )
166 | cat(bad)
167 | ```
168 | 
169 | This translates to the following XPath expression (ignoring
170 | the last three tokens of the `nrow(expr)` call):
171 | 
172 | ```{r}
173 | xp <- paste0(
174 |   "//expr",
175 |      "[expr[NUM_CONST[text()='1']]]",
176 |      "[OP-COLON]",
177 |      "[expr[expr[SYMBOL_FUNCTION_CALL[text()='nrow']]]]"
178 | )
179 | ```
180 | 
181 | We can search for this subtree with `xml2::xml_find_all()`:
182 | 
183 | ```{r}
184 | bad_nrow <- xml_find_all(xml, xp)
185 | bad_nrow
186 | ```
187 | 
188 | There is only one hit, in line 334:
189 | 
190 | ```{r}
191 | cbind(332:336, src[332:336])
192 | ```
193 | 
194 | ## Code of Conduct
195 | 
196 | Please note that the xmlparsedata project is released with a
197 | [Contributor Code of Conduct](https://r-lib.github.io/xmlparsedata/CODE_OF_CONDUCT.html).
198 | By contributing to this project, you agree to abide by its terms.
199 | 
200 | ## License
201 | 
202 | MIT © Mango Solutions, RStudio
203 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | 
  2 | # xmlparsedata
  3 | 
  4 | > Parse Data of R Code as an ‘XML’ Tree
  5 | 
  6 | <!-- badges: start -->
  7 | 
  8 | [![R-CMD-check](https://github.com/r-lib/xmlparsedata/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/r-lib/xmlparsedata/actions/workflows/R-CMD-check.yaml)
  9 | [![](https://www.r-pkg.org/badges/version/xmlparsedata)](https://www.r-pkg.org/pkg/xmlparsedata)
 10 | [![CRAN RStudio mirror
 11 | downloads](https://cranlogs.r-pkg.org/badges/xmlparsedata)](https://www.r-pkg.org/pkg/xmlparsedata)
 12 | [![Codecov test
 13 | coverage](https://codecov.io/gh/r-lib/xmlparsedata/graph/badge.svg)](https://app.codecov.io/gh/r-lib/xmlparsedata)
 14 | <!-- badges: end -->
 15 | 
 16 | Convert the output of ‘utils::getParseData()’ to an ‘XML’ tree, that is
 17 | searchable and easier to manipulate in general.
 18 | 
 19 | ------------------------------------------------------------------------
 20 | 
 21 | -   [Installation](#installation)
 22 | -   [Usage](#usage)
 23 |     -   [Introduction](#introduction)
 24 |     -   [`utils::getParseData()`](#utilsgetparsedata)
 25 |     -   [`xml_parse_data()`](#xml_parse_data)
 26 |     -   [Renaming some tokens](#renaming-some-tokens)
 27 |     -   [Search the parse tree with
 28 |         `xml2`](#search-the-parse-tree-with-xml2)
 29 | -   [License](#license)
 30 | 
 31 | ## Installation
 32 | 
 33 | Stable version:
 34 | 
 35 | ``` r
 36 | install.packages("xmlparsedata")
 37 | ```
 38 | 
 39 | Development version:
 40 | 
 41 | ``` r
 42 | pak::pak("r-lib/xmlparsedata")
 43 | ```
 44 | 
 45 | ## Usage
 46 | 
 47 | ### Introduction
 48 | 
 49 | In recent R versions the parser can attach source code location
 50 | information to the parsed expressions. This information is often useful
 51 | for static analysis, e.g. code linting. It can be accessed via the
 52 | `utils::getParseData()` function.
 53 | 
 54 | `xmlparsedata` converts this information to an XML tree. The R parser’s
 55 | token names are preserved in the XML as much as possible, but some of
 56 | them are not valid XML tag names, so they are renamed, see below.
 57 | 
 58 | ### `utils::getParseData()`
 59 | 
 60 | `utils::getParseData()` summarizes the parse information in a data
 61 | frame. The data frame has one row per expression tree node, and each
 62 | node points to its parent. Here is a small example:
 63 | 
 64 | ``` r
 65 | p <- parse(
 66 |   text = "function(a = 1, b = 2) { \n  a + b\n}\n",
 67 |   keep.source = TRUE
 68 |   )
 69 | getParseData(p)
 70 | ```
 71 | 
 72 |     #>    line1 col1 line2 col2 id parent          token terminal     text
 73 |     #> 33     1    1     3    1 33      0           expr    FALSE         
 74 |     #> 1      1    1     1    8  1     33       FUNCTION     TRUE function
 75 |     #> 2      1    9     1    9  2     33            '('     TRUE        (
 76 |     #> 3      1   10     1   10  3     33 SYMBOL_FORMALS     TRUE        a
 77 |     #> 4      1   12     1   12  4     33     EQ_FORMALS     TRUE        =
 78 |     #> 5      1   14     1   14  5      6      NUM_CONST     TRUE        1
 79 |     #> 6      1   14     1   14  6     33           expr    FALSE         
 80 |     #> 7      1   15     1   15  7     33            ','     TRUE        ,
 81 |     #> 10     1   17     1   17 10     33 SYMBOL_FORMALS     TRUE        b
 82 |     #> 11     1   19     1   19 11     33     EQ_FORMALS     TRUE        =
 83 |     #> 12     1   21     1   21 12     13      NUM_CONST     TRUE        2
 84 |     #> 13     1   21     1   21 13     33           expr    FALSE         
 85 |     #> 14     1   22     1   22 14     33            ')'     TRUE        )
 86 |     #> 30     1   24     3    1 30     33           expr    FALSE         
 87 |     #> 17     1   24     1   24 17     30            '{'     TRUE        {
 88 |     #> 25     2    3     2    7 25     30           expr    FALSE         
 89 |     #> 19     2    3     2    3 19     21         SYMBOL     TRUE        a
 90 |     #> 21     2    3     2    3 21     25           expr    FALSE         
 91 |     #> 20     2    5     2    5 20     25            '+'     TRUE        +
 92 |     #> 22     2    7     2    7 22     24         SYMBOL     TRUE        b
 93 |     #> 24     2    7     2    7 24     25           expr    FALSE         
 94 |     #> 28     3    1     3    1 28     30            '}'     TRUE        }
 95 | 
 96 | ### `xml_parse_data()`
 97 | 
 98 | `xmlparsedata::xml_parse_data()` converts the parse information to an
 99 | XML document. It works similarly to `getParseData()`. Specify the
100 | `pretty = TRUE` option to pretty-indent the XML output. Note that this
101 | has a small overhead, so if you are parsing large files, I suggest you
102 | omit it.
103 | 
104 | ``` r
105 | library(xmlparsedata)
106 | xml <- xml_parse_data(p, pretty = TRUE)
107 | cat(xml)
108 | ```
109 | 
110 |     #> <?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
111 |     #> <exprlist>
112 |     #>   <expr line1="1" col1="1" line2="3" col2="1" start="26" end="76">
113 |     #>     <FUNCTION line1="1" col1="1" line2="1" col2="8" start="26" end="33">function</FUNCTION>
114 |     #>     <OP-LEFT-PAREN line1="1" col1="9" line2="1" col2="9" start="34" end="34">(</OP-LEFT-PAREN>
115 |     #>     <SYMBOL_FORMALS line1="1" col1="10" line2="1" col2="10" start="35" end="35">a</SYMBOL_FORMALS>
116 |     #>     <EQ_FORMALS line1="1" col1="12" line2="1" col2="12" start="37" end="37">=</EQ_FORMALS>
117 |     #>     <expr line1="1" col1="14" line2="1" col2="14" start="39" end="39">
118 |     #>       <NUM_CONST line1="1" col1="14" line2="1" col2="14" start="39" end="39">1</NUM_CONST>
119 |     #>     </expr>
120 |     #>     <OP-COMMA line1="1" col1="15" line2="1" col2="15" start="40" end="40">,</OP-COMMA>
121 |     #>     <SYMBOL_FORMALS line1="1" col1="17" line2="1" col2="17" start="42" end="42">b</SYMBOL_FORMALS>
122 |     #>     <EQ_FORMALS line1="1" col1="19" line2="1" col2="19" start="44" end="44">=</EQ_FORMALS>
123 |     #>     <expr line1="1" col1="21" line2="1" col2="21" start="46" end="46">
124 |     #>       <NUM_CONST line1="1" col1="21" line2="1" col2="21" start="46" end="46">2</NUM_CONST>
125 |     #>     </expr>
126 |     #>     <OP-RIGHT-PAREN line1="1" col1="22" line2="1" col2="22" start="47" end="47">)</OP-RIGHT-PAREN>
127 |     #>     <expr line1="1" col1="24" line2="3" col2="1" start="49" end="76">
128 |     #>       <OP-LEFT-BRACE line1="1" col1="24" line2="1" col2="24" start="49" end="49">{</OP-LEFT-BRACE>
129 |     #>       <expr line1="2" col1="3" line2="2" col2="7" start="53" end="57">
130 |     #>         <expr line1="2" col1="3" line2="2" col2="3" start="53" end="53">
131 |     #>           <SYMBOL line1="2" col1="3" line2="2" col2="3" start="53" end="53">a</SYMBOL>
132 |     #>         </expr>
133 |     #>         <OP-PLUS line1="2" col1="5" line2="2" col2="5" start="55" end="55">+</OP-PLUS>
134 |     #>         <expr line1="2" col1="7" line2="2" col2="7" start="57" end="57">
135 |     #>           <SYMBOL line1="2" col1="7" line2="2" col2="7" start="57" end="57">b</SYMBOL>
136 |     #>         </expr>
137 |     #>       </expr>
138 |     #>       <OP-RIGHT-BRACE line1="3" col1="1" line2="3" col2="1" start="76" end="76">}</OP-RIGHT-BRACE>
139 |     #>     </expr>
140 |     #>   </expr>
141 |     #> </exprlist>
142 | 
143 | The top XML tag is `<exprlist>`, which is a list of expressions, each
144 | expression is an `<expr>` tag. Each tag has attributes that define the
145 | location: `line1`, `col1`, `line2`, `col2`. These are from the
146 | `getParseData()` data frame column names.
147 | 
148 | ### Renaming some tokens
149 | 
150 | The R parser’s token names are preserved in the XML as much as possible,
151 | but some of them are not valid XML tag names, so they are renamed, see
152 | the `xml_parse_token_map` vector for the mapping:
153 | 
154 | ``` r
155 | xml_parse_token_map
156 | ```
157 | 
158 |     #>                '?'                '~'                '+'                '-' 
159 |     #>      "OP-QUESTION"         "OP-TILDE"          "OP-PLUS"         "OP-MINUS" 
160 |     #>                '*'                '/'                ':'                '^' 
161 |     #>          "OP-STAR"         "OP-SLASH"         "OP-COLON"         "OP-CARET" 
162 |     #>                '$'                '@'                '('                '[' 
163 |     #>        "OP-DOLLAR"            "OP-AT"    "OP-LEFT-PAREN"  "OP-LEFT-BRACKET" 
164 |     #>                ';'                '{'                '}'                ')' 
165 |     #>     "OP-SEMICOLON"    "OP-LEFT-BRACE"   "OP-RIGHT-BRACE"   "OP-RIGHT-PAREN" 
166 |     #>                '!'                ']'                ','             '\\\\' 
167 |     #>   "OP-EXCLAMATION" "OP-RIGHT-BRACKET"         "OP-COMMA"        "OP-LAMBDA"
168 | 
169 | ### Search the parse tree with `xml2`
170 | 
171 | The `xml2` package can search XML documents using
172 | [XPath](https://en.wikipedia.org/wiki/XPath) expressions. This is often
173 | useful to search for specific code patterns.
174 | 
175 | As an example we search a source file from base R for `1:nrow(<expr>)`
176 | expressions, which are usually unsafe, as `nrow()` might be zero, and
177 | then the expression is equivalent to `1:0`, i.e. `c(1, 0)`, which is
178 | usually not the intended behavior.
179 | 
180 | We load and parse the file directly from the R source code mirror at
181 | <https://github.com/wch/r-source>:
182 | 
183 | ``` r
184 | url <- paste0(
185 |   "https://raw.githubusercontent.com/wch/r-source/",
186 |   "4fc93819fc7401b8695ce57a948fe163d4188f47/src/library/tools/R/xgettext.R"
187 | )
188 | src <- readLines(url)
189 | p <- parse(text = src, keep.source = TRUE)
190 | ```
191 | 
192 | and we convert it to an XML tree:
193 | 
194 | ``` r
195 | library(xml2)
196 | xml <- read_xml(xml_parse_data(p))
197 | ```
198 | 
199 | The `1:nrow(<expr>)` expression corresponds to the following tree in R:
200 | 
201 |     <expr>
202 |       +-- <expr>
203 |         +-- NUM_CONST: 1
204 |       +-- ':'
205 |       +-- <expr>
206 |         +-- <expr>
207 |           +-- SYMBOL_FUNCTION_CALL nrow
208 |         +-- '('
209 |         +-- <expr>
210 |         +-- ')'
211 | 
212 | ``` r
213 | bad <- xml_parse_data(
214 |   parse(text = "1:nrow(expr)", keep.source = TRUE),
215 |   pretty = TRUE
216 | )
217 | cat(bad)
218 | ```
219 | 
220 |     #> <?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
221 |     #> <exprlist>
222 |     #>   <expr line1="1" col1="1" line2="1" col2="12" start="14" end="25">
223 |     #>     <expr line1="1" col1="1" line2="1" col2="1" start="14" end="14">
224 |     #>       <NUM_CONST line1="1" col1="1" line2="1" col2="1" start="14" end="14">1</NUM_CONST>
225 |     #>     </expr>
226 |     #>     <OP-COLON line1="1" col1="2" line2="1" col2="2" start="15" end="15">:</OP-COLON>
227 |     #>     <expr line1="1" col1="3" line2="1" col2="12" start="16" end="25">
228 |     #>       <expr line1="1" col1="3" line2="1" col2="6" start="16" end="19">
229 |     #>         <SYMBOL_FUNCTION_CALL line1="1" col1="3" line2="1" col2="6" start="16" end="19">nrow</SYMBOL_FUNCTION_CALL>
230 |     #>       </expr>
231 |     #>       <OP-LEFT-PAREN line1="1" col1="7" line2="1" col2="7" start="20" end="20">(</OP-LEFT-PAREN>
232 |     #>       <expr line1="1" col1="8" line2="1" col2="11" start="21" end="24">
233 |     #>         <SYMBOL line1="1" col1="8" line2="1" col2="11" start="21" end="24">expr</SYMBOL>
234 |     #>       </expr>
235 |     #>       <OP-RIGHT-PAREN line1="1" col1="12" line2="1" col2="12" start="25" end="25">)</OP-RIGHT-PAREN>
236 |     #>     </expr>
237 |     #>   </expr>
238 |     #> </exprlist>
239 | 
240 | This translates to the following XPath expression (ignoring the last
241 | three tokens of the `nrow(expr)` call):
242 | 
243 | ``` r
244 | xp <- paste0(
245 |   "//expr",
246 |      "[expr[NUM_CONST[text()='1']]]",
247 |      "[OP-COLON]",
248 |      "[expr[expr[SYMBOL_FUNCTION_CALL[text()='nrow']]]]"
249 | )
250 | ```
251 | 
252 | We can search for this subtree with `xml2::xml_find_all()`:
253 | 
254 | ``` r
255 | bad_nrow <- xml_find_all(xml, xp)
256 | bad_nrow
257 | ```
258 | 
259 |     #> {xml_nodeset (1)}
260 |     #> [1] <expr line1="334" col1="19" line2="334" col2="27" start="38763" end="3877 ...
261 | 
262 | There is only one hit, in line 334:
263 | 
264 | ``` r
265 | cbind(332:336, src[332:336])
266 | ```
267 | 
268 |     #>      [,1]  [,2]                                           
269 |     #> [1,] "332" "\tcat(\"No errors\\n\")"                      
270 |     #> [2,] "333" "    else"                                     
271 |     #> [3,] "334" "\tfor (i in 1:nrow(x)) {"                     
272 |     #> [4,] "335" "\t    if (is.na(x[i, 2L])) cols <- c(1L, 3:5)"
273 |     #> [5,] "336" "\t    else cols <- 1:5"
274 | 
275 | ## Code of Conduct
276 | 
277 | Please note that the xmlparsedata project is released with a
278 | [Contributor Code of
279 | Conduct](https://r-lib.github.io/xmlparsedata/CODE_OF_CONDUCT.html). By
280 | contributing to this project, you agree to abide by its terms.
281 | 
282 | ## License
283 | 
284 | MIT © Mango Solutions, RStudio
285 | 


--------------------------------------------------------------------------------
/_pkgdown.yml:
--------------------------------------------------------------------------------
 1 | url: https://r-lib.github.io/xmlparsedata/
 2 | 
 3 | template:
 4 |   package: tidytemplate
 5 |   bootstrap: 5
 6 |   includes:
 7 |     in_header: |
 8 |       <script defer data-domain="r-lib.github.io/xmlparsedata,all.tidyverse.org" src="https://plausible.io/js/plausible.js"></script>
 9 | 
10 | development:
11 |   mode: auto
12 | 


--------------------------------------------------------------------------------
/air.toml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-lib/xmlparsedata/e6c9977f518bb6006328c30e4cbd21f54ab00475/air.toml


--------------------------------------------------------------------------------
/codecov.yml:
--------------------------------------------------------------------------------
 1 | comment: false
 2 | 
 3 | coverage:
 4 |   status:
 5 |     project:
 6 |       default:
 7 |         target: auto
 8 |         threshold: 1%
 9 |         informational: true
10 |     patch:
11 |       default:
12 |         target: auto
13 |         threshold: 1%
14 |         informational: true
15 | 


--------------------------------------------------------------------------------
/man/expr_as_xml.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/expr_as_xml.R
 3 | \name{expr_as_xml}
 4 | \alias{expr_as_xml}
 5 | \title{Get an XML representation of an expression}
 6 | \usage{
 7 | expr_as_xml(expr)
 8 | }
 9 | \arguments{
10 | \item{expr}{An expression.}
11 | }
12 | \description{
13 | Get an XML representation of an expression
14 | }
15 | 


--------------------------------------------------------------------------------
/man/xml_parse_data.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/package.R
 3 | \name{xml_parse_data}
 4 | \alias{xml_parse_data}
 5 | \title{Convert R parse data to XML}
 6 | \usage{
 7 | xml_parse_data(x, includeText = NA, pretty = FALSE)
 8 | }
 9 | \arguments{
10 | \item{x}{
11 |     an expression returned from \code{\link{parse}}, or a function or other
12 |     object with source reference information
13 |   }
14 | 
15 | \item{includeText}{
16 |     logical; whether to include the text of parsed items in the result
17 |   }
18 | 
19 | \item{pretty}{Whether to pretty-indent the XML output. It has a small
20 | overhead which probably only matters for very large source files.}
21 | }
22 | \value{
23 | An XML string representing the parse data. See details below.
24 | }
25 | \description{
26 | In recent R versions the parser can attach source code location
27 | information to the parsed expressions. This information is often
28 | useful for static analysis, e.g. code linting. It can be accessed
29 | via the \code{\link[utils:getParseData]{utils::getParseData()}} function.
30 | }
31 | \details{
32 | \code{xml_parse_data()} converts this information to an XML tree.
33 | The R parser's token names are preserved in the XML as much as
34 | possible, but some of them are not valid XML tag names, so they are
35 | renamed, see the \link{xml_parse_token_map} vector for the
36 | mapping.
37 | 
38 | The top XML tag is \verb{<exprlist>}, which is a list of
39 | expressions, each expression is an \verb{<expr>} tag. Each tag
40 | has attributes that define the location: \code{line1}, \code{col1},
41 | \code{line2}, \code{col2}. These are from the \code{\link[=getParseData]{getParseData()}}
42 | data frame column names. Next, there are two attributes,
43 | \code{start} and \code{end}, which can be used as an ordering of
44 | expressions in the document. Note that while the values
45 | are correlated with (and in some cases may match exactly)
46 | positions in the document, this cannot be relied upon.
47 | 
48 | See an example below. See also the README at
49 | \url{https://github.com/r-lib/xmlparsedata#readme}
50 | for examples on how to search the XML tree with the \code{xml2} package
51 | and XPath expressions.
52 | 
53 | Note that \code{xml_parse_data()} silently drops all control characters
54 | (0x01-0x1f) from the input, except horizontal tab (0x09) and newline
55 | (0x0a), because they are invalid in XML 1.0.
56 | }
57 | \examples{
58 | code <- "function(a = 1, b = 2) {\n  a + b\n}\n"
59 | expr <- parse(text = code, keep.source = TRUE)
60 | 
61 | # The base R way:
62 | getParseData(expr)
63 | 
64 | cat(xml_parse_data(expr, pretty = TRUE))
65 | }
66 | \seealso{
67 | \link{xml_parse_token_map} for the token names.
68 | \url{https://github.com/r-lib/xmlparsedata#readme} for more
69 | information and use cases.
70 | }
71 | 


--------------------------------------------------------------------------------
/man/xml_parse_token_map.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/package.R
 3 | \docType{data}
 4 | \name{xml_parse_token_map}
 5 | \alias{xml_parse_token_map}
 6 | \title{Map token names of the R parser to token names in
 7 | \code{\link[=xml_parse_data]{xml_parse_data()}}}
 8 | \format{
 9 | An object of class \code{character} of length 20.
10 | }
11 | \usage{
12 | xml_parse_token_map
13 | }
14 | \description{
15 | Some of the R token names are not valid XML tag names,
16 | so \code{\link[=xml_parse_data]{xml_parse_data()}} needs to replace them to create a
17 | valid XML file.
18 | }
19 | \seealso{
20 | \code{\link[=xml_parse_data]{xml_parse_data()}}
21 | }
22 | \keyword{datasets}
23 | 


--------------------------------------------------------------------------------
/man/xmlparsedata-package.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/xmlparsedata-package.R
 3 | \docType{package}
 4 | \name{xmlparsedata-package}
 5 | \alias{xmlparsedata-package}
 6 | \alias{_PACKAGE}
 7 | \title{xmlparsedata: Parse Data of 'R' Code as an 'XML' Tree}
 8 | \description{
 9 | Convert the output of 'utils::getParseData()' to an 'XML' tree, that one can search via 'XPath', and easier to manipulate in general.
10 | }
11 | \seealso{
12 | Useful links:
13 | \itemize{
14 |   \item \url{https://github.com/r-lib/xmlparsedata#readme}
15 |   \item \url{https://r-lib.github.io/xmlparsedata/}
16 |   \item Report bugs at \url{https://github.com/r-lib/xmlparsedata/issues}
17 | }
18 | 
19 | }
20 | \author{
21 | \strong{Maintainer}: Gábor Csárdi \email{csardi.gabor@gmail.com}
22 | 
23 | Other contributors:
24 | \itemize{
25 |   \item Posit Software, PBC [copyright holder, funder]
26 |   \item Mango Solutions [copyright holder, funder]
27 | }
28 | 
29 | }
30 | \keyword{internal}
31 | 


--------------------------------------------------------------------------------
/man/xmlparsedata.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/package.R
 3 | \docType{package}
 4 | \name{xmlparsedata}
 5 | \alias{xmlparsedata}
 6 | \title{Parse Data of R Code as an 'XML' Tree}
 7 | \description{
 8 | Convert the output of 'utils::getParseData()' to an 'XML' tree, that is
 9 | searchable and easier to manipulate in general.
10 | }
11 | 


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(xmlparsedata)
3 | # Run the test suite only when the xml2 package is available.
4 | if (requireNamespace("xml2", quietly = TRUE)) {
5 |   test_check("xmlparsedata")
6 | }
7 | 


--------------------------------------------------------------------------------
/tests/testthat/test-expr_as_xml.R:
--------------------------------------------------------------------------------
 1 | test_that("XML object is returned with correct structure", {
 2 |   skip_if_not_installed("xml2")
 3 | 
 4 |   expect_silent(expr_xml <- expr_as_xml(mtcars[, "cyl"]))
 5 |   expect_s3_class(expr_xml, "xml_document")
 6 | 
 7 |   # Tag names of the nodes below the first child of the parsed document.
 8 |   top_node <- xml2::xml_child(expr_xml)
 9 |   tag_names <- vapply(
10 |     xml2::xml_children(top_node),
11 |     xml2::xml_name,
12 |     character(1L)
13 |   )
14 |   wanted <- c("expr", "OP-LEFT-BRACKET", "OP-COMMA", "expr", "OP-RIGHT-BRACKET")
15 |   expect_identical(tag_names, wanted)
16 | })
17 | 
18 | test_that("multi-expression case also works", {
19 |   expect_silent(
20 |     expr_xml <- expr_as_xml({
21 |       1 + 1
22 |       sqrt(rnorm(100))
23 |     })
24 |   )
25 |   expect_identical(xml2::xml_name(expr_xml), "exprlist")
26 |   # The braced block contributes four nodes: `{`, `1 + 1`, `sqrt(...)`, `}`
27 |   expect_length(xml2::xml_children(xml2::xml_child(expr_xml)), 4L)
28 | })
29 | 
30 | test_that("literals are also fine", {
31 |   expect_silent(expr_as_xml("a b c")) # character literal
32 |   expect_silent(expr_as_xml(100L)) # integer literal
33 | })
34 | 


--------------------------------------------------------------------------------
/tests/testthat/test-xml_parse_data.R:
--------------------------------------------------------------------------------
  1 | test_that("empty input", {
  2 |   xml <- xml_parse_data(parse(text = "", keep.source = TRUE))
  3 |   expect_type(xml, "character")
  4 |   expect_length(xml, 1L)
  5 |   expect_match(xml, "<exprlist>\\s*</exprlist>") # root has no child nodes
  6 |   expect_silent(x <- xml2::read_xml(xml))
  7 | })
  8 | 
  9 | test_that("trivial input", {
 10 |   xml <- xml_parse_data(parse(text = "# comment\n", keep.source = TRUE))
 11 |   expect_type(xml, "character")
 12 |   expect_length(xml, 1L)
 13 |   expect_match(xml, "<exprlist>\\s*<COMMENT [^<]*</COMMENT>\\s*</exprlist>")
 14 |   expect_silent(x <- xml2::read_xml(xml))
 15 |   # A lone numeric constant must appear as <NUM_CONST> inside an <expr>.
 16 |   xml <- xml_parse_data(parse(text = "1", keep.source = TRUE))
 17 |   expect_match(
 18 |     xml,
 19 |     paste0(
 20 |       "<exprlist>\\s*<expr [^<]*<NUM_CONST.*</NUM_CONST>\\s*",
 21 |       "</expr>\\s*</exprlist>"
 22 |     )
 23 |   )
 24 |   expect_silent(x <- xml2::read_xml(xml))
 25 | })
 26 | 
 27 | test_that("non-trivial input", {
 28 |   # Source text of a real function from utils, deparsed once and reused.
 29 |   fun_src <- deparse(utils::install.packages)
 30 |   xml_compact <- xml_parse_data(parse(text = fun_src, keep.source = TRUE))
 31 |   expect_silent(x <- xml2::read_xml(xml_compact))
 32 | 
 33 |   xml_pretty <- xml_parse_data(
 34 |     parse(text = fun_src, keep.source = TRUE),
 35 |     pretty = TRUE
 36 |   )
 37 |   expect_silent(x <- xml2::read_xml(xml_pretty))
 38 | })
 39 | 
 40 | test_that("UTF-8 is OK", {
 41 |   src <- enc2native("# comment with éápő")
 42 |   xml <- xml_parse_data(parse(text = src, keep.source = TRUE))
 43 |   x <- xml2::read_xml(xml)
 44 |   # The col1/col2 attributes of the root's children must span all of `src`.
 45 |   comment <- xml2::xml_children(x)
 46 |   col1 <- xml2::xml_attr(comment, "col1")
 47 |   col2 <- xml2::xml_attr(comment, "col2")
 48 | 
 49 |   expect_equal(
 50 |     substring(src, col1, col2),
 51 |     src
 52 |   )
 53 | 
 54 |   src <- enc2native("# 現行の学校文法では、英語にあるような「目的語」「補語」")
 55 |   xml <- xml_parse_data(parse(text = src, keep.source = TRUE))
 56 |   x <- xml2::read_xml(xml)
 57 | 
 58 |   comment <- xml2::xml_children(x)
 59 |   col1 <- xml2::xml_attr(comment, "col1")
 60 |   col2 <- xml2::xml_attr(comment, "col2")
 61 |   # NOTE(review): expected value is UTF-8 re-encoded here, not above - confirm.
 62 |   expect_equal(
 63 |     substring(src, col1, col2),
 64 |     iconv(src, to = "UTF-8")
 65 |   )
 66 | 
 67 |   src <- enc2native("`%ééé%` <- function(l, r) l + r")
 68 |   xml <- xml_parse_data(parse(text = src, keep.source = TRUE), pretty = TRUE)
 69 |   # Find the backtick-quoted operator symbol by its exact text via XPath.
 70 |   op <- xml2::xml_find_all(
 71 |     xml2::read_xml(xml),
 72 |     iconv(
 73 |       enc2native("/exprlist/expr/expr/SYMBOL[text()='`%ééé%`']"),
 74 |       to = "UTF-8"
 75 |     )
 76 |   )
 77 |   expect_equal(length(op), 1)
 78 | })
 79 | 
 80 | test_that("data frame input", {
 81 |   parsed <- parse(text = "1 + 1", keep.source = TRUE)
 82 |   from_expr <- xml_parse_data(parsed)
 83 | 
 84 |   # Reduce the parse data to a bare data frame: drop the srcfile
 85 |   # attribute and reset the class.
 86 |   plain_pd <- getParseData(parsed)
 87 |   attr(plain_pd, "srcfile") <- NULL
 88 |   class(plain_pd) <- "data.frame"
 89 | 
 90 |   expect_equal(xml_parse_data(plain_pd), from_expr)
 91 | })
 92 | 
 93 | # NOTE(review): \027 is ETB, not ESC (\033); \t is a tab, not VT (\v) - confirm.
 94 | test_that("Control-C character", {
 95 |   src <- "# Control-C \003
 96 |           # Bell  \007
 97 |           # Escape \027
 98 |           # Form feed \f
 99 |           # Vertical tab \t
100 |           "
101 |   xml <- xml_parse_data(parse(text = src, keep.source = TRUE))
102 |   x <- xml2::read_xml(xml)
103 |   expect_s3_class(x, "xml_document")
104 | })
105 | 
106 | 
107 | test_that("equal_assign is handled on R 3.6", {
108 |   # `a = 1` is an example of an R statement that gets parsed into nested xml
109 |   # nodes that have different token / tag names (following the introduction
110 |   # of the `equal_assign` token to getParseData() in R-3.6), but the same
111 |   # ending position in the original code. Tokens/expressions that start
112 |   # before should end after any nested subexpressions in the resulting xml:
113 | 
114 |   xml <- xml_parse_data(parse(text = "a = 1", keep.source = TRUE))
115 |   expect_type(xml, "character")
116 |   expect_length(xml, 1L)
117 |   expect_silent(x <- xml2::read_xml(xml))
118 | })
119 | 
120 | test_that("includeText=FALSE works", {
121 |   # getParseData(..., includeText = FALSE) returns a data.frame
122 |   # without a `text` column. xml_parse_data() should handle this case
123 |   # correctly, and the resulting XML should not contain any text
124 |   # content.
125 |   xml <- xml_parse_data(
126 |     parse(text = "x <- 1", keep.source = TRUE),
127 |     includeText = FALSE
128 |   )
129 |   expect_type(xml, "character")
130 |   expect_length(xml, 1L)
131 |   expect_silent(x <- xml2::read_xml(xml))
132 |   expect_identical(xml2::xml_text(x), "")
133 | })
134 | 
135 | test_that("lambda operator works", {
136 |   testthat::skip_if_not(
137 |     getRversion() >= "4.1.0" && as.numeric(R.version[["svn rev"]]) >= 79553
138 |   )
139 |   # R-devel r79553 added the native pipe (|>) and lambdas such as \(x) x + 1.
140 |   xml <- xml_parse_data(parse(text = "\\(x) x + 1", keep.source = TRUE))
141 |   expect_type(xml, "character")
142 |   expect_length(xml, 1L)
143 |   expect_silent(x <- xml2::read_xml(xml))
144 |   expect_length(xml2::xml_find_all(x, "//OP-LAMBDA"), 1L)
145 | })
146 | 
147 | test_that("narrow octal strings are parsed correctly", {
148 |   expect_match(
149 |     xml_parse_data(parse(text = "'\\1'", keep.source = TRUE)),
150 |     "'\\1'",
151 |     fixed = TRUE
152 |   )
153 |   expect_match(
154 |     xml_parse_data(parse(text = '"\\1"', keep.source = TRUE)),
155 |     '"\\1"',
156 |     fixed = TRUE
157 |   )
158 | 
159 |   # multiple literals (regex match: `[\\]` stands for one literal backslash)
160 |   expect_match(
161 |     xml_parse_data(parse(text = "'\\1'\n'\\2'", keep.source = TRUE)),
162 |     "'[\\]1'.*'[\\]2'"
163 |   )
164 |   # multiple escapes inside a single literal
165 |   expect_match(
166 |     xml_parse_data(parse(text = "'\\1\\2'", keep.source = TRUE)),
167 |     "'\\1\\2'",
168 |     fixed = TRUE
169 |   )
170 |   # multi-line strings (the literal spans more than one source line)
171 |   expect_match(
172 |     xml_parse_data(parse(text = "'\n\\1\n'", keep.source = TRUE)),
173 |     "'\n\\1\n'",
174 |     fixed = TRUE
175 |   )
176 |   expect_match(
177 |     xml_parse_data(parse(text = "a <- '\\1\n\\2'", keep.source = TRUE)),
178 |     "'\\1\n\\2'",
179 |     fixed = TRUE
180 |   )
181 |   # mixed-length strings: a one-line and a multi-line literal in one call
182 |   expect_match(
183 |     xml_parse_data(parse(
184 |       text = "foo('\\1',\n  '\n\\2\n')",
185 |       keep.source = TRUE
186 |     )),
187 |     "'[\\]1'.*'\n[\\]2\n'"
188 |   )
189 | })
190 | 


--------------------------------------------------------------------------------