├── .Rbuildignore ├── .github ├── .gitignore ├── CODEOWNERS ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── ISSUE_TEMPLATE.md ├── ISSUE_TEMPLATE │ └── issue_template.md ├── SUPPORT.md └── workflows │ ├── R-CMD-check.yaml │ ├── pkgdown.yaml │ ├── pr-commands.yaml │ ├── recheck.yaml │ ├── rhub.yaml │ └── test-coverage.yaml ├── .gitignore ├── DESCRIPTION ├── LICENSE ├── LICENSE.md ├── NAMESPACE ├── NEWS.md ├── R ├── S4.R ├── as_list.R ├── as_xml_document.R ├── classes.R ├── format.R ├── import-standalone-obj-type.R ├── import-standalone-purrr.R ├── import-standalone-types-check.R ├── init.R ├── nodeset_apply.R ├── paths.R ├── utils.R ├── xml2-package.R ├── xml_attr.R ├── xml_children.R ├── xml_document.R ├── xml_find.R ├── xml_missing.R ├── xml_modify.R ├── xml_name.R ├── xml_namespaces.R ├── xml_node.R ├── xml_nodeset.R ├── xml_parse.R ├── xml_path.R ├── xml_schema.R ├── xml_serialize.R ├── xml_structure.R ├── xml_text.R ├── xml_type.R ├── xml_url.R ├── xml_write.R └── zzz.R ├── README.Rmd ├── README.md ├── _pkgdown.yml ├── cleanup ├── codecov.yml ├── configure ├── configure.win ├── cran-comments.md ├── docker └── r-devel-san │ └── Dockerfile ├── inst ├── extdata │ ├── cd_catalog.xml │ ├── order-doc.xml │ ├── order-schema.xml │ └── r-project.html └── include │ └── xml2_types.h ├── man ├── as_list.Rd ├── as_xml_document.Rd ├── download_xml.Rd ├── figures │ ├── lifecycle-archived.svg │ ├── lifecycle-defunct.svg │ ├── lifecycle-deprecated.svg │ ├── lifecycle-experimental.svg │ ├── lifecycle-maturing.svg │ ├── lifecycle-questioning.svg │ ├── lifecycle-soft-deprecated.svg │ ├── lifecycle-stable.svg │ └── lifecycle-superseded.svg ├── oldclass.Rd ├── read_xml.Rd ├── url_absolute.Rd ├── url_escape.Rd ├── url_parse.Rd ├── write_xml.Rd ├── xml2-package.Rd ├── xml2_example.Rd ├── xml_attr.Rd ├── xml_cdata.Rd ├── xml_children.Rd ├── xml_comment.Rd ├── xml_dtd.Rd ├── xml_find_all.Rd ├── xml_missing.Rd ├── xml_name.Rd ├── xml_new_document.Rd ├── xml_ns.Rd ├── xml_ns_strip.Rd ├── 
xml_path.Rd ├── xml_replace.Rd ├── xml_serialize.Rd ├── xml_set_namespace.Rd ├── xml_structure.Rd ├── xml_text.Rd ├── xml_type.Rd ├── xml_url.Rd └── xml_validate.Rd ├── revdep ├── .gitignore ├── README.md ├── cran.md ├── email.yml ├── failures.md └── problems.md ├── src ├── .gitignore ├── Makevars.in ├── Makevars.win ├── connection.cpp ├── connection.h ├── init.c ├── xml2_doc.cpp ├── xml2_init.c ├── xml2_namespace.cpp ├── xml2_node.cpp ├── xml2_output.cpp ├── xml2_schema.cpp ├── xml2_url.cpp ├── xml2_utils.h └── xml2_xpath.cpp ├── tests ├── testthat.R └── testthat │ ├── _snaps │ ├── xml_attr.md │ ├── xml_children.md │ ├── xml_document.md │ ├── xml_find.md │ ├── xml_name.md │ ├── xml_node.md │ ├── xml_nodeset.md │ ├── xml_parse.md │ ├── xml_structure.md │ └── xml_write.md │ ├── helper.R │ ├── lego.html.bz2 │ ├── ns-multiple-aliases.xml │ ├── ns-multiple-default.xml │ ├── ns-multiple-prefix.xml │ ├── ns-multiple.xml │ ├── records.dtd │ ├── records.xml │ ├── setup.R │ ├── test-as_list.R │ ├── test-as_xml_document.R │ ├── test-classes.R │ ├── test-format.R │ ├── test-null.R │ ├── test-xml_attr.R │ ├── test-xml_children.R │ ├── test-xml_document.R │ ├── test-xml_find.R │ ├── test-xml_missing.R │ ├── test-xml_modify.R │ ├── test-xml_name.R │ ├── test-xml_namespaces.R │ ├── test-xml_node.R │ ├── test-xml_nodeset.R │ ├── test-xml_parse.R │ ├── test-xml_schema.R │ ├── test-xml_serialize.R │ ├── test-xml_structure.R │ ├── test-xml_text.R │ ├── test-xml_type.R │ ├── test-xml_url.R │ ├── test-xml_write.R │ └── xml ├── tools └── winlibs.R ├── vignettes └── modification.Rmd └── xml2.Rproj /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | windows 4 | .travis.yml 5 | ^cran-comments\.md$ 6 | ^revdep$ 7 | ^src/Makevars$ 8 | ^codecov\.yml$ 9 | ^docker$ 10 | ^src/\.ycm_extra_conf\.py$ 11 | ^src/\.ycm_extra_conf\.pyc$ 12 | ^appveyor\.yml$ 13 | ^script\.R$ 14 | ^\.github$ 15 | ^TODO\.md$ 16 | 
^_pkgdown.yml$ 17 | ^docs/$ 18 | ^docs$ 19 | ^CRAN-RELEASE$ 20 | ^LICENSE\.md$ 21 | ^Makefile$ 22 | \.dll$ 23 | \.o$ 24 | \.so$ 25 | ^CRAN-SUBMISSION$ 26 | ^README\.Rmd$ 27 | ^configure.log$ 28 | -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # CODEOWNERS for xml2 2 | # https://www.tidyverse.org/development/understudies 3 | .github/CODEOWNERS @jimhester @jennybc 4 | -------------------------------------------------------------------------------- /.github/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to xml2 2 | 3 | This outlines how to propose a change to xml2. 4 | For more detailed info about contributing to this, and other tidyverse packages, please see the 5 | [**development contributing guide**](https://rstd.io/tidy-contrib). 6 | 7 | ## Fixing typos 8 | 9 | You can fix typos, spelling mistakes, or grammatical errors in the documentation directly using the GitHub web interface, as long as the changes are made in the _source_ file. 10 | This generally means you'll need to edit [roxygen2 comments](https://roxygen2.r-lib.org/articles/roxygen2.html) in an `.R`, not a `.Rd` file. 11 | You can find the `.R` file that generates the `.Rd` by reading the comment in the first line. 12 | 13 | ## Bigger changes 14 | 15 | If you want to make a bigger change, it's a good idea to first file an issue and make sure someone from the team agrees that it’s needed. 16 | If you’ve found a bug, please file an issue that illustrates the bug with a minimal 17 | [reprex](https://www.tidyverse.org/help/#reprex) (this will also help you write a unit test, if needed). 
18 | 19 | ### Pull request process 20 | 21 | * Fork the package and clone onto your computer. If you haven't done this before, we recommend using `usethis::create_from_github("r-lib/xml2", fork = TRUE)`. 22 | 23 | * Install all development dependencies with `devtools::install_dev_deps()`, and then make sure the package passes R CMD check by running `devtools::check()`. 24 | If R CMD check doesn't pass cleanly, it's a good idea to ask for help before continuing. 25 | * Create a Git branch for your pull request (PR). We recommend using `usethis::pr_init("brief-description-of-change")`. 26 | 27 | * Make your changes, commit to git, and then create a PR by running `usethis::pr_push()`, and following the prompts in your browser. 28 | The title of your PR should briefly describe the change. 29 | The body of your PR should contain `Fixes #issue-number`. 30 | 31 | * For user-facing changes, add a bullet to the top of `NEWS.md` (i.e. just below the first header). Follow the style described in <https://style.tidyverse.org/news.html>. 32 | 33 | ### Code style 34 | 35 | * New code should follow the tidyverse [style guide](https://style.tidyverse.org). 36 | You can use the [styler](https://CRAN.R-project.org/package=styler) package to apply these styles, but please don't restyle code that has nothing to do with your PR. 37 | 38 | * We use [roxygen2](https://cran.r-project.org/package=roxygen2), with [Markdown syntax](https://cran.r-project.org/web/packages/roxygen2/vignettes/markdown.html), for documentation. 39 | 40 | * We use [testthat](https://cran.r-project.org/package=testthat) for unit tests. 41 | Contributions with test cases included are easier to accept. 42 | 43 | ## Code of Conduct 44 | 45 | Please note that the xml2 project is released with a 46 | [Contributor Code of Conduct](CODE_OF_CONDUCT.md). By contributing to this 47 | project you agree to abide by its terms. 
48 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ### Issue Description and Expected Result 2 | 3 | 4 | ### Reproducible Example 5 | 20 | 21 |
22 | Session Info 23 | 24 | ```r 25 | devtools::session_info() # Paste output below 26 | #> output 27 | ``` 28 |
29 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/issue_template.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report or feature request 3 | about: Describe a bug you've seen or make a case for a new feature 4 | --- 5 | 6 | Please briefly describe your problem and what output you expect. If you have a question, please don't use this form. Instead, ask on <https://stackoverflow.com/> or <https://community.rstudio.com/>. 7 | 8 | Please include a minimal reproducible example (AKA a reprex). If you've never heard of a [reprex](http://reprex.tidyverse.org/) before, start by reading <https://www.tidyverse.org/help/#reprex>. 9 | 10 | Brief description of the problem 11 | 12 | ```r 13 | # insert reprex here 14 | ``` 15 | -------------------------------------------------------------------------------- /.github/SUPPORT.md: -------------------------------------------------------------------------------- 1 | # Getting help with xml2 2 | 3 | Thanks for using xml2! 4 | Before filing an issue, there are a few places to explore and pieces to put together to make the process as smooth as possible. 5 | 6 | ## Make a reprex 7 | 8 | Start by making a minimal **repr**oducible **ex**ample using the [reprex](https://reprex.tidyverse.org/) package. 9 | If you haven't heard of or used reprex before, you're in for a treat! 10 | Seriously, reprex will make all of your R-question-asking endeavors easier (which is a pretty insane ROI for the five to ten minutes it'll take you to learn what it's all about). 11 | For additional reprex pointers, check out the [Get help!](https://www.tidyverse.org/help/) section of the tidyverse site. 12 | 13 | ## Where to ask? 14 | 15 | Armed with your reprex, the next step is to figure out [where to ask](https://www.tidyverse.org/help/#where-to-ask). 16 | 17 | * If it's a question: start with [community.rstudio.com](https://community.rstudio.com/), and/or StackOverflow. There are more people there to answer questions. 
18 | 19 | * If it's a bug: you're in the right place, [file an issue](https://github.com/r-lib/xml2/issues/new). 20 | 21 | * If you're not sure: let the community help you figure it out! 22 | If your problem _is_ a bug or a feature request, you can easily return here and report it. 23 | 24 | Before opening a new issue, be sure to [search issues and pull requests](https://github.com/r-lib/xml2/issues) to make sure the bug hasn't been reported and/or already fixed in the development version. 25 | By default, the search will be pre-populated with `is:issue is:open`. 26 | You can [edit the qualifiers](https://help.github.com/articles/searching-issues-and-pull-requests/) (e.g. `is:pr`, `is:closed`) as needed. 27 | For example, you'd simply remove `is:open` to search _all_ issues in the repo, open or closed. 28 | 29 | ## What happens next? 30 | 31 | To be as efficient as possible, development of tidyverse packages tends to be very bursty, so you shouldn't worry if you don't get an immediate response. 32 | Typically we don't look at a repo until a sufficient quantity of issues accumulates, then there’s a burst of intense activity as we focus our efforts. 33 | That makes development more efficient because it avoids expensive context switching between problems, at the cost of taking longer to get back to you. 34 | This process makes a good reprex particularly important because it might be multiple months between your initial report and when we start working on it. 35 | If we can’t reproduce the bug, we can’t fix it! 36 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help 3 | # 4 | # NOTE: This workflow is overkill for most R packages and 5 | # check-standard.yaml is likely a better choice. 6 | # usethis::use_github_action("check-standard") will install it. 7 | on: 8 | push: 9 | branches: [main, master] 10 | pull_request: 11 | branches: [main, master] 12 | 13 | name: R-CMD-check.yaml 14 | 15 | permissions: read-all 16 | 17 | jobs: 18 | R-CMD-check: 19 | runs-on: ${{ matrix.config.os }} 20 | 21 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 22 | 23 | strategy: 24 | fail-fast: false 25 | matrix: 26 | config: 27 | - {os: macos-latest, r: 'release'} 28 | 29 | - {os: windows-latest, r: 'release'} 30 | - {os: windows-latest, r: 'oldrel-1'} 31 | - {os: windows-latest, r: 'oldrel-2'} 32 | - {os: windows-latest, r: 'oldrel-3'} 33 | - {os: windows-latest, r: 'oldrel-4'} 34 | 35 | - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} 36 | - {os: ubuntu-latest, r: 'release'} 37 | - {os: ubuntu-latest, r: 'oldrel-1'} 38 | - {os: ubuntu-latest, r: 'oldrel-2'} 39 | - {os: ubuntu-latest, r: 'oldrel-3'} 40 | - {os: ubuntu-latest, r: 'oldrel-4'} 41 | 42 | env: 43 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 44 | R_KEEP_PKG_SOURCE: yes 45 | 46 | steps: 47 | - uses: actions/checkout@v4 48 | 49 | - uses: r-lib/actions/setup-pandoc@v2 50 | 51 | - uses: r-lib/actions/setup-r@v2 52 | with: 53 | r-version: ${{ matrix.config.r }} 54 | http-user-agent: ${{ matrix.config.http-user-agent }} 55 | use-public-rspm: true 56 | 57 | - uses: r-lib/actions/setup-r-dependencies@v2 58 | with: 59 | extra-packages: any::rcmdcheck 60 | needs: check 61 | 62 | - uses: r-lib/actions/check-r-package@v2 63 | with: 64 | upload-snapshots: true 65 | build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")' 66 | -------------------------------------------------------------------------------- /.github/workflows/pkgdown.yaml: -------------------------------------------------------------------------------- 1 
| # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | release: 9 | types: [published] 10 | workflow_dispatch: 11 | 12 | name: pkgdown.yaml 13 | 14 | permissions: read-all 15 | 16 | jobs: 17 | pkgdown: 18 | runs-on: ubuntu-latest 19 | # Only restrict concurrency for non-PR jobs 20 | concurrency: 21 | group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} 22 | env: 23 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 24 | permissions: 25 | contents: write 26 | steps: 27 | - uses: actions/checkout@v4 28 | 29 | - uses: r-lib/actions/setup-pandoc@v2 30 | 31 | - uses: r-lib/actions/setup-r@v2 32 | with: 33 | use-public-rspm: true 34 | 35 | - uses: r-lib/actions/setup-r-dependencies@v2 36 | with: 37 | extra-packages: any::pkgdown, local::. 38 | needs: website 39 | 40 | - name: Build site 41 | run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) 42 | shell: Rscript {0} 43 | 44 | - name: Deploy to GitHub pages 🚀 45 | if: github.event_name != 'pull_request' 46 | uses: JamesIves/github-pages-deploy-action@v4.5.0 47 | with: 48 | clean: false 49 | branch: gh-pages 50 | folder: docs 51 | -------------------------------------------------------------------------------- /.github/workflows/pr-commands.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | issue_comment: 5 | types: [created] 6 | 7 | name: pr-commands.yaml 8 | 9 | permissions: read-all 10 | 11 | jobs: 12 | document: 13 | if: ${{ github.event.issue.pull_request && (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') && startsWith(github.event.comment.body, '/document') }} 14 | name: document 15 | runs-on: ubuntu-latest 16 | env: 17 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 18 | permissions: 19 | contents: write 20 | steps: 21 | - uses: actions/checkout@v4 22 | 23 | - uses: r-lib/actions/pr-fetch@v2 24 | with: 25 | repo-token: ${{ secrets.GITHUB_TOKEN }} 26 | 27 | - uses: r-lib/actions/setup-r@v2 28 | with: 29 | use-public-rspm: true 30 | 31 | - uses: r-lib/actions/setup-r-dependencies@v2 32 | with: 33 | extra-packages: any::roxygen2 34 | needs: pr-document 35 | 36 | - name: Document 37 | run: roxygen2::roxygenise() 38 | shell: Rscript {0} 39 | 40 | - name: commit 41 | run: | 42 | git config --local user.name "$GITHUB_ACTOR" 43 | git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com" 44 | git add man/\* NAMESPACE 45 | git commit -m 'Document' 46 | 47 | - uses: r-lib/actions/pr-push@v2 48 | with: 49 | repo-token: ${{ secrets.GITHUB_TOKEN }} 50 | 51 | style: 52 | if: ${{ github.event.issue.pull_request && (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') && startsWith(github.event.comment.body, '/style') }} 53 | name: style 54 | runs-on: ubuntu-latest 55 | env: 56 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 57 | permissions: 58 | contents: write 59 | steps: 60 | - uses: actions/checkout@v4 61 | 62 | - uses: r-lib/actions/pr-fetch@v2 63 | with: 64 | repo-token: ${{ secrets.GITHUB_TOKEN }} 65 | 66 | - uses: r-lib/actions/setup-r@v2 67 | 68 | - name: Install dependencies 69 | run: install.packages("styler") 70 | shell: Rscript {0} 71 | 72 | - name: Style 73 | 
run: styler::style_pkg() 74 | shell: Rscript {0} 75 | 76 | - name: commit 77 | run: | 78 | git config --local user.name "$GITHUB_ACTOR" 79 | git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com" 80 | git add \*.R 81 | git commit -m 'Style' 82 | 83 | - uses: r-lib/actions/pr-push@v2 84 | with: 85 | repo-token: ${{ secrets.GITHUB_TOKEN }} 86 | -------------------------------------------------------------------------------- /.github/workflows/recheck.yaml: -------------------------------------------------------------------------------- 1 | on: 2 | workflow_dispatch: 3 | inputs: 4 | which: 5 | type: choice 6 | description: Which dependents to check 7 | options: 8 | - strong 9 | - most 10 | schedule: 11 | - cron: '0 4 1 * *' 12 | 13 | name: Reverse dependency check 14 | 15 | jobs: 16 | revdep_check: 17 | name: Reverse check ${{ inputs.which }} dependents 18 | uses: r-devel/recheck/.github/workflows/recheck.yml@v1 19 | with: 20 | which: ${{ inputs.which }} 21 | subdirectory: '' 22 | -------------------------------------------------------------------------------- /.github/workflows/rhub.yaml: -------------------------------------------------------------------------------- 1 | # R-hub's generic GitHub Actions workflow file. Its canonical location is at 2 | # https://github.com/r-hub/rhub2/blob/v1/inst/workflow/rhub.yaml 3 | # You can update this file to a newer version using the rhub2 package: 4 | # 5 | # rhub2::rhub_setup() 6 | # 7 | # It is unlikely that you need to modify this file manually. 8 | 9 | name: R-hub 10 | run-name: ${{ github.event.inputs.name || format('Manually run by {0}', github.triggering_actor) }} (${{ github.event.inputs.id }}) 11 | 12 | on: 13 | workflow_dispatch: 14 | inputs: 15 | config: 16 | description: 'A comma separated list of R-hub platforms to use.' 17 | type: string 18 | default: 'linux,windows,macos' 19 | name: 20 | description: 'Run name. You can leave this empty now.' 
21 | type: string 22 | id: 23 | description: 'Unique ID. You can leave this empty now.' 24 | type: string 25 | 26 | jobs: 27 | 28 | setup: 29 | runs-on: ubuntu-latest 30 | outputs: 31 | containers: ${{ steps.rhub-setup.outputs.containers }} 32 | platforms: ${{ steps.rhub-setup.outputs.platforms }} 33 | 34 | steps: 35 | # NO NEED TO CHECKOUT HERE 36 | - uses: r-hub/rhub2/actions/rhub-setup@v1 37 | with: 38 | config: ${{ github.event.inputs.config }} 39 | id: rhub-setup 40 | 41 | linux-containers: 42 | needs: setup 43 | if: ${{ needs.setup.outputs.containers != '[]' }} 44 | runs-on: ubuntu-latest 45 | name: ${{ matrix.config.label }} 46 | strategy: 47 | fail-fast: false 48 | matrix: 49 | config: ${{ fromJson(needs.setup.outputs.containers) }} 50 | container: 51 | image: ${{ matrix.config.container }} 52 | 53 | steps: 54 | - uses: actions/checkout@v3 55 | - uses: r-hub/rhub2/actions/rhub-check@v1 56 | with: 57 | token: ${{ secrets.RHUB_TOKEN }} 58 | job-config: ${{ matrix.config.job-config }} 59 | 60 | other-platforms: 61 | needs: setup 62 | if: ${{ needs.setup.outputs.platforms != '[]' }} 63 | runs-on: ${{ matrix.config.os }} 64 | name: ${{ matrix.config.label }} 65 | strategy: 66 | fail-fast: false 67 | matrix: 68 | config: ${{ fromJson(needs.setup.outputs.platforms) }} 69 | 70 | steps: 71 | - uses: actions/checkout@v3 72 | - uses: r-hub/rhub2/actions/rhub-setup-r@v1 73 | with: 74 | job-config: ${{ matrix.config.job-config }} 75 | token: ${{ secrets.RHUB_TOKEN }} 76 | - uses: r-hub/rhub2/actions/rhub-check@v1 77 | with: 78 | job-config: ${{ matrix.config.job-config }} 79 | token: ${{ secrets.RHUB_TOKEN }} 80 | -------------------------------------------------------------------------------- /.github/workflows/test-coverage.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | 9 | name: test-coverage.yaml 10 | 11 | permissions: read-all 12 | 13 | jobs: 14 | test-coverage: 15 | runs-on: ubuntu-latest 16 | env: 17 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 18 | 19 | steps: 20 | - uses: actions/checkout@v4 21 | 22 | - uses: r-lib/actions/setup-r@v2 23 | with: 24 | use-public-rspm: true 25 | 26 | - uses: r-lib/actions/setup-r-dependencies@v2 27 | with: 28 | extra-packages: any::covr, any::xml2 29 | needs: coverage 30 | 31 | - name: Test coverage 32 | run: | 33 | cov <- covr::package_coverage( 34 | quiet = FALSE, 35 | clean = FALSE, 36 | install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package") 37 | ) 38 | covr::to_cobertura(cov) 39 | shell: Rscript {0} 40 | 41 | - uses: codecov/codecov-action@v4 42 | with: 43 | fail_ci_if_error: ${{ github.event_name != 'pull_request' && true || false }} 44 | file: ./cobertura.xml 45 | plugin: noop 46 | disable_search: true 47 | token: ${{ secrets.CODECOV_TOKEN }} 48 | 49 | - name: Show testthat output 50 | if: always() 51 | run: | 52 | ## -------------------------------------------------------------------- 53 | find '${{ runner.temp }}/package' -name 'testthat.Rout*' -exec cat '{}' \; || true 54 | shell: bash 55 | 56 | - name: Upload test results 57 | if: failure() 58 | uses: actions/upload-artifact@v4 59 | with: 60 | name: coverage-test-failures 61 | path: ${{ runner.temp }}/package 62 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | src/*.o 5 | src/*.so 6 | src/*.dll 7 | src/*.a 8 | src/Makevars 9 | inst/doc 10 | script.R 11 | TODO.md 12 | windows 13 | docs/ 14 | Makefile 15 | configure.log 16 | 
-------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: xml2 2 | Title: Parse XML 3 | Version: 1.3.8 4 | Authors@R: c( 5 | person("Hadley", "Wickham", role = "aut"), 6 | person("Jim", "Hester", role = "aut"), 7 | person("Jeroen", "Ooms", email = "jeroenooms@gmail.com", role = c("aut", "cre")), 8 | person("Posit Software, PBC", role = c("cph", "fnd")), 9 | person("R Foundation", role = "ctb", 10 | comment = "Copy of R-project homepage cached as example") 11 | ) 12 | Description: Bindings to 'libxml2' for working with XML data using a simple, 13 | consistent interface based on 'XPath' expressions. Also supports XML schema 14 | validation; for 'XSLT' transformations see the 'xslt' package. 15 | License: MIT + file LICENSE 16 | URL: https://xml2.r-lib.org, https://r-lib.r-universe.dev/xml2 17 | BugReports: https://github.com/r-lib/xml2/issues 18 | Depends: 19 | R (>= 3.6.0) 20 | Imports: 21 | cli, 22 | methods, 23 | rlang (>= 1.1.0) 24 | Suggests: 25 | covr, 26 | curl, 27 | httr, 28 | knitr, 29 | magrittr, 30 | mockery, 31 | rmarkdown, 32 | testthat (>= 3.2.0), 33 | xslt 34 | VignetteBuilder: 35 | knitr 36 | Config/Needs/website: tidyverse/tidytemplate 37 | Encoding: UTF-8 38 | Roxygen: list(markdown = TRUE) 39 | RoxygenNote: 7.2.3 40 | SystemRequirements: libxml2: libxml2-dev (deb), libxml2-devel (rpm) 41 | Collate: 42 | 'S4.R' 43 | 'as_list.R' 44 | 'xml_parse.R' 45 | 'as_xml_document.R' 46 | 'classes.R' 47 | 'format.R' 48 | 'import-standalone-obj-type.R' 49 | 'import-standalone-purrr.R' 50 | 'import-standalone-types-check.R' 51 | 'init.R' 52 | 'nodeset_apply.R' 53 | 'paths.R' 54 | 'utils.R' 55 | 'xml2-package.R' 56 | 'xml_attr.R' 57 | 'xml_children.R' 58 | 'xml_document.R' 59 | 'xml_find.R' 60 | 'xml_missing.R' 61 | 'xml_modify.R' 62 | 'xml_name.R' 63 | 'xml_namespaces.R' 64 | 'xml_node.R' 65 | 'xml_nodeset.R' 66 | 'xml_path.R' 67 | 
'xml_schema.R' 68 | 'xml_serialize.R' 69 | 'xml_structure.R' 70 | 'xml_text.R' 71 | 'xml_type.R' 72 | 'xml_url.R' 73 | 'xml_write.R' 74 | 'zzz.R' 75 | Config/testthat/edition: 3 76 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2025 2 | COPYRIGHT HOLDER: xml2 authors 3 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | Copyright (c) 2023 xml2 authors 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
# --------------------------------------------------------------------------------
# /R/S4.R:
# --------------------------------------------------------------------------------
#' Register S4 classes
#'
#' @description
#' Classes are exported so they can be re-used within S4 classes, see [methods::setOldClass()].
#'
#' * `xml_document`: a complete document.
#' * `xml_nodeset`: a _set_ of nodes within a document.
#' * `xml_missing`: a missing object, e.g. for an empty result set.
#' * `xml_node`: a single node in a document.
#'
#' @importFrom methods setOldClass
#' @keywords internal
#' @rdname oldclass
#' @name xml_document-class
#' @exportClass xml_document
setOldClass("xml_document")

#' @name xml_missing-class
#' @exportClass xml_missing
#' @rdname oldclass
setOldClass("xml_missing")

#' @name xml_node-class
#' @exportClass xml_node
#' @rdname oldclass
setOldClass("xml_node")

#' @name xml_nodeset-class
#' @exportClass xml_nodeset
#' @rdname oldclass
setOldClass("xml_nodeset")

# --------------------------------------------------------------------------------
# /R/as_list.R:
# --------------------------------------------------------------------------------
#' Coerce xml nodes to a list.
#'
#' This turns an XML document (or node or nodeset) into the equivalent R
#' list. Note that this is `as_list()`, not `as.list()`:
#' `lapply()` automatically calls `as.list()` on its inputs, so
#' we can't override the default.
#'
#' `as_list` currently only handles the three most common types of
#' children that an element might have:
#'
#' \itemize{
#'   \item Other elements, converted to lists.
#'   \item Attributes, stored as R attributes. Attributes that have special meanings in R
#'     ([class()], [comment()], [dim()],
#'     [dimnames()], [names()], [row.names()] and
#'     [tsp()]) are escaped with '.'
#'   \item Text, stored as a character vector.
#' }
#'
#' @inheritParams xml_name
#' @param ... Needed for compatibility with generic. Unused.
#' @export
#' @examples
#' as_list(read_xml("<foo> a <b /><c><![CDATA[<d></d>]]></c></foo>"))
#' as_list(read_xml("<foo> <bar><baz /></bar> </foo>"))
#' as_list(read_xml("<foo id = 'a'></foo>"))
#' as_list(read_xml("<foo><bar id='a'/><bar id='b'/></foo>"))
as_list <- function(x, ns = character(), ...) {
  UseMethod("as_list")
}

#' @export
as_list.xml_missing <- function(x, ns = character(), ...) {
  # A missing node converts to an empty list.
  list()
}

#' @export
as_list.xml_document <- function(x, ns = character(), ...) {
  # An object that does not also inherit from "xml_node" has nothing to convert.
  if (!inherits(x, "xml_node")) {
    return(list())
  }

  # Delegate to the next method, then wrap the result in a one-element list
  # named by `xml_name(x)`.
  out <- list(NextMethod())
  names(out) <- xml_name(x)
  out
}

#' @export
as_list.xml_node <- function(x, ns = character(), ...) {
  contents <- xml_contents(x)
  if (length(contents) == 0) {
    # Base case - contents
    type <- xml_type(x)

    # Text-like leaves are returned as their text.
    if (type %in% c("text", "cdata")) {
      return(xml_text(x))
    }
    # Any other leaf that is neither an element nor a document is returned as
    # a placeholder string of the form "[ <type> ]".
    if (type != "element" && type != "document") {
      return(paste("[", type, "]"))
    }

    out <- list()
  } else {
    # Recurse into every child node.
    out <- lapply(seq_along(contents), function(i) as_list(contents[[i]], ns = ns))

    # Only element children contribute a name; all other children get "".
    nms <- ifelse(xml_type(contents) == "element", xml_name(contents, ns = ns), "")
    if (any(nms != "")) {
      names(out) <- nms
    }
  }

  # Add xml attributes as R attributes
  # (`attributes<-` replaces every attribute, so the names are passed along
  # explicitly to keep them.)
  attributes(out) <- c(list(names = names(out)), xml_to_r_attrs(xml_attrs(x, ns = ns)))

  out
}

#' @export
as_list.xml_nodeset <- function(x, ns = character(), ...) {
  # Convert each node of the set in turn.
  lapply(seq_along(x), function(i) as_list(x[[i]], ns = ns))
}

# Attribute names that have a special meaning in R; XML attributes with these
# names are escaped with a leading "." when stored as R attributes.
special_attributes <- c("class", "comment", "dim", "dimnames", "names", "row.names", "tsp")

# Convert XML attributes `x` (a named vector) to a list usable as R
# attributes, prefixing names found in `special_attributes` with ".".
# Returns NULL when `x` is empty.
xml_to_r_attrs <- function(x) {
  if (length(x) == 0) {
    return(NULL)
  }
  # escape special names
  special <- names(x) %in% special_attributes
  names(x)[special] <- paste0(".", names(x)[special])
  as.list(x)
}

# Inverse of `xml_to_r_attrs()`: drop attributes named in
# `special_attributes` and strip the leading "." from escaped names.
# Returns NULL when `x` is empty.
r_attrs_to_xml <- function(x) {
  if (length(x) == 0) {
    return(NULL)
  }

  # Drop R special attributes
  x <- x[!names(x) %in% special_attributes]

  # Rename any xml attributes needed
  special <- names(x) %in% paste0(".", special_attributes)

  names(x)[special] <- sub("^\\.", "", names(x)[special])
  x
}

# --------------------------------------------------------------------------------
# /R/as_xml_document.R:
# --------------------------------------------------------------------------------
#' Coerce an R list to xml nodes.
#'
#' This turns an R list into the equivalent XML document. Not all R lists will
#' produce valid XML, in particular there can only be one root node and all
#' child nodes need to be named (or empty) lists. R attributes become XML
#' attributes and R names become XML node names.
#'
#' @inheritParams as_list
#' @include as_list.R xml_parse.R
#' @export
#' @examples
#' # empty lists generate empty nodes
#' as_xml_document(list(x = list()))
#'
#' # Nesting multiple nodes
#' as_xml_document(list(foo = list(bar = list(baz = list()))))
#'
#' # attributes are stored as R attributes
#' as_xml_document(list(foo = structure(list(), id = "a")))
#' as_xml_document(list(foo = list(
#'   bar = structure(list(), id = "a"),
#'   bar = structure(list(), id = "b")
#' )))
as_xml_document <- function(x, ...) {
  UseMethod("as_xml_document")
}

#' @export
as_xml_document.character <- read_xml.character

#' @export
as_xml_document.raw <- read_xml.raw

#' @export
as_xml_document.connection <- read_xml.connection

#' @export
as_xml_document.response <- read_xml.response

#' @export
as_xml_document.list <- function(x, ...) {
  # Reject lists with more than one top-level element.
  if (length(x) > 1) {
    cli::cli_abort("Root nodes must be of length 1.")
  }


  # Recursively append `x` beneath `parent`. Atomic values become text nodes.
  # When `tag` is supplied, a child element named `tag` is created first and
  # receives the XML attributes recovered from the R attributes of `x`.
  add_node <- function(x, parent, tag = NULL) {
    if (is.atomic(x)) {
      return(.Call(node_new_text, parent$node, as.character(x)))
    }
    if (!is.null(tag)) {
      parent <- xml_add_child(parent, tag)
      attr <- r_attrs_to_xml(attributes(x))
      for (i in seq_along(attr)) {
        xml_set_attr(parent, names(attr)[[i]], attr[[i]])
      }
    }
    for (i in seq_along(x)) {
      add_node(x[[i]], parent, names(x)[[i]])
    }
  }

  doc <- xml_new_document()
  add_node(x, doc)
  xml_root(doc)
}

#' @export
as_xml_document.xml_node <- function(x, ...) {
  # Build a new root from `x`, passing `.copy = TRUE` to xml_new_root().
  xml_new_root(.value = x, ..., .copy = TRUE)
}

#' @export
as_xml_document.xml_nodeset <- function(x, root, ...) {
  # `root` becomes the root of the new document; each node of `x` is then
  # added to it as a child, again with `.copy = TRUE`.
  doc <- xml_new_root(.value = root, ..., .copy = TRUE)
  for (i in seq_along(x)) {
    xml_add_child(doc, x[[i]], .copy = TRUE)
  }
  doc
}

#' @export
as_xml_document.xml_document <- function(x, ...) {
  # Already a document: returned unchanged.
  x
}

# --------------------------------------------------------------------------------
# /R/classes.R:
# --------------------------------------------------------------------------------
#' @useDynLib xml2, .registration = TRUE
NULL

#' Construct a cdata node
#' @param content The CDATA content, does not include `<![CDATA[`
#' @examples
#' x <- xml_new_root("root")
#' xml_add_child(x, xml_cdata("<d/>"))
#' as.character(x)
#' @export
xml_cdata <- function(content) {
  # Tag the content with the "xml_cdata" class and return it.
  class(content) <- "xml_cdata"
  content
}

#' Construct a comment node
#' @param content The comment content
#' @examples
#' x <- xml_new_document()
#' r <- xml_add_child(x, "root")
#' xml_add_child(r, xml_comment("Hello!"))
#' as.character(x)
#' @export
xml_comment <- function(content) {
  # Tag the content with the "xml_comment" class and return it.
  class(content) <- "xml_comment"
  content
}

#' Construct a document type definition
#'
#' This is used to create simple document type definitions. If you need to
#' create a more complicated definition with internal subsets it is recommended
#' to parse a string directly with `read_xml()`.
#' @param name The name of the declaration
#' @param external_id The external ID of the declaration
#' @param system_id The system ID of the declaration
#' @examples
#' r <- xml_new_root(
#'   xml_dtd(
#'     "html",
#'     "-//W3C//DTD XHTML 1.0 Transitional//EN",
#'     "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"
#'   )
#' )
#'
#' # Use read_xml directly for more complicated DTD
#' d <- read_xml(
#'   '<!DOCTYPE doc [
#' <!ELEMENT doc (#PCDATA)>
#' <!ENTITY foo " test entity">
#' ]>
#' <doc>This is a valid document &foo; !</doc>'
#' )
#' @export
xml_dtd <- function(name = "", external_id = "", system_id = "") {
  # A DTD is a classed list of its three identifying strings.
  structure(
    list(name = name, external_id = external_id, system_id = system_id),
    class = "xml_dtd"
  )
}

# --------------------------------------------------------------------------------
# /R/format.R:
# --------------------------------------------------------------------------------
#' @export
format.xml_node <- function(x, ...) {
  # Render the opening tag only: "<name attr="value" ...>".
  attrs <- xml_attrs(x)
  tag_parts <- c(xml_name(x), format_attributes(attrs))
  paste0("<", paste(tag_parts, collapse = " "), ">")
}

# Format a named attribute vector as `name="value"` strings; empty input
# gives an empty character vector.
format_attributes <- function(x) {
  if (length(x) == 0) {
    return(character(0))
  }
  paste(names(x), quote_str(x), sep = "=")
}

# --------------------------------------------------------------------------------
# /R/init.R:
# --------------------------------------------------------------------------------
# Version reported by the compiled routine, as a `numeric_version`.
libxml2_version <- function() {
  as.numeric_version(.Call(libxml2_version_))
}

# Parse option flags as reported by the compiled routine.
xml_parse_options <- function() {
  .Call(xml_parse_options_)
}

# Save option flags as reported by the compiled routine.
xml_save_options <- function() {
  .Call(xml_save_options_)
}

# --------------------------------------------------------------------------------
# /R/nodeset_apply.R:
# --------------------------------------------------------------------------------
# Apply `fun` to the underlying node pointer(s) of `x` and collect the
# results into a nodeset.
nodeset_apply <- function(x, fun, ...) UseMethod("nodeset_apply")

#' @export
nodeset_apply.xml_missing <- function(x, fun, ...) {
  xml_nodeset()
}

#' @export
nodeset_apply.xml_nodeset <- function(x, fun, ...) {
  if (length(x) == 0) {
    return(xml_nodeset())
  }

  is_missing <- is.na(x)
  # Preallocate one slot per input node. (Previously `list(length(x))`, which
  # created a one-element list holding the count and only reached the right
  # length because every index is assigned below.)
  res <- vector("list", length(x))

  res[is_missing] <- list(xml_missing())
  if (any(!is_missing)) {
    res[!is_missing] <- lapply(x[!is_missing], function(x) fun(x$node, ...))
  }

  # The document handle is taken from the first node of the set.
  make_nodeset(res, x[[1]]$doc)
}

#' @export
nodeset_apply.xml_node <- function(x, fun, ...) {
  # `fun` returns node pointers; wrap each one with the node's document.
  nodes <- fun(x$node, ...)
  xml_nodeset(lapply(nodes, xml_node, doc = x$doc))
}

#' @export
nodeset_apply.xml_document <- function(x, fun, ...) {
  # A document that is not also an "xml_node" yields an empty nodeset.
  if (inherits(x, "xml_node")) {
    NextMethod()
  } else {
    xml_nodeset()
  }
}

# --------------------------------------------------------------------------------
# /R/paths.R:
# --------------------------------------------------------------------------------
# Turn a single path string into something readable: a URL connection for
# URLs, a decompressing connection for gz/bz2/xz/zip files, otherwise the
# checked path itself. Anything that is not a length-one character vector is
# returned untouched.
path_to_connection <- function(path, check = c("file", "dir")) {
  check <- match.arg(check)

  if (!is.character(path) || length(path) != 1L) {
    return(path)
  }

  if (is_url(path)) {
    # Prefer curl when it is installed, otherwise fall back to base url()
    if (is_installed("curl")) {
      return(curl::curl(path))
    } else {
      return(url(path))
    }
  }

  if (check == "file") {
    path <- check_path(path)
  } else {
    # Only the containing directory has to exist
    path <- file.path(check_path(dirname(path)), basename(path))
  }
  switch(tools::file_ext(path),
    gz = gzfile(path, ""),
    bz2 = bzfile(path, ""),
    xz = xzfile(path, ""),
    zip = zipfile(path, ""),
    path
  )
}

# TRUE for strings starting with http://, https://, ftp:// or ftps://.
is_url <- function(path) {
  grepl("^(http|ftp)s?://", path)
}

# Return the normalised path if it exists; otherwise abort with a message
# that mentions the working directory for relative paths.
check_path <- function(path, call = caller_env()) {
  if (file.exists(path)) {
    return(normalizePath(path, "/", mustWork = FALSE))
  }

  msg <- "{.file {path}} does not exist"
  if (!is_absolute_path(path)) {
    msg <- paste0(msg, " in current working directory ({.path {getwd()}})")
  }
  msg <- paste0(msg, ".")

  cli::cli_abort(msg, call = call)
}
# TRUE when `path` starts with "/", a drive letter followed by ":", a
# backslash, or "~".
is_absolute_path <- function(path) {
  grepl("^(/|[A-Za-z]:|\\\\|~)", path)
}

# Open a connection to the first file listed in a zip archive, informing the
# user when the archive holds more than one file.
zipfile <- function(path, open = "r") {
  listing <- utils::unzip(path, list = TRUE)
  file <- listing$Name[[1]]

  if (nrow(listing) > 1) {
    cli::cli_inform("Multiple files in zip: reading {.file {file}}")
  }

  unz(path, file, open = open)
}

# --------------------------------------------------------------------------------
# /R/utils.R:
# --------------------------------------------------------------------------------
# Null-default operator: `a` unless it is NULL, in which case `b`.
`%||%` <- function(a, b) {
  if (!is.null(a)) a else b
}

# TRUE when every element of `x` has a usable name.
is_named <- function(x) {
  all(has_names(x))
}

# Element-wise test for a name that is neither NA nor "".
has_names <- function(x) {
  nms <- names(x)
  if (is.null(nms)) {
    return(rep(FALSE, length(x)))
  }
  !is.na(nms) & nms != ""
}

# non smart quote version of sQuote
quote_str <- function(x, quote = "\"") {
  if (length(x) == 0) {
    return(character(0))
  }

  paste0(quote, x, quote)
}

# Format the C bitwise flags for display in Rd. The input object is a named
# integer vector with a 'descriptions' character vector attribute that
# corresponds to each flag.
describe_options <- function(x) {
  # One \item per flag, pairing each name with its description.
  items <- paste0(
    " \\item{", names(x), "}{", attr(x, "descriptions"), "}",
    collapse = "\n"
  )
  paste0("\\describe{\n", items, "\n}")
}

# Wrap each element of `x` in plain single quotes.
s_quote <- function(x) paste0("'", x, "'")

# Similar to match.arg, but returns 0L with NULL or empty input and
# errors if any of the inputs are not found (fixing
# https://bugs.r-project.org/bugzilla3/show_bug.cgi?id=16659).
# Numeric input is passed through as integer; character input is partially
# matched against names(options) and the matched flag values are summed.
parse_options <- function(arg, options, error_call = caller_env()) {
  if (is.numeric(arg)) {
    return(as.integer(arg))
  }

  nothing_requested <- is.null(arg) || !any(nzchar(arg))
  if (nothing_requested) {
    return(0L)
  }

  # set duplicates.ok = TRUE so any duplicates are counted differently than
  # non-matches, then take only unique results
  i <- pmatch(arg, names(options), duplicates.ok = TRUE)
  if (anyNA(i)) {
    cli::cli_abort(
      c(
        x = "{.arg options} {.val {arg[is.na(i)][1L]}} is not a valid option.",
        i = "Valid options are one of {.or {.val {names(options)}}}.",
        i = "See {.help [read_html](xml2::read_html)} for all options."
      ),
      call = error_call
    )
  }
  matched <- unique(i)
  sum(options[matched])
}

#' Get path to a xml2 example
#'
#' xml2 comes bundled with a number of sample files in its \sQuote{inst/extdata}
#' directory. This function makes them easy to access.
#' @param path Name of file. If `NULL`, the example files will be listed.
#' @export
xml2_example <- function(path = NULL) {
  # With a file name, resolve it inside extdata (erroring if it is absent);
  # without one, list what the directory contains.
  if (!is.null(path)) {
    return(system.file("extdata", path, package = "xml2", mustWork = TRUE))
  }
  dir(system.file("extdata", package = "xml2"))
}

# --------------------------------------------------------------------------------
# /R/xml2-package.R:
# --------------------------------------------------------------------------------
#' @keywords internal
"_PACKAGE"

## usethis namespace: start
#' @import rlang
## usethis namespace: end
NULL

# --------------------------------------------------------------------------------
# /R/xml_children.R:
# --------------------------------------------------------------------------------
#' Navigate around the family tree.
#'
#' `xml_children` returns only elements, `xml_contents` returns
#' all nodes. `xml_length` returns the number of children.
#' `xml_parent` returns the parent node, `xml_parents`
#' returns all parents up to the root. `xml_siblings` returns all nodes
#' at the same level. `xml_child` makes it easy to specify a specific
#' child to return.
#'
#' @inheritParams xml_name
#' @param only_elements For `xml_length`, should it count all children,
#'   or just children that are elements (the default)?
#' @param search For `xml_child`, either the child number to return (by
#'   position), or the name of the child node to return. If there are multiple
#'   child nodes with the same name, the first will be returned
#' @return A node or nodeset (possibly empty). Results are always de-duplicated.
#' @export
#' @examples
#' x <- read_xml("<foo> <bar><boo /></bar> <baz/> </foo>")
#' xml_children(x)
#' xml_children(xml_children(x))
#' xml_siblings(xml_children(x)[[1]])
#'
#' # Note that each unique node only appears once in the output
#' xml_parent(xml_children(x))
#'
#' # Mixed content
#' x <- read_xml("<foo> a <b/> c <d>e</d> f</foo>")
#' # Children gets the elements, contents gets all node types
#' xml_children(x)
#' xml_contents(x)
#'
#' xml_length(x)
#' xml_length(x, only_elements = FALSE)
#'
#' # xml_child makes it easier to select specific children
#' xml_child(x)
#' xml_child(x, 2)
#' xml_child(x, "baz")
xml_children <- function(x) {
  # TRUE asks the compiled routine for element children only.
  nodeset_apply(x, function(x) .Call(node_children, x, TRUE))
}

#' @export
#' @rdname xml_children
xml_child <- function(x, search = 1, ns = xml_ns(x)) {
  if (length(search) != 1) {
    # Name the argument itself; `{.arg {search}}` would print its value.
    cli::cli_abort("{.arg search} must be of length 1.")
  }

  if (is.numeric(search)) {
    # Positional lookup among the element children
    xml_children(x)[[search]]
  } else if (is.character(search)) {
    # Lookup by name: first direct child matching the name
    xml_find_first(x, xpath = paste0("./", search), ns = ns)
  } else {
    cli::cli_abort("{.arg search} must be `numeric` or `character`.")
  }
}

#' @export
#' @rdname xml_children
xml_contents <- function(x) {
  # FALSE asks the compiled routine for children of every node type.
  nodeset_apply(x, function(x) .Call(node_children, x, FALSE))
}

#' @export
#' @rdname xml_children
xml_parents <- function(x) {
  nodeset_apply(x, function(x) .Call(node_parents, x))
}

#' @export
#' @rdname xml_children
xml_siblings <- function(x) {
  nodeset_apply(x, function(x) .Call(node_siblings, x, TRUE))
}

#' @export
#' @rdname xml_children
xml_parent <- function(x) {
  UseMethod("xml_parent")
}

#' @export
xml_parent.xml_missing <- function(x) {
  # The parent of a missing node is itself missing.
  xml_missing()
}

#' @export
xml_parent.xml_node <- function(x) {
  xml_node(.Call(node_parent, x$node), x$doc)
}

#' @export
xml_parent.xml_nodeset <- function(x) {
  nodeset_apply(x, function(x) .Call(node_parent, x))
}


#' @export
#' @rdname xml_children
xml_length <- function(x, only_elements = TRUE) {
  .Call(node_length, x, only_elements)
}

#' @export
#' @rdname xml_children
xml_root <- function(x) {
  stopifnot(inherits(x, c("xml_node", "xml_document", "xml_nodeset")))

  if (inherits(x, "xml_nodeset")) {
    # A nodeset delegates to its first node; an empty set has no root.
    if (length(x) == 0) {
      return(NULL)
    }
    return(xml_root(x[[1]]))
  }

  if (.Call(doc_has_root, x$doc)) {
    xml_document(x$doc)
  } else {
    xml_missing()
  }
}

# --------------------------------------------------------------------------------
# /R/xml_document.R:
# --------------------------------------------------------------------------------
# Wrap a document pointer. When the document has a root, the result is the
# root node with "xml_document" prepended to its class; otherwise it is a
# bare list holding only the document pointer.
xml_document <- function(doc) {
  if (!.Call(doc_has_root, doc)) {
    return(structure(list(doc = doc), class = "xml_document"))
  }
  root <- xml_node(.Call(doc_root, doc), doc)
  class(root) <- c("xml_document", class(root))
  root
}

# "html" or "xml" depending on the document behind `x`; objects without a
# `doc` element count as "xml".
doc_type <- function(x) {
  if (is.null(x$doc)) {
    return("xml")
  }
  if (.Call(doc_is_html, x$doc)) {
    "html"
  } else {
    "xml"
  }
}

#' @export
print.xml_document <- function(x, width = getOption("width"), max_n = 20, ...) {
  doc <- xml_document(x$doc)
  cat("{", doc_type(x), "_document}\n", sep = "")
  # Only a document with a root has a tag and children to show.
  if (inherits(doc, "xml_node")) {
    cat(format(doc), "\n", sep = "")
    show_nodes(xml_children(doc), width = width, max_n = max_n)
  }
}

#' @export
as.character.xml_document <- function(x, ..., options = "format", encoding = "UTF-8") {
  # Translate option names into the integer flag value first.
  options <- parse_options(options, xml_save_options())
  .Call(doc_write_character, x$doc, encoding, options)
}

# --------------------------------------------------------------------------------
# /R/xml_missing.R:
# --------------------------------------------------------------------------------
#' Construct a missing xml object
#' @export
#' @keywords internal
xml_missing <- function() {
  structure(list(), class = "xml_missing")
}

# NOTE(review): this returns an empty string, so print() shows a blank line;
# the intended label may have been lost from this copy of the file - confirm.
format.xml_missing <- function(x, ...) {
  ""
}

#' @export
print.xml_missing <- function(x, width = getOption("width"), max_n = 20, ...) {
  cat("{xml_missing}\n")
  cat(format(x), "\n", sep = "")
}

#' @export
as.character.xml_missing <- function(x, ...) {
  NA_character_
}

# These mimic the behavior of NA[[1]], NA[[2]], NA[1], NA[2]

#' @export
`[.xml_missing` <- function(x, i, ...) x

#' @export
`[[.xml_missing` <- function(x, i, ...) if (i == 1L) x else cli::cli_abort("subscript out of bounds")

#' @export
is.na.xml_missing <- function(x) {
  TRUE
}

# --------------------------------------------------------------------------------
# /R/xml_name.R:
# --------------------------------------------------------------------------------
#' The (tag) name of an xml element.
#'
#' @param x A document, node, or node set.
#' @param ns Optionally, a named vector giving prefix-url pairs, as produced
#'   by [xml_ns()]. If provided, all names will be explicitly
#'   qualified with the ns prefix, i.e.
#'   if the element `bar` is defined
#'   in namespace `foo`, it will be called `foo:bar`. (And
#'   similarly for attributes). Default namespaces must be given an explicit
#'   name. The ns is ignored when using [xml_name<-()] and
#'   [xml_set_name()].
#' @return A character vector.
#' @export
#' @examples
#' x <- read_xml("<bar>123</bar>")
#' xml_name(x)
#'
#' y <- read_xml("<bar><baz>1</baz>abc<foo /></bar>")
#' z <- xml_children(y)
#' xml_name(xml_children(y))
xml_name <- function(x, ns = character()) {
  .Call(node_name, x, ns)
}

#' Modify the (tag) name of an element
#'
#' @param value a character vector with replacement name.
#' @rdname xml_name
#' @export
`xml_name<-` <- function(x, ns = character(), value) {
  UseMethod("xml_name<-")
}

#' @export
`xml_name<-.xml_node` <- function(x, ns = character(), value) {
  # The rename happens in place on the underlying node; `ns` is not used.
  .Call(node_set_name, x$node, value)
  x
}

#' @export
`xml_name<-.xml_nodeset` <- function(x, ns = character(), value) {
  if (length(x) == 0) {
    return(x)
  }
  # Wrap a bare namespace vector so Map() hands the whole of it to each call
  ns_list <- if (is.list(ns)) ns else list(ns)
  Map(`xml_name<-`, x, ns_list, value)
  x
}

#' @export
`xml_name<-.xml_missing` <- function(x, ns = character(), value) {
  # Nothing to rename on a missing node.
  x
}

# Function-call form of `xml_name<-`, shared by the xml_set_name() methods.
set_name <- function(x, value, ns = character()) {
  `xml_name<-`(x = x, ns = ns, value = value)
}

#' @rdname xml_name
#' @export
xml_set_name <- function(x, value, ns = character()) {
  UseMethod("xml_set_name")
}

#' @export
xml_set_name.xml_node <- set_name

#' @export
xml_set_name.xml_nodeset <- set_name

#' @export
xml_set_name.xml_missing <- set_name

# --------------------------------------------------------------------------------
# /R/xml_namespaces.R:
# --------------------------------------------------------------------------------
#' XML namespaces.
#'
#' `xml_ns` extracts all namespaces from a document, matching each
#' unique namespace url with the prefix it was first associated with. Default
#' namespaces are named `d1`, `d2` etc. Use `xml_ns_rename`
#' to change the prefixes. Once you have a namespace object, you can pass it to
#' other functions to work with fully qualified names instead of local names.
#'
#' @export
#' @inheritParams xml_name
#' @param old,... An existing xml_namespace object followed by name-value
#'   (old prefix-new prefix) pairs to replace.
#' @return A character vector with class `xml_namespace` so the
#'   default display is a little nicer.
#' @examples
#' x <- read_xml('
#'  <root>
#'    <doc1 xmlns = "http://foo.com"><baz /></doc1>
#'    <doc2 xmlns = "http://bar.com"><baz /></doc2>
#'  </root>
#' ')
#' xml_ns(x)
#'
#' # When there are default namespaces, it's a good idea to rename
#' # them to give informative names:
#' ns <- xml_ns_rename(xml_ns(x), d1 = "foo", d2 = "bar")
#' ns
#'
#' # Now we can pass ns to other xml function to use fully qualified names
#' baz <- xml_children(xml_children(x))
#' xml_name(baz)
#' xml_name(baz, ns)
#'
#' xml_find_all(x, "//baz")
#' xml_find_all(x, "//foo:baz", ns)
#'
#' str(as_list(x))
#' str(as_list(x, ns))
xml_ns <- function(x) {
  UseMethod("xml_ns")
}

#' @export
xml_ns.xml_document <- function(x) {
  if (length(x) == 0) {
    return(character())
  }

  stopifnot(inherits(x, "xml_document"))
  namespaces <- .Call(doc_namespaces, x$doc)

  # Default (unprefixed) namespaces are numbered d1, d2, ...
  prefixes <- names(namespaces)
  unprefixed <- prefixes == ""
  prefixes[unprefixed] <- paste0("d", seq_len(sum(unprefixed)))

  # Repeated prefixes get a numeric suffix so every name is unique
  names(namespaces) <- make.unique(prefixes, "")

  class(namespaces) <- "xml_namespace"

  namespaces
}

#' @export
xml_ns.xml_node <- function(x) {
  # Namespaces are looked up on the node's root document.
  xml_ns(xml_root(x))
}

#' @export
xml_ns.xml_nodeset <- function(x) {
  # An empty set has no namespaces; otherwise defer to the first node.
  if (length(x) > 0) {
    xml_ns(x[[1]])
  } else {
    character()
  }
}

#' @export
xml_ns.xml_missing <- function(x) {
  character()
}

#' @export
print.xml_namespace <- function(x, ...) {
  # format() pads the prefixes to a common width so the arrows line up.
  padded <- format(names(x))
  lines <- paste0(padded, " <-> ", x, collapse = "\n")

  cat(lines, "\n", sep = "")
}

#' @export
#' @rdname xml_ns
xml_ns_rename <- function(old, ...) {
  new <- c(...)

  # Every prefix being renamed must already be present in `old`.
  m <- match(names(new), names(old))
  if (anyNA(m)) {
    # The local must keep the name `missing`: the message interpolates it.
    missing <- paste(names(new)[is.na(m)], collapse = ", ")
    cli::cli_abort("Some prefixes [{missing}] don't already exist.")
  }

  names(old)[m] <- new
  old
}

# --------------------------------------------------------------------------------
# /R/xml_node.R:
# --------------------------------------------------------------------------------
# node -------------------------------------------------------------------------

# Pair a node pointer with its document in an "xml_node" object; an existing
# xml_node is passed through unchanged.
xml_node <- function(node = NULL, doc = NULL) {
  if (inherits(node, "xml_node")) {
    return(node)
  }
  structure(list(node = node, doc = doc), class = "xml_node")
}

#' @export
as.character.xml_node <- function(x, ..., options = "format", encoding = "UTF-8") {
  # Translate option names into the integer flag value first.
  options <- parse_options(options, xml_save_options())
  .Call(node_write_character, x$node, encoding, options)
}

#' @export
print.xml_node <- function(x, width = getOption("width"), max_n = 20, ...) {
  # Header line, the node's own tag, then its element children.
  cat("{", doc_type(x), "_node}\n", sep = "")
  cat(format(x), "\n", sep = "")
  show_nodes(xml_children(x), width = width, max_n = max_n)
}

#' @export
is.na.xml_node <- function(x) {
  FALSE
}

# --------------------------------------------------------------------------------
# /R/xml_nodeset.R:
# --------------------------------------------------------------------------------
# Build an "xml_nodeset" from a list of nodes, dropping duplicates unless
# `deduplicate` is anything other than TRUE.
xml_nodeset <- function(nodes = list(), deduplicate = TRUE) {
  if (isTRUE(deduplicate)) {
    is_dup <- .Call(nodes_duplicated, nodes)
    nodes <- nodes[!is_dup]
  }
  class(nodes) <- "xml_nodeset"
  nodes
}

#' @param nodes A list (possibly nested) of external pointers to nodes
#' @return a nodeset
#' @noRd
make_nodeset <- function(nodes, doc) {
  # Flatten one level of nesting, then wrap each pointer with `doc`.
  flat <- unlist(nodes, recursive = FALSE)

  xml_nodeset(lapply(flat, xml_node, doc = doc))
}

#' @export
print.xml_nodeset <- function(x, width = getOption("width"), max_n = 20, ...) {
  n <- length(x)
  cat("{", doc_type(x), "_nodeset (", n, ")}\n", sep = "")

  if (n > 0) {
    show_nodes(x, width = width, max_n = max_n)
  }
}

#' @export
as.character.xml_nodeset <- function(x, ...) {
  # One string per node.
  vapply(x, as.character, FUN.VALUE = character(1))
}

#' @export
`[.xml_nodeset` <- function(x, i, ...) {
  # Subsetting an empty set returns it as is; otherwise the default subset
  # result is re-wrapped as a nodeset.
  if (length(x) == 0) {
    return(x)
  }
  xml_nodeset(NextMethod())
}

#' Wrapper for encodeString() that takes width into consideration
#'
#' encodeString() is relatively expensive to run (see #366), so
#' avoid doing so to very wide inputs by first trimming inputs
#' to approximately the correct width, then encoding. A second
#' round of truncation occurs after encoding to account for
#' any newly-inserted characters bumping an input too wide.
#' @noRd
encode_with_width <- function(x, width) {
  # First pass: shorten over-wide inputs before the costly encoding step.
  too_wide <- nchar(x) > width
  x[too_wide] <- substr(x[too_wide], 1L, width - 3L)

  x <- encodeString(x)

  # Second pass: anything trimmed above, or widened by encoding, is cut
  # again and marked with an ellipsis.
  needs_dots <- too_wide | nchar(x) > width
  x[needs_dots] <- paste(substr(x[needs_dots], 1L, width - 3L), "...")
  x
}

# Print up to `max_n` nodes, one per line, each prefixed with a right-aligned
# "[i]" label and clipped to `width`.
show_nodes <- function(x, width = getOption("width"), max_n = 20) {
  stopifnot(inherits(x, "xml_nodeset"))

  n <- length(x)
  if (n == 0) {
    return()
  }

  # Keep only the first `max_n` nodes and remember that some were dropped
  trunc <- n > max_n
  if (trunc) {
    n <- max_n
    x <- x[seq_len(n)]
  }

  label <- format(paste0("[", seq_len(n), "]"), justify = "right")
  contents <- vapply(x, as.character, FUN.VALUE = character(1L))

  cat(encode_with_width(paste(label, contents), width), sep = "\n")
  if (trunc) {
    cat("...\n")
  }
  invisible()
}

#' @export
is.na.xml_nodeset <- function(x) {
  # Element-wise: one logical per node.
  vapply(x, is.na, logical(1))
}

# --------------------------------------------------------------------------------
# /R/xml_path.R:
# --------------------------------------------------------------------------------
#' Retrieve the xpath to a node
#'
#' This is useful when you want to figure out where nodes matching an
#' xpath expression live in a document.
#'
#' @inheritParams xml_name
#' @return A character vector.
#' @export
#' @examples
#' x <- read_xml("<foo><bar><baz /></bar><baz /></foo>")
#' xml_path(xml_find_all(x, ".//baz"))
xml_path <- function(x) {
  .Call(node_path, x)
}

# --------------------------------------------------------------------------------
# /R/xml_schema.R:
# --------------------------------------------------------------------------------
#' Validate XML schema
#'
#' Validate an XML document against an XML 1.0 schema.
#'
#' @inheritParams xml_name
#' @return TRUE or FALSE
#' @export
#' @param schema an XML document containing the schema
#' @examples # Example from https://msdn.microsoft.com/en-us/library/ms256129(v=vs.110).aspx
#' doc <- read_xml(system.file("extdata/order-doc.xml", package = "xml2"))
#' schema <- read_xml(system.file("extdata/order-schema.xml", package = "xml2"))
#' xml_validate(doc, schema)
xml_validate <- function(x, schema) {
  UseMethod("xml_validate")
}

#' @export
xml_validate.xml_document <- function(x, schema) {
  # The schema must itself be a parsed document.
  stopifnot(inherits(schema, "xml_document"))
  .Call(doc_validate, x$doc, schema$doc)
}

# --------------------------------------------------------------------------------
# /R/xml_serialize.R:
# --------------------------------------------------------------------------------
#' Serializing XML objects to connections.
#'
#' @inheritParams base::serialize
#' @param ... Additional arguments passed to [read_xml()].
#' @inherit base::serialize return
#' @examples
#' library(xml2)
#' x <- read_xml("<a>
#'   <b><c>123</c></b>
#'   <b><c>456</c></b>
#' </a>")
#'
#' b <- xml_find_all(x, "//b")
#' out <- xml_serialize(b, NULL)
#' xml_unserialize(out)
#' @export
xml_serialize <- function(object, connection, ...) UseMethod("xml_serialize")

#' @export
xml_serialize.xml_document <- function(object, connection, ...) {
  # A character `connection` is a file path: open it here, close it on exit.
  if (is.character(connection)) {
    connection <- file(connection, "w", raw = TRUE)
    on.exit(close(connection))
  }
  # The document type travels as an attribute so it can be restored later.
  payload <- structure(
    as.character(object, ...),
    doc_type = doc_type(object),
    class = "xml_serialized_document"
  )
  serialize(payload, connection)
}

#' @export
xml_serialize.xml_node <- function(object, connection, ...) {
  if (is.character(connection)) {
    connection <- file(connection, "w", raw = TRUE)
    on.exit(close(connection))
  }
  # A single node is stored as a document of its own.
  as_doc <- as_xml_document(object)
  payload <- structure(as.character(as_doc, ...), class = "xml_serialized_node")
  serialize(payload, connection)
}

#' @export
xml_serialize.xml_nodeset <- function(object, connection, ...) {
  if (is.character(connection)) {
    connection <- file(connection, "w", raw = TRUE)
    on.exit(close(connection))
  }
  # The nodes are stored as children of a synthetic "root" element.
  as_doc <- as_xml_document(object, "root")
  payload <- structure(as.character(as_doc, ...), class = "xml_serialized_nodeset")
  serialize(payload, connection)
}

#' @rdname xml_serialize
#' @export
xml_unserialize <- function(connection, ...) {
  if (is.character(connection)) {
    connection <- file(connection, "r", raw = TRUE)
    on.exit(close(connection))
  }
  object <- unserialize(connection)

  if (inherits(object, "xml_serialized_nodeset")) {
    parsed <- read_xml(unclass(object), ...)

    # Select only the direct children of the root
    return(xml_find_all(parsed, "/*/node()"))
  }

  if (inherits(object, "xml_serialized_node")) {
    parsed <- read_xml(unclass(object), ...)

    # Select only the root
    return(xml_find_first(parsed, "/node()"))
  }

  if (inherits(object, "xml_serialized_document")) {
    # `as_html` falls back to the stored document type unless the caller
    # supplied it through `...`.
    read_xml_int <- function(object, as_html = FALSE, ...) {
      if (missing(as_html)) {
        as_html <- identical(attr(object, "doc_type", exact = TRUE), "html")
      }
      read_xml(unclass(object), as_html = as_html, ...)
    }
    return(read_xml_int(unclass(object), ...))
  }

  cli::cli_abort("Not a serialized xml2 object.")
}

# --------------------------------------------------------------------------------
# /R/xml_structure.R:
# --------------------------------------------------------------------------------
#' Show the structure of an html/xml document.
#'
#' Show the structure of an html/xml document without displaying any of
#' the values. This is useful if you want to get a high level view of the
#' way a document is organised. Compared to `xml_structure`,
#' `html_structure` prints the id and class attributes.
#'
#' @param x HTML/XML document (or part thereof)
#' @param indent Number of spaces to indent
#' @inheritParams base::cat
#' @export
#' @examples
#' xml_structure(read_xml("<a><b><c/><c/></b><d/></a>"))
#'
#' rproj <- read_html(system.file("extdata", "r-project.html", package = "xml2"))
#' xml_structure(rproj)
#' xml_structure(xml_find_all(rproj, ".//p"))
#'
#' h <- read_html("<body><p id = 'a'></p><p class = 'c d'></p></body>")
#' html_structure(h)
xml_structure <- function(x, indent = 2, file = "") {
  # An empty, non-appending write first; every later write appends.
  cat(file = file)
  tree_structure(x, indent = indent, html = FALSE, file = file)
}

#' @export
#' @rdname xml_structure
html_structure <- function(x, indent = 2, file = "") {
  cat(file = file)
  tree_structure(x, indent = indent, html = TRUE, file = file)
}

# Internal generic behind xml_structure() and html_structure().
tree_structure <- function(x, indent = 2, html = FALSE, file = "") {
  UseMethod("tree_structure")
}

#' @export
tree_structure.xml_missing <- function(x, indent = 2, html = FALSE, file = "") {
  NA_character_
}

#' @export
tree_structure.xml_nodeset <- function(x, indent = 2, html = FALSE, file = "") {
  # Each node gets a "[[i]]" header, its tree, and a blank separator line.
  for (idx in seq_along(x)) {
    cat("[[", idx, "]]\n", sep = "", file = file, append = TRUE)
    print_xml_structure(x[[idx]], indent = indent, html = html, file = file)
    cat("\n", file = file, append = TRUE)
  }

  invisible()
}

#' @export
tree_structure.xml_node <- function(x, indent = 2, html = FALSE, file = "") {
  print_xml_structure(x, indent = indent, html = html, file = file)
  invisible()
}

# Write one line for `x` and recurse into its contents. Elements print as
# "<name ...>" with their attribute names; other node types print as
# "{type}". With `html = TRUE`, id and class are shown as "#id" / ".class"
# and left out of the attribute-name list.
print_xml_structure <- function(x, prefix = 0, indent = 2, html = FALSE, file = "") {
  padding <- paste(rep(" ", prefix), collapse = "")
  type <- xml_type(x)

  if (type == "element") {
    node_attrs <- xml_attrs(x)
    if (html) {
      shown <- list()
      if ("id" %in% names(node_attrs)) {
        shown$id <- paste0("#", node_attrs[["id"]])
        node_attrs <- node_attrs[setdiff(names(node_attrs), "id")]
      }

      if ("class" %in% names(node_attrs)) {
        # Space-separated class names become dot-separated
        shown$class <- paste0(".", gsub(" ", ".", node_attrs[["class"]]))
        node_attrs <- node_attrs[setdiff(names(node_attrs), "class")]
      }

      attr_str <- paste(unlist(shown), collapse = " ")
    } else {
      attr_str <- ""
    }

    if (length(node_attrs) > 0) {
      attr_names <- paste0(names(node_attrs), collapse = ", ")
      attr_str <- paste0(attr_str, " [", attr_names, "]")
    }

    node <- paste0("<", xml_name(x), attr_str, ">")

    cat(padding, node, "\n", sep = "", file = file, append = TRUE)
    # Children are printed one indent level deeper
    lapply(
      xml_contents(x),
      print_xml_structure,
      prefix = prefix + indent,
      indent = indent,
      html = html,
      file = file
    )
  } else {
    cat(padding, "{", type, "}\n", sep = "", file = file, append = TRUE)
  }
}

# --------------------------------------------------------------------------------
# /R/xml_text.R:
# --------------------------------------------------------------------------------
#' Extract or modify the text
#'
#' `xml_text` returns a character vector, `xml_double` returns a
#' numeric vector, `xml_integer` returns an integer vector.
#' @inheritParams xml_name
#' @param trim If `TRUE` will trim leading and trailing spaces.
#' @return A character vector, the same length as x.
#' @examples
#' x <- read_xml("<p>This is some text. This is <b>bold!</b></p>")
#' xml_text(x)
#' xml_text(xml_children(x))
#'
#' x <- read_xml("<x>This is some text. <x>This is some nested text.</x></x>")
#' xml_text(x)
#' xml_text(xml_find_all(x, "//x"))
#'
#' x <- read_xml("<p>   Some text    </p>")
#' xml_text(x, trim = TRUE)
#'
#' # xml_double() and xml_integer() are useful for extracting numeric attributes
#' x <- read_xml("<plot><point x='1' y='2' /><point x='2' y='1' /></plot>")
#' xml_integer(xml_find_all(x, "//@x"))
#' @export
xml_text <- function(x, trim = FALSE) {
  text <- .Call(node_text, x)
  # Only a literal TRUE triggers trimming.
  if (isTRUE(trim)) trim_text(text) else text
}

# Strip leading and trailing whitespace, including U+00A0.
trim_text <- function(x) {
  no_lead <- sub("^[[:space:]\u00a0]+", "", x)
  sub("[[:space:]\u00a0]+$", "", no_lead)
}

#' @rdname xml_text
#' @param value character vector with replacement text.
#' @export
`xml_text<-` <- function(x, value) {
  UseMethod("xml_text<-")
}

#' @export
`xml_text<-.xml_nodeset` <- function(x, value) {
  if (length(x) == 0) {
    return(x)
  }
  # We need to do the modification in reverse order as the modification can
  # potentially delete nodes
  Map(`xml_text<-`, rev(x), rev(value))

  # what to return here, setting the text could invalidate some nodes in
  # the nodeset having pointers to free'd memory.
55 | x 56 | } 57 | 58 | #' @export 59 | `xml_text<-.xml_node` <- function(x, value) { 60 | if (xml_type(x) != "text") { 61 | text_child <- xml_find_first(x, ".//text()[1]", ns = character()) 62 | if (inherits(text_child, "xml_missing")) { 63 | .Call(node_append_content, x$node, value) 64 | } else { 65 | .Call(node_set_content, text_child$node, value) 66 | } 67 | } else { 68 | .Call(node_set_content, x$node, value) 69 | } 70 | 71 | x 72 | } 73 | 74 | #' @export 75 | `xml_text<-.xml_missing` <- function(x, value) { 76 | NA_character_ 77 | } 78 | 79 | #' @export 80 | #' @rdname xml_text 81 | `xml_set_text` <- `xml_text<-` 82 | 83 | #' @rdname xml_text 84 | #' @export 85 | xml_double <- function(x) { 86 | as.numeric(xml_text(x)) 87 | } 88 | 89 | #' @rdname xml_text 90 | #' @export 91 | xml_integer <- function(x) { 92 | as.integer(xml_text(x)) 93 | } 94 | -------------------------------------------------------------------------------- /R/xml_type.R: -------------------------------------------------------------------------------- 1 | #' Determine the type of a node. 
2 | #' 3 | #' @inheritParams xml_name 4 | #' @export 5 | #' @examples 6 | #' x <- read_xml(" a ") 7 | #' xml_type(x) 8 | #' xml_type(xml_contents(x)) 9 | xml_type <- function(x) { 10 | types <- .Call(node_type, x) 11 | xmlElementType[types] 12 | } 13 | 14 | xmlElementType <- c( 15 | "element", 16 | "attribute", 17 | "text", 18 | "cdata", 19 | "entity_ref", 20 | "entity", 21 | "pi", 22 | "comment", 23 | "document", 24 | "document_type", 25 | "document_frag", 26 | "notation", 27 | "html_document", 28 | "dtd", 29 | "element_decl", 30 | "attribute_decl", 31 | "entity_decl", 32 | "namespace_decl", 33 | "xinclude_start", 34 | "xinclude_end", 35 | "docb_document" 36 | ) 37 | -------------------------------------------------------------------------------- /R/xml_url.R: -------------------------------------------------------------------------------- 1 | #' The URL of an XML document 2 | #' 3 | #' This is useful for interpreting relative urls with [url_relative()]. 4 | #' 5 | #' @param x A node or document. 6 | #' @return A character vector of length 1. Returns `NA` if the name is 7 | #' not set. 8 | #' @export 9 | #' @examples 10 | #' catalog <- read_xml(xml2_example("cd_catalog.xml")) 11 | #' xml_url(catalog) 12 | #' 13 | #' x <- read_xml("") 14 | #' xml_url(x) 15 | xml_url <- function(x) { 16 | UseMethod("xml_url") 17 | } 18 | 19 | #' @export 20 | xml_url.xml_missing <- function(x) { 21 | NA_character_ 22 | } 23 | 24 | #' @export 25 | xml_url.xml_node <- function(x) { 26 | .Call(doc_url, x$doc) 27 | } 28 | 29 | #' @export 30 | xml_url.xml_nodeset <- function(x) { 31 | vapply(x, function(x) .Call(doc_url, x), character(1)) 32 | } 33 | 34 | #' Convert between relative and absolute urls. 35 | #' 36 | #' @param x A character vector of urls relative to that base 37 | #' @param base A string giving a base url. 
38 | #' @return A character vector of urls 39 | #' @seealso \code{\link{xml_url}} to retrieve the URL associated with a document 40 | #' @export 41 | #' @examples 42 | #' url_absolute(c(".", "..", "/", "/x"), "http://hadley.nz/a/b/c/d") 43 | #' 44 | #' url_relative("http://hadley.nz/a/c", "http://hadley.nz") 45 | #' url_relative("http://hadley.nz/a/c", "http://hadley.nz/") 46 | #' url_relative("http://hadley.nz/a/c", "http://hadley.nz/a/b") 47 | #' url_relative("http://hadley.nz/a/c", "http://hadley.nz/a/b/") 48 | #' @export 49 | url_absolute <- function(x, base) { 50 | .Call(url_absolute_, x, base) 51 | } 52 | 53 | #' @rdname url_absolute 54 | #' @export 55 | url_relative <- function(x, base) { 56 | .Call(url_relative_, x, base) 57 | } 58 | 59 | #' Escape and unescape urls. 60 | #' 61 | #' @param x A character vector of urls. 62 | #' @param reserved A string containing additional characters to avoid escaping. 63 | #' @export 64 | #' @examples 65 | #' url_escape("a b c") 66 | #' url_escape("a b c", "") 67 | #' 68 | #' url_unescape("a%20b%2fc") 69 | #' url_unescape("%C2%B5") 70 | url_escape <- function(x, reserved = "") { 71 | .Call(url_escape_, x, reserved) 72 | } 73 | 74 | #' @rdname url_escape 75 | #' @export 76 | url_unescape <- function(x) { 77 | .Call(url_unescape_, x) 78 | } 79 | 80 | #' Parse a url into its component pieces. 81 | #' 82 | #' @param x A character vector of urls. 83 | #' @return A dataframe with one row for each element of \code{x} and 84 | #' columns: scheme, server, port, user, path, query, fragment. 
85 | #' @export 86 | #' @examples 87 | #' url_parse("http://had.co.nz/") 88 | #' url_parse("http://had.co.nz:1234/") 89 | #' url_parse("http://had.co.nz:1234/?a=1&b=2") 90 | #' url_parse("http://had.co.nz:1234/?a=1&b=2#def") 91 | url_parse <- function(x) { 92 | .Call(url_parse_, x) 93 | } 94 | -------------------------------------------------------------------------------- /R/xml_write.R: -------------------------------------------------------------------------------- 1 | #' Write XML or HTML to disk. 2 | #' 3 | #' This writes out both XML and normalised HTML. The default behavior will 4 | #' output the same format which was read. If you want to force output pass 5 | #' `option = "as_xml"` or `option = "as_html"` respectively. 6 | #' 7 | #' @param x A document or node to write to disk. It's not possible to 8 | #' save nodesets containing more than one node. 9 | #' @param file Path to file or connection to write to. 10 | #' @param encoding The character encoding to use in the document. The default 11 | #' encoding is \sQuote{UTF-8}. Available encodings are specified at 12 | #' . 13 | #' @param options default: \sQuote{format}. Zero or more of 14 | #' \Sexpr[results=rd, stage=build]{xml2:::describe_options(xml2:::xml_save_options())} 15 | #' @param ... additional arguments passed to methods. 16 | #' @export 17 | #' @examples 18 | #' h <- read_html("

Hi!

") 19 | #' 20 | #' tmp <- tempfile(fileext = ".xml") 21 | #' write_xml(h, tmp, options = "format") 22 | #' readLines(tmp) 23 | #' 24 | #' # write formatted HTML output 25 | #' write_html(h, tmp, options = "format") 26 | #' readLines(tmp) 27 | write_xml <- function(x, file, ...) { 28 | UseMethod("write_xml") 29 | } 30 | 31 | #' @export 32 | write_xml.xml_missing <- function(x, file, ...) { 33 | cli::cli_abort("Missing data cannot be written.") 34 | } 35 | 36 | #' @rdname write_xml 37 | #' @export 38 | write_xml.xml_document <- function(x, file, ..., options = "format", encoding = "UTF-8") { 39 | options <- parse_options(options, xml_save_options()) 40 | file <- path_to_connection(file, check = "dir") 41 | 42 | if (inherits(file, "connection")) { 43 | if (!isOpen(file)) { 44 | open(file, "wb") 45 | on.exit(close(file)) 46 | } 47 | .Call(doc_write_connection, x$doc, file, encoding, options) 48 | } else { 49 | check_string(file) 50 | .Call(doc_write_file, x$doc, file, encoding, options) 51 | } 52 | 53 | invisible() 54 | } 55 | 56 | #' @export 57 | write_xml.xml_nodeset <- function(x, file, ..., options = "format", encoding = "UTF-8") { 58 | if (length(x) != 1) { 59 | cli::cli_abort("Can only save length 1 node sets.") 60 | } 61 | 62 | options <- parse_options(options, xml_save_options()) 63 | file <- path_to_connection(file, check = "dir") 64 | 65 | if (inherits(file, "connection")) { 66 | if (!isOpen(file)) { 67 | open(file, "wb") 68 | on.exit(close(file)) 69 | } 70 | .Call(node_write_connection, x[[1]]$node, file, encoding, options) 71 | } else { 72 | check_string(file) 73 | .Call(node_write_file, x[[1]]$node, file, encoding, options) 74 | } 75 | 76 | invisible() 77 | } 78 | 79 | #' @export 80 | write_xml.xml_node <- function(x, file, ..., options = "format", encoding = "UTF-8") { 81 | options <- parse_options(options, xml_save_options()) 82 | 83 | file <- path_to_connection(file, check = "dir") 84 | if (inherits(file, "connection")) { 85 | if (!isOpen(file)) { 86 | 
open(file, "wb") 87 | on.exit(close(file)) 88 | } 89 | .Call(node_write_connection, x$node, file, encoding, options) 90 | } else { 91 | check_string(file) 92 | .Call(node_write_file, x$node, file, encoding, options) 93 | } 94 | 95 | invisible() 96 | } 97 | 98 | 99 | #' @export 100 | #' @rdname write_xml 101 | write_html <- function(x, file, ...) { 102 | UseMethod("write_html") 103 | } 104 | 105 | #' @export 106 | write_html.xml_missing <- function(x, file, ...) { 107 | cli::cli_abort("Missing data cannot be written.") 108 | } 109 | 110 | #' @rdname write_xml 111 | #' @export 112 | write_html.xml_document <- write_xml.xml_document 113 | 114 | #' @export 115 | write_html.xml_nodeset <- write_xml.xml_nodeset 116 | 117 | #' @export 118 | write_html.xml_node <- write_xml.xml_node 119 | -------------------------------------------------------------------------------- /R/zzz.R: -------------------------------------------------------------------------------- 1 | .onUnload <- function(libpath) { 2 | gc() # trigger finalisers 3 | library.dynam.unload("xml2", libpath) 4 | } 5 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | --- 4 | 5 | 6 | 7 | ```{r, include = FALSE} 8 | knitr::opts_chunk$set( 9 | collapse = TRUE, 10 | comment = "#>", 11 | fig.path = "man/figures/README-", 12 | out.width = "100%" 13 | ) 14 | ``` 15 | 16 | # xml2 17 | 18 | 19 | [![CRAN_Status_Badge](https://www.r-pkg.org/badges/version/xml2)](https://cran.r-project.org/package=xml2) 20 | [![Codecov test coverage](https://codecov.io/gh/r-lib/xml2/branch/master/graph/badge.svg)](https://app.codecov.io/gh/r-lib/xml2?branch=main) 21 | [![R build status](https://github.com/r-lib/xml2/workflows/R-CMD-check/badge.svg)](https://github.com/r-lib/xml2/actions) 22 | 
[![R-CMD-check](https://github.com/r-lib/xml2/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/r-lib/xml2/actions/workflows/R-CMD-check.yaml) 23 | 24 | 25 | The xml2 package is a binding to [libxml2](http://xmlsoft.org), making it easy to work with HTML and XML from R. The API is somewhat inspired by [jQuery](https://jquery.com). 26 | 27 | ## Installation 28 | 29 | You can install xml2 from CRAN, 30 | 31 | ```r 32 | install.packages("xml2") 33 | ``` 34 | 35 | or you can install the development version from github, using `pak`: 36 | 37 | ```r 38 | # install.packages("pak") 39 | pak::pak("r-lib/xml2") 40 | ``` 41 | 42 | ## Usage 43 | 44 | ```r 45 | library(xml2) 46 | x <- read_xml(" text ") 47 | x 48 | 49 | xml_name(x) 50 | xml_children(x) 51 | xml_text(x) 52 | xml_find_all(x, ".//baz") 53 | 54 | h <- read_html("

Hi !") 55 | h 56 | xml_name(h) 57 | xml_text(h) 58 | ``` 59 | 60 | There are three key classes: 61 | 62 | * `xml_node`: a single node in a document. 63 | 64 | * `xml_document`: the complete document. Acting on a document is usually the same 65 | as acting on the root node of the document. 66 | 67 | * `xml_nodeset`: a __set__ of nodes within the document. Operations on 68 | `xml_nodeset`s are vectorised: they apply the operation over each node in the set. 69 | 70 | ## Compared to the XML package 71 | 72 | xml2 has similar goals to the XML package. The main differences are: 73 | 74 | * xml2 takes care of memory management for you. It will automatically 75 | free the memory used by an XML document as soon as the last reference 76 | to it goes away. 77 | 78 | * xml2 has a very simple class hierarchy so you don't need to think about exactly 79 | what type of object you have, xml2 will just do the right thing. 80 | 81 | * More convenient handling of namespaces in XPath expressions - see `xml_ns()` 82 | and `xml_ns_strip()` to get started. 83 | 84 | ## Code of Conduct 85 | 86 | Please note that the xml2 project is released with a [Contributor Code of Conduct](https://xml2.r-lib.org/CODE_OF_CONDUCT.html). By contributing to this project, you agree to abide by its terms. 
87 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # xml2 5 | 6 | 7 | 8 | [![CRAN_Status_Badge](https://www.r-pkg.org/badges/version/xml2)](https://cran.r-project.org/package=xml2) 9 | [![Codecov test 10 | coverage](https://codecov.io/gh/r-lib/xml2/branch/master/graph/badge.svg)](https://app.codecov.io/gh/r-lib/xml2?branch=main) 11 | [![R build 12 | status](https://github.com/r-lib/xml2/workflows/R-CMD-check/badge.svg)](https://github.com/r-lib/xml2/actions) 13 | [![R-CMD-check](https://github.com/r-lib/xml2/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/r-lib/xml2/actions/workflows/R-CMD-check.yaml) 14 | 15 | 16 | The xml2 package is a binding to [libxml2](http://xmlsoft.org), making 17 | it easy to work with HTML and XML from R. The API is somewhat inspired 18 | by [jQuery](https://jquery.com). 19 | 20 | ## Installation 21 | 22 | You can install xml2 from CRAN, 23 | 24 | ``` r 25 | install.packages("xml2") 26 | ``` 27 | 28 | or you can install the development version from github, using 29 | `pak`: 30 | 31 | ``` r 32 | # install.packages("pak") 33 | pak::pak("r-lib/xml2") 34 | ``` 35 | 36 | ## Usage 37 | 38 | ``` r 39 | library(xml2) 40 | x <- read_xml(" text ") 41 | x 42 | 43 | xml_name(x) 44 | xml_children(x) 45 | xml_text(x) 46 | xml_find_all(x, ".//baz") 47 | 48 | h <- read_html("

Hi !") 49 | h 50 | xml_name(h) 51 | xml_text(h) 52 | ``` 53 | 54 | There are three key classes: 55 | 56 | - `xml_node`: a single node in a document. 57 | 58 | - `xml_document`: the complete document. Acting on a document is usually the 59 | same as acting on the root node of the document. 60 | 61 | - `xml_nodeset`: a **set** of nodes within the document. Operations on 62 | `xml_nodeset`s are vectorised: they apply the operation over each node in 63 | the set. 64 | 65 | ## Compared to the XML package 66 | 67 | xml2 has similar goals to the XML package. The main differences are: 68 | 69 | - xml2 takes care of memory management for you. It will automatically 70 | free the memory used by an XML document as soon as the last reference 71 | to it goes away. 72 | 73 | - xml2 has a very simple class hierarchy so you don’t need to think 74 | about exactly what type of object you have, xml2 will just do the 75 | right thing. 76 | 77 | - More convenient handling of namespaces in XPath expressions - see 78 | `xml_ns()` and `xml_ns_strip()` to get started. 79 | 80 | ## Code of Conduct 81 | 82 | Please note that the xml2 project is released with a [Contributor Code 83 | of Conduct](https://xml2.r-lib.org/CODE_OF_CONDUCT.html). By 84 | contributing to this project, you agree to abide by its terms. 
85 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | url: http://xml2.r-lib.org 2 | 3 | template: 4 | bootstrap: 5 5 | package: tidytemplate 6 | 7 | includes: 8 | in_header: | 9 | 10 | 11 | development: 12 | mode: auto 13 | 14 | reference: 15 | - title: Read and write documents 16 | contents: 17 | - starts_with("read_") 18 | - starts_with("write_") 19 | - starts_with("download_") 20 | 21 | - title: Class coercion 22 | contents: 23 | - starts_with("as_") 24 | 25 | - title: URL manipulation 26 | contents: 27 | - starts_with("url_") 28 | 29 | - title: Create and modify a document 30 | contents: 31 | - starts_with("xml_new") 32 | - starts_with("xml_add") 33 | - starts_with("xml_set") 34 | - xml_cdata 35 | - xml_comment 36 | - xml_dtd 37 | - xml_ns_strip 38 | - xml_replace 39 | - xml_remove 40 | 41 | - title: Search and navigate a document 42 | contents: 43 | - starts_with("xml_find") 44 | - starts_with("xml_attr") 45 | - xml_path 46 | 47 | - title: Inspect a document 48 | contents: 49 | - starts_with("xml_ns") 50 | - xml_children 51 | - ends_with("structure") 52 | - xml_type 53 | - xml_url 54 | - xml_validate 55 | 56 | - title: Utilities 57 | contents: 58 | - ends_with("serialize") 59 | - xml2_example 60 | 61 | news: 62 | releases: 63 | - text: Version 1.0.0 64 | href: https://www.rstudio.com/blog/xml2-1-0-0/ 65 | - text: Version 1.1.1 66 | href: https://www.rstudio.com/blog/xml-1-1-1/ 67 | -------------------------------------------------------------------------------- /cleanup: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | rm -f src/Makevars configure.log 3 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | 3 | coverage: 4 | status: 5 
| project: 6 | default: 7 | target: auto 8 | threshold: 1% 9 | informational: true 10 | patch: 11 | default: 12 | target: auto 13 | threshold: 1% 14 | informational: true 15 | -------------------------------------------------------------------------------- /configure: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # Anticonf (tm) script by Jeroen Ooms (2015) 3 | # This script will query 'pkg-config' for the required cflags and ldflags. 4 | # If pkg-config is unavailable or does not find the library, try setting 5 | # INCLUDE_DIR and LIB_DIR manually via e.g: 6 | # R CMD INSTALL --configure-vars='INCLUDE_DIR=/.../include LIB_DIR=/.../lib' 7 | 8 | # Library settings 9 | PKG_CONFIG_NAME="libxml-2.0" 10 | PKG_DEB_NAME="libxml2-dev" 11 | PKG_RPM_NAME="libxml2-devel" 12 | PKG_CSW_NAME="libxml2_dev" 13 | PKG_TEST_HEADER="" 14 | PKG_LIBS="-lxml2" 15 | 16 | # Note that cflags may be empty in case of success 17 | if [ "$INCLUDE_DIR" ] || [ "$LIB_DIR" ]; then 18 | echo "Found INCLUDE_DIR and/or LIB_DIR!" 19 | PKG_CFLAGS="-I$INCLUDE_DIR $PKG_CFLAGS" 20 | PKG_LIBS="-L$LIB_DIR $PKG_LIBS" 21 | else 22 | # Use xml2-config if available 23 | xml2-config --version >/dev/null 2>&1 24 | if [ $? -eq 0 ]; then 25 | PKGCONFIG_CFLAGS=`xml2-config --cflags` 26 | PKGCONFIG_LIBS=`xml2-config --libs` 27 | 28 | # Fix a missing libxml2 directory on the requested include directory 29 | # https://github.com/r-lib/xml2/issues/296 30 | if [ `uname` = "Darwin" ] && echo "${PKGCONFIG_CFLAGS}" | grep -sq "/usr/include$"; then 31 | PKGCONFIG_CFLAGS="$PKGCONFIG_CFLAGS/libxml2" 32 | fi 33 | 34 | else 35 | pkg-config --version >/dev/null 2>&1 36 | if [ $? -eq 0 ]; then 37 | PKGCONFIG_CFLAGS=`pkg-config --cflags $PKG_CONFIG_NAME` 38 | PKGCONFIG_LIBS=`pkg-config --libs $PKG_CONFIG_NAME` 39 | fi 40 | fi 41 | 42 | if [ "$PKGCONFIG_CFLAGS" ] || [ "$PKGCONFIG_LIBS" ]; then 43 | echo "Found pkg-config cflags and libs!" 
44 | PKG_CFLAGS=${PKGCONFIG_CFLAGS} 45 | PKG_LIBS=${PKGCONFIG_LIBS} 46 | fi 47 | fi 48 | 49 | # Find compiler 50 | CC=`${R_HOME}/bin/R CMD config CC` 51 | CFLAGS=`${R_HOME}/bin/R CMD config CFLAGS` 52 | CPPFLAGS=`${R_HOME}/bin/R CMD config CPPFLAGS` 53 | 54 | # For debugging 55 | echo "Using PKG_CFLAGS=$PKG_CFLAGS" 56 | echo "Using PKG_LIBS=$PKG_LIBS" 57 | 58 | # Test configuration 59 | echo "#include $PKG_TEST_HEADER" | ${CC} ${CPPFLAGS} ${PKG_CFLAGS} ${CFLAGS} -E -xc - >/dev/null 2>configure.log 60 | 61 | # Customize the error 62 | if [ $? -ne 0 ]; then 63 | echo "------------------------- ANTICONF ERROR ---------------------------" 64 | echo "Configuration failed because $PKG_CONFIG_NAME was not found. Try installing:" 65 | echo " * deb: $PKG_DEB_NAME (Debian, Ubuntu, etc)" 66 | echo " * rpm: $PKG_RPM_NAME (Fedora, CentOS, RHEL)" 67 | echo " * csw: $PKG_CSW_NAME (Solaris)" 68 | echo "If $PKG_CONFIG_NAME is already installed, check that 'pkg-config' is in your" 69 | echo "PATH and PKG_CONFIG_PATH contains a $PKG_CONFIG_NAME.pc file. If pkg-config" 70 | echo "is unavailable you can set INCLUDE_DIR and LIB_DIR manually via:" 71 | echo "R CMD INSTALL --configure-vars='INCLUDE_DIR=... 
LIB_DIR=...'" 72 | echo "-------------------------- [ERROR MESSAGE] ---------------------------" 73 | cat configure.log 74 | echo "--------------------------------------------------------------------" 75 | exit 1 76 | fi 77 | 78 | # Write to Makevars 79 | sed -e "s|@cflags@|$PKG_CFLAGS|" -e "s|@libs@|$PKG_LIBS|" src/Makevars.in > src/Makevars 80 | 81 | # Success 82 | exit 0 83 | -------------------------------------------------------------------------------- /configure.win: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-lib/xml2/bf5619bbb6452d1f23cd88a9e0960d77e98a0d7b/configure.win -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | ## R CMD check results 2 | 3 | 0 errors | 0 warnings | 0 note 4 | 5 | ## revdepcheck results 6 | 7 | We checked 570 reverse dependencies (526 from CRAN + 44 from Bioconductor), comparing R CMD check results across CRAN and dev versions of this package. 
8 | 9 | * We saw 0 new problems 10 | * We failed to check 0 packages 11 | -------------------------------------------------------------------------------- /docker/r-devel-san/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM rocker/r-devel-san 2 | 3 | RUN apt-get -qq update \ 4 | && apt-get -qq dist-upgrade -y \ 5 | && apt-get -qq install git pandoc pandoc-citeproc libssl-dev libcurl4-openssl-dev libxml2-dev -y \ 6 | && RD -e 'install.packages(c("Rcpp", "BH", "httr", "testthat", "magrittr", "knitr", "rmarkdown", "covr"), quiet = T)' 7 | 8 | RUN git clone https://github.com/hadley/xml2 \ 9 | && RD CMD build xml2 --no-build-vignettes \ 10 | && RD CMD INSTALL xml2_*.tar.gz --install-tests 11 | 12 | RUN RD -e 'testthat::test_package("xml2"); q("no");' || true 13 | 14 | RUN RD CMD check xml2_*.tar.gz 15 | -------------------------------------------------------------------------------- /inst/extdata/order-doc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Alice Smith 5 | 123 Maple Street 6 | Mill Valley 7 | CA 8 | 90952 9 | 10 | 11 | Robert Smith 12 | 8 Oak Avenue 13 | Old Town 14 | PA 15 | 95819 16 | 17 | Hurry, my lawn is going wild! 18 | 19 | 20 | Lawnmower 21 | 1 22 | 148.95 23 | Confirm this is electric 24 | 25 | 26 | Baby Monitor 27 | 1 28 | 39.98 29 | 1999-05-21 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /inst/extdata/order-schema.xml: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | Purchase order schema for Example.com. 6 | Copyright 2000 Example.com. All rights reserved. 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | Purchase order schema for Example.Microsoft.com. 28 | Copyright 2001 Example.Microsoft.com. All rights reserved. 29 | 30 | 31 | Application info. 
32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | -------------------------------------------------------------------------------- /inst/extdata/r-project.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | R: The R Project for Statistical Computing 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 22 | 23 | 24 |

25 |
26 | 74 |
75 |

The R Project for Statistical Computing

76 |

Getting Started

77 |

R is a free software environment for statistical computing and graphics. It compiles and runs on a wide variety of UNIX platforms, Windows and MacOS. To download R, please choose your preferred CRAN mirror.

78 |

If you have questions about R like how to download and install the software, or what the license terms are, please read our answers to frequently asked questions before you send an email.

79 |

News

80 |
    81 |
  • R 3.2.0 (Full of Ingredients) prerelease versions will appear starting March 19. Final release is scheduled for 2015-04-16.

  • 82 |
  • R version 3.1.3 (Smooth Sidewalk) has been released on 2015-03-09.

  • 83 |
  • The R Journal Volume 6/2 is available.

  • 84 |
  • R version 3.1.2 (Pumpkin Helmet) has been released on 2014-10-31.

  • 85 |
  • useR! 2015, will take place at the University of Aalborg, Denmark, June 30 - July 3, 2015.

  • 86 |
  • useR! 2014, took place at the University of California, Los Angeles, USA June 30 - July 3, 2014.

  • 87 |
88 | 91 |
92 |
93 | 96 |
97 | 98 | 99 | 100 | 101 | 102 | 103 | -------------------------------------------------------------------------------- /inst/include/xml2_types.h: -------------------------------------------------------------------------------- 1 | #ifndef __XML2_XML2_TYPES__ 2 | #define __XML2_XML2_TYPES__ 3 | 4 | #include 5 | #include 6 | 7 | template class XPtr { 8 | protected: 9 | SEXP data_; 10 | 11 | public: 12 | XPtr(SEXP x) : data_(x) { 13 | if (TYPEOF(data_) != EXTPTRSXP) { 14 | Rf_error("Expecting an external pointer: [type=%s]", Rf_type2char(TYPEOF(data_))); 15 | } 16 | R_PreserveObject(data_); 17 | } 18 | 19 | XPtr(T* p) { 20 | data_ = R_MakeExternalPtr((void *) p, R_NilValue, R_NilValue); 21 | R_PreserveObject(data_); 22 | } 23 | 24 | XPtr(const XPtr &old) { 25 | data_ = old.data_; 26 | R_PreserveObject(data_); 27 | } 28 | 29 | XPtr& operator=(const XPtr &other) { 30 | R_PreserveObject(other.data_); 31 | if (data_ != NULL) { 32 | R_ReleaseObject(data_); 33 | } 34 | data_ = other.data_; 35 | return *this; 36 | } 37 | 38 | operator SEXP() const { return data_; } 39 | 40 | T* get() const { 41 | return (T*)(R_ExternalPtrAddr(data_)); 42 | } 43 | 44 | T* checked_get() const { 45 | T* ptr = get(); 46 | if (ptr == NULL) { 47 | Rf_error("external pointer is not valid"); 48 | } 49 | return ptr; 50 | } 51 | 52 | operator T*() { 53 | return checked_get(); 54 | } 55 | 56 | T* operator->() const { 57 | return checked_get(); 58 | } 59 | 60 | ~XPtr() { 61 | R_ReleaseObject(data_); 62 | } 63 | }; 64 | 65 | 66 | class XPtrDoc : public ::XPtr { 67 | static void finalizeXPtrDoc(SEXP p) { 68 | if (TYPEOF(p) != EXTPTRSXP) { 69 | return; 70 | } 71 | 72 | xmlDoc* ptr = (xmlDoc*) R_ExternalPtrAddr(p); 73 | 74 | if (ptr == NULL) { 75 | return; 76 | } 77 | 78 | R_ClearExternalPtr(p); 79 | 80 | xmlFreeDoc(ptr); 81 | } 82 | 83 | public: 84 | XPtrDoc(xmlDoc* p) : ::XPtr(p) { 85 | R_RegisterCFinalizerEx(data_, finalizeXPtrDoc, (Rboolean) false); 86 | } 87 | 88 | XPtrDoc(SEXP x) : ::XPtr(x) {} 89 | 
}; 90 | 91 | typedef ::XPtr XPtrNode; 92 | typedef ::XPtr XPtrNs; 93 | 94 | #endif 95 | -------------------------------------------------------------------------------- /man/as_list.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/as_list.R 3 | \name{as_list} 4 | \alias{as_list} 5 | \title{Coerce xml nodes to a list.} 6 | \usage{ 7 | as_list(x, ns = character(), ...) 8 | } 9 | \arguments{ 10 | \item{x}{A document, node, or node set.} 11 | 12 | \item{ns}{Optionally, a named vector giving prefix-url pairs, as produced 13 | by \code{\link[=xml_ns]{xml_ns()}}. If provided, all names will be explicitly 14 | qualified with the ns prefix, i.e. if the element \code{bar} is defined 15 | in namespace \code{foo}, it will be called \code{foo:bar}. (And 16 | similarly for attributes). Default namespaces must be given an explicit 17 | name. The ns is ignored when using \code{\link[=xml_name<-]{xml_name<-()}} and 18 | \code{\link[=xml_set_name]{xml_set_name()}}.} 19 | 20 | \item{...}{Needed for compatibility with generic. Unused.} 21 | } 22 | \description{ 23 | This turns an XML document (or node or nodeset) into the equivalent R 24 | list. Note that this is \code{as_list()}, not \code{as.list()}: 25 | \code{lapply()} automatically calls \code{as.list()} on its inputs, so 26 | we can't override the default. 27 | } 28 | \details{ 29 | \code{as_list} currently only handles the four most common types of 30 | children that an element might have: 31 | 32 | \itemize{ 33 | \item Other elements, converted to lists. 34 | \item Attributes, stored as R attributes. Attributes that have special meanings in R 35 | (\code{\link[=class]{class()}}, \code{\link[=comment]{comment()}}, \code{\link[=dim]{dim()}}, 36 | \code{\link[=dimnames]{dimnames()}}, \code{\link[=names]{names()}}, \code{\link[=row.names]{row.names()}} and 37 | \code{\link[=tsp]{tsp()}}) are escaped with '.' 
38 | \item Text, stored as a character vector. 39 | } 40 | } 41 | \examples{ 42 | as_list(read_xml(" a ]]>")) 43 | as_list(read_xml(" ")) 44 | as_list(read_xml("")) 45 | as_list(read_xml("")) 46 | } 47 | -------------------------------------------------------------------------------- /man/as_xml_document.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/as_xml_document.R 3 | \name{as_xml_document} 4 | \alias{as_xml_document} 5 | \title{Coerce a R list to xml nodes.} 6 | \usage{ 7 | as_xml_document(x, ...) 8 | } 9 | \arguments{ 10 | \item{x}{A document, node, or node set.} 11 | 12 | \item{...}{Needed for compatibility with generic. Unused.} 13 | } 14 | \description{ 15 | This turns an R list into the equivalent XML document. Not all R lists will 16 | produce valid XML, in particular there can only be one root node and all 17 | child nodes need to be named (or empty) lists. R attributes become XML 18 | attributes and R names become XML node names. 
19 | } 20 | \examples{ 21 | as_xml_document(list(x = list())) 22 | 23 | # Nesting multiple nodes 24 | as_xml_document(list(foo = list(bar = list(baz = list())))) 25 | 26 | # attributes are stored as R attributes 27 | as_xml_document(list(foo = structure(list(), id = "a"))) 28 | as_xml_document(list(foo = list( 29 | bar = structure(list(), id = "a"), 30 | bar = structure(list(), id = "b") 31 | ))) 32 | } 33 | -------------------------------------------------------------------------------- /man/download_xml.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/xml_parse.R 3 | \name{download_xml} 4 | \alias{download_xml} 5 | \alias{download_html} 6 | \title{Download a HTML or XML file} 7 | \usage{ 8 | download_xml( 9 | url, 10 | file = basename(url), 11 | quiet = TRUE, 12 | mode = "wb", 13 | handle = curl::new_handle() 14 | ) 15 | 16 | download_html( 17 | url, 18 | file = basename(url), 19 | quiet = TRUE, 20 | mode = "wb", 21 | handle = curl::new_handle() 22 | ) 23 | } 24 | \arguments{ 25 | \item{url}{A character string naming the URL of a resource to be downloaded.} 26 | 27 | \item{file}{A character string with the name where the downloaded file is 28 | saved.} 29 | 30 | \item{quiet}{If \code{TRUE}, suppress status messages (if any), and the 31 | progress bar.} 32 | 33 | \item{mode}{A character string specifying the mode with which to write the file. 34 | Useful values are \code{"w"}, \code{"wb"} (binary), \code{"a"} (append) 35 | and \code{"ab"}.} 36 | 37 | \item{handle}{a curl handle object} 38 | } 39 | \value{ 40 | Path of downloaded file (invisibly). 41 | } 42 | \description{ 43 | Libcurl implementation of \code{C_download} (the "internal" download method) 44 | with added support for https, ftps, gzip, etc. 
Default behavior is identical 45 | to \code{\link[=download.file]{download.file()}}, but request can be fully configured by passing 46 | a custom \code{\link[curl:handle]{curl::handle()}}. 47 | } 48 | \details{ 49 | The main difference between \code{curl_download} and \code{curl_fetch_disk} 50 | is that \code{curl_download} checks the http status code before starting the 51 | download, and raises an error when status is non-successful. The behavior of 52 | \code{curl_fetch_disk} on the other hand is to proceed as normal and write 53 | the error page to disk in case of a non success response. 54 | 55 | For a more advanced download interface which supports concurrent requests and 56 | resuming large files, have a look at the \link[curl]{multi_download} function. 57 | } 58 | \examples{ 59 | \dontrun{ 60 | download_html("http://tidyverse.org/index.html") 61 | } 62 | } 63 | \seealso{ 64 | \link[curl:curl_download]{curl_download} 65 | } 66 | -------------------------------------------------------------------------------- /man/figures/lifecycle-archived.svg: -------------------------------------------------------------------------------- 1 | 2 | lifecycle: archived 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | lifecycle 18 | 19 | archived 20 | 21 | 22 | -------------------------------------------------------------------------------- /man/figures/lifecycle-defunct.svg: -------------------------------------------------------------------------------- 1 | 2 | lifecycle: defunct 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | lifecycle 18 | 19 | defunct 20 | 21 | 22 | -------------------------------------------------------------------------------- /man/figures/lifecycle-deprecated.svg: -------------------------------------------------------------------------------- 1 | 2 | lifecycle: deprecated 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | lifecycle 18 | 19 | deprecated 20 | 21 | 22 | 
-------------------------------------------------------------------------------- /man/figures/lifecycle-experimental.svg: -------------------------------------------------------------------------------- 1 | 2 | lifecycle: experimental 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | lifecycle 18 | 19 | experimental 20 | 21 | 22 | -------------------------------------------------------------------------------- /man/figures/lifecycle-maturing.svg: -------------------------------------------------------------------------------- 1 | 2 | lifecycle: maturing 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | lifecycle 18 | 19 | maturing 20 | 21 | 22 | -------------------------------------------------------------------------------- /man/figures/lifecycle-questioning.svg: -------------------------------------------------------------------------------- 1 | 2 | lifecycle: questioning 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | lifecycle 18 | 19 | questioning 20 | 21 | 22 | -------------------------------------------------------------------------------- /man/figures/lifecycle-soft-deprecated.svg: -------------------------------------------------------------------------------- 1 | 2 | lifecycle: soft-deprecated 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | lifecycle 18 | 19 | soft-deprecated 20 | 21 | 22 | -------------------------------------------------------------------------------- /man/figures/lifecycle-stable.svg: -------------------------------------------------------------------------------- 1 | 2 | lifecycle: stable 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 19 | 20 | lifecycle 21 | 22 | 25 | 26 | stable 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /man/figures/lifecycle-superseded.svg: -------------------------------------------------------------------------------- 1 | 2 | lifecycle: superseded 3 | 4 | 5 | 6 | 7 
| 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | lifecycle 18 | 19 | superseded 20 | 21 | 22 | -------------------------------------------------------------------------------- /man/oldclass.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/S4.R 3 | \name{xml_document-class} 4 | \alias{xml_document-class} 5 | \alias{xml_missing-class} 6 | \alias{xml_node-class} 7 | \alias{xml_nodeset-class} 8 | \title{Register S4 classes} 9 | \description{ 10 | Classes are exported so they can be re-used within S4 classes, see \code{\link[methods:setOldClass]{methods::setOldClass()}}. 11 | \itemize{ 12 | \item \code{xml_document}: a complete document. 13 | \item \code{xml_nodeset}: a \emph{set} of nodes within a document. 14 | \item \code{xml_missing}: a missing object, e.g. for an empty result set. 15 | \item \code{xml_node}: a single node in a document. 16 | } 17 | } 18 | \keyword{internal} 19 | -------------------------------------------------------------------------------- /man/read_xml.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/xml_parse.R 3 | \name{read_xml} 4 | \alias{read_xml} 5 | \alias{read_html} 6 | \alias{read_xml.character} 7 | \alias{read_xml.raw} 8 | \alias{read_xml.connection} 9 | \title{Read HTML or XML.} 10 | \usage{ 11 | read_xml(x, encoding = "", ..., as_html = FALSE, options = "NOBLANKS") 12 | 13 | read_html(x, encoding = "", ..., options = c("RECOVER", "NOERROR", "NOBLANKS")) 14 | 15 | \method{read_xml}{character}(x, encoding = "", ..., as_html = FALSE, options = "NOBLANKS") 16 | 17 | \method{read_xml}{raw}( 18 | x, 19 | encoding = "", 20 | base_url = "", 21 | ..., 22 | as_html = FALSE, 23 | options = "NOBLANKS" 24 | ) 25 | 26 | \method{read_xml}{connection}( 27 | x, 28 | encoding = "", 29 | n = 64 * 1024, 30 | 
verbose = FALSE, 31 | ..., 32 | base_url = "", 33 | as_html = FALSE, 34 | options = "NOBLANKS" 35 | ) 36 | } 37 | \arguments{ 38 | \item{x}{A string, a connection, or a raw vector. 39 | 40 | A string can be either a path, a url or literal xml. Urls will 41 | be converted into connections either using \code{base::url} or, if 42 | installed, \code{curl::curl}. Local paths ending in \code{.gz}, 43 | \code{.bz2}, \code{.xz}, \code{.zip} will be automatically uncompressed. 44 | 45 | If a connection, the complete connection is read into a raw vector before 46 | being parsed.} 47 | 48 | \item{encoding}{Specify a default encoding for the document. Unless 49 | otherwise specified XML documents are assumed to be in UTF-8 or 50 | UTF-16. If the document is not UTF-8/16, and lacks an explicit 51 | encoding directive, this allows you to supply a default.} 52 | 53 | \item{...}{Additional arguments passed on to methods.} 54 | 55 | \item{as_html}{Optionally parse an xml file as if it's html.} 56 | 57 | \item{options}{Set parsing options for the libxml2 parser. Zero or more of 58 | \Sexpr[results=rd, stage=build]{xml2:::describe_options(xml2:::xml_parse_options())}} 59 | 60 | \item{base_url}{When loading from a connection, raw vector or literal 61 | html/xml, this allows you to specify a base url for the document. Base 62 | urls are used to turn relative urls into absolute urls.} 63 | 64 | \item{n}{If \code{x} is a connection, the number of bytes to read per 65 | iteration. Defaults to 64kb.} 66 | 67 | \item{verbose}{When reading from a slow connection, this prints some 68 | output on every iteration so you know it's working.} 69 | } 70 | \value{ 71 | An XML document. HTML is normalised to valid XML - this may not 72 | be exactly the same transformation performed by the browser, but it's 73 | a reasonable approximation. 74 | } 75 | \description{ 76 | Read HTML or XML. 
77 | } 78 | \section{Setting the "user agent" header}{ 79 | 80 | 81 | When performing web scraping tasks it is both good practice --- and often required --- 82 | to set the \href{https://en.wikipedia.org/wiki/User_agent}{user agent} request header 83 | to a specific value. Sometimes this value is assigned to emulate a browser in order 84 | to have content render in a certain way (e.g. \verb{Mozilla/5.0 (Windows NT 5.1; rv:52.0) Gecko/20100101 Firefox/52.0} to emulate more recent Windows browsers). Most often, 85 | this value should be set to provide the web resource owner information on who you are 86 | and the intent of your actions like this Google scraping bot user agent identifier: 87 | \verb{Googlebot/2.1 (+http://www.google.com/bot.html)}. 88 | 89 | You can set the HTTP user agent for URL-based requests using \code{\link[httr:set_config]{httr::set_config()}} and \code{\link[httr:user_agent]{httr::user_agent()}}: 90 | 91 | \code{httr::set_config(httr::user_agent("me@example.com; +https://example.com/info.html"))} 92 | 93 | \code{\link[httr:set_config]{httr::set_config()}} changes the configuration globally, 94 | \code{\link[httr:with_config]{httr::with_config()}} can be used to change configuration temporarily. 
95 | } 96 | 97 | \examples{ 98 | # Literal xml/html is useful for small examples 99 | read_xml("<foo><bar /></foo>") 100 | read_html("<html><title>Hi<title></html>") 101 | read_html("<html><title>Hi") 102 | 103 | # From a local path 104 | read_html(system.file("extdata", "r-project.html", package = "xml2")) 105 | 106 | \dontrun{ 107 | # From a url 108 | cd <- read_xml(xml2_example("cd_catalog.xml")) 109 | me <- read_html("http://had.co.nz") 110 | } 111 | } 112 | -------------------------------------------------------------------------------- /man/url_absolute.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/xml_url.R 3 | \name{url_absolute} 4 | \alias{url_absolute} 5 | \alias{url_relative} 6 | \title{Convert between relative and absolute urls.} 7 | \usage{ 8 | url_absolute(x, base) 9 | 10 | url_relative(x, base) 11 | } 12 | \arguments{ 13 | \item{x}{A character vector of urls relative to that base} 14 | 15 | \item{base}{A string giving a base url.} 16 | } 17 | \value{ 18 | A character vector of urls 19 | } 20 | \description{ 21 | Convert between relative and absolute urls. 
22 | } 23 | \examples{ 24 | url_absolute(c(".", "..", "/", "/x"), "http://hadley.nz/a/b/c/d") 25 | 26 | url_relative("http://hadley.nz/a/c", "http://hadley.nz") 27 | url_relative("http://hadley.nz/a/c", "http://hadley.nz/") 28 | url_relative("http://hadley.nz/a/c", "http://hadley.nz/a/b") 29 | url_relative("http://hadley.nz/a/c", "http://hadley.nz/a/b/") 30 | } 31 | \seealso{ 32 | \code{\link{xml_url}} to retrieve the URL associated with a document 33 | } 34 | -------------------------------------------------------------------------------- /man/url_escape.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/xml_url.R 3 | \name{url_escape} 4 | \alias{url_escape} 5 | \alias{url_unescape} 6 | \title{Escape and unescape urls.} 7 | \usage{ 8 | url_escape(x, reserved = "") 9 | 10 | url_unescape(x) 11 | } 12 | \arguments{ 13 | \item{x}{A character vector of urls.} 14 | 15 | \item{reserved}{A string containing additional characters to avoid escaping.} 16 | } 17 | \description{ 18 | Escape and unescape urls. 19 | } 20 | \examples{ 21 | url_escape("a b c") 22 | url_escape("a b c", "") 23 | 24 | url_unescape("a\%20b\%2fc") 25 | url_unescape("\%C2\%B5") 26 | } 27 | -------------------------------------------------------------------------------- /man/url_parse.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/xml_url.R 3 | \name{url_parse} 4 | \alias{url_parse} 5 | \title{Parse a url into its component pieces.} 6 | \usage{ 7 | url_parse(x) 8 | } 9 | \arguments{ 10 | \item{x}{A character vector of urls.} 11 | } 12 | \value{ 13 | A dataframe with one row for each element of \code{x} and 14 | columns: scheme, server, port, user, path, query, fragment. 15 | } 16 | \description{ 17 | Parse a url into its component pieces. 
18 | } 19 | \examples{ 20 | url_parse("http://had.co.nz/") 21 | url_parse("http://had.co.nz:1234/") 22 | url_parse("http://had.co.nz:1234/?a=1&b=2") 23 | url_parse("http://had.co.nz:1234/?a=1&b=2#def") 24 | } 25 | -------------------------------------------------------------------------------- /man/write_xml.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/xml_write.R 3 | \name{write_xml} 4 | \alias{write_xml} 5 | \alias{write_xml.xml_document} 6 | \alias{write_html} 7 | \alias{write_html.xml_document} 8 | \title{Write XML or HTML to disk.} 9 | \usage{ 10 | write_xml(x, file, ...) 11 | 12 | \method{write_xml}{xml_document}(x, file, ..., options = "format", encoding = "UTF-8") 13 | 14 | write_html(x, file, ...) 15 | 16 | \method{write_html}{xml_document}(x, file, ..., options = "format", encoding = "UTF-8") 17 | } 18 | \arguments{ 19 | \item{x}{A document or node to write to disk. It's not possible to 20 | save nodesets containing more than one node.} 21 | 22 | \item{file}{Path to file or connection to write to.} 23 | 24 | \item{...}{additional arguments passed to methods.} 25 | 26 | \item{options}{default: \sQuote{format}. Zero or more of 27 | \Sexpr[results=rd, stage=build]{xml2:::describe_options(xml2:::xml_save_options())}} 28 | 29 | \item{encoding}{The character encoding to use in the document. The default 30 | encoding is \sQuote{UTF-8}. Available encodings are specified at 31 | \url{http://xmlsoft.org/html/libxml-encoding.html#xmlCharEncoding}.} 32 | } 33 | \description{ 34 | This writes out both XML and normalised HTML. The default behavior will 35 | output the same format which was read. If you want to force a particular output format, pass 36 | \code{options = "as_xml"} or \code{options = "as_html"} respectively. 
37 | } 38 | \examples{ 39 | h <- read_html("<p>Hi!</p>") 40 | 41 | tmp <- tempfile(fileext = ".xml") 42 | write_xml(h, tmp, options = "format") 43 | readLines(tmp) 44 | 45 | # write formatted HTML output 46 | write_html(h, tmp, options = "format") 47 | readLines(tmp) 48 | } 49 | -------------------------------------------------------------------------------- /man/xml2-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/xml2-package.R 3 | \docType{package} 4 | \name{xml2-package} 5 | \alias{xml2} 6 | \alias{xml2-package} 7 | \title{xml2: Parse XML} 8 | \description{ 9 | Work with XML files using a simple, consistent interface. Built on top of the 'libxml2' C library. 10 | } 11 | \seealso{ 12 | Useful links: 13 | \itemize{ 14 | \item \url{https://xml2.r-lib.org} 15 | \item \url{https://github.com/r-lib/xml2} 16 | \item Report bugs at \url{https://github.com/r-lib/xml2/issues} 17 | } 18 | 19 | } 20 | \author{ 21 | \strong{Maintainer}: Hadley Wickham \email{hadley@posit.co} 22 | 23 | Authors: 24 | \itemize{ 25 | \item Jim Hester 26 | \item Jeroen Ooms 27 | } 28 | 29 | Other contributors: 30 | \itemize{ 31 | \item Posit Software, PBC [copyright holder, funder] 32 | \item R Foundation (Copy of R-project homepage cached as example) [contributor] 33 | } 34 | 35 | } 36 | \keyword{internal} 37 | -------------------------------------------------------------------------------- /man/xml2_example.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{xml2_example} 4 | \alias{xml2_example} 5 | \title{Get path to an xml2 example} 6 | \usage{ 7 | xml2_example(path = NULL) 8 | } 9 | \arguments{ 10 | \item{path}{Name of file. 
If \code{NULL}, the example files will be listed.} 11 | } 12 | \description{ 13 | xml2 comes bundled with a number of sample files in its \sQuote{inst/extdata} 14 | directory. This function makes them easy to access. 15 | } 16 | -------------------------------------------------------------------------------- /man/xml_attr.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/xml_attr.R 3 | \name{xml_attr} 4 | \alias{xml_attr} 5 | \alias{xml_has_attr} 6 | \alias{xml_attrs} 7 | \alias{xml_attr<-} 8 | \alias{xml_set_attr} 9 | \alias{xml_attrs<-} 10 | \alias{xml_set_attrs} 11 | \title{Retrieve an attribute.} 12 | \usage{ 13 | xml_attr(x, attr, ns = character(), default = NA_character_) 14 | 15 | xml_has_attr(x, attr, ns = character()) 16 | 17 | xml_attrs(x, ns = character()) 18 | 19 | xml_attr(x, attr, ns = character()) <- value 20 | 21 | xml_set_attr(x, attr, value, ns = character()) 22 | 23 | xml_attrs(x, ns = character()) <- value 24 | 25 | xml_set_attrs(x, value, ns = character()) 26 | } 27 | \arguments{ 28 | \item{x}{A document, node, or node set.} 29 | 30 | \item{attr}{Name of attribute to extract.} 31 | 32 | \item{ns}{Optionally, a named vector giving prefix-url pairs, as produced 33 | by \code{\link[=xml_ns]{xml_ns()}}. If provided, all names will be explicitly 34 | qualified with the ns prefix, i.e. if the element \code{bar} is defined 35 | in namespace \code{foo}, it will be called \code{foo:bar}. (And 36 | similarly for attributes). Default namespaces must be given an explicit 37 | name. The ns is ignored when using \code{\link[=xml_name<-]{xml_name<-()}} and 38 | \code{\link[=xml_set_name]{xml_set_name()}}.} 39 | 40 | \item{default}{Default value to use when attribute is not present.} 41 | 42 | \item{value}{character vector of new value.} 43 | } 44 | \value{ 45 | \code{xml_attr()} returns a character vector. 
\code{NA} is used 46 | to represent attributes that aren't defined. 47 | 48 | \code{xml_has_attr()} returns a logical vector. 49 | 50 | \code{xml_attrs()} returns a named character vector if \code{x} is a single 51 | node, or a list of character vectors if given a nodeset. 52 | } 53 | \description{ 54 | \code{xml_attrs()} retrieves all attribute values as a named character 55 | vector, \verb{xml_attrs() <-} or \code{xml_set_attrs()} sets all attribute 56 | values. \code{xml_attr()} retrieves the value of a single attribute and 57 | \verb{xml_attr() <-} or \code{xml_set_attr()} modifies its value. If the 58 | attribute doesn't exist, it will return \code{default}, which defaults to 59 | \code{NA}. \code{xml_has_attr()} tests if an attribute is present. 60 | } 61 | \examples{ 62 | x <- read_xml("<root id='1'><child id ='a' /><child id='b' d='b'/></root>") 63 | xml_attr(x, "id") 64 | xml_attr(x, "apple") 65 | xml_attrs(x) 66 | 67 | kids <- xml_children(x) 68 | kids 69 | xml_attr(kids, "id") 70 | xml_has_attr(kids, "id") 71 | xml_attrs(kids) 72 | 73 | # Missing attributes give missing values 74 | xml_attr(xml_children(x), "d") 75 | xml_has_attr(xml_children(x), "d") 76 | 77 | # If the document has a namespace, use the ns argument and 78 | # qualified attribute names 79 | x <- read_xml(' 80 | <root xmlns:b="http://bar.com" xmlns:f="http://foo.com"> 81 | <doc b:id="b" f:id="f" id="" /> 82 | </root> 83 | ') 84 | doc <- xml_children(x)[[1]] 85 | ns <- xml_ns(x) 86 | 87 | xml_attrs(doc) 88 | xml_attrs(doc, ns) 89 | 90 | # If you don't supply a ns spec, you get the first matching attribute 91 | xml_attr(doc, "id") 92 | xml_attr(doc, "b:id", ns) 93 | xml_attr(doc, "id", ns) 94 | 95 | # Can set a single attribute with `xml_attr() <-` or `xml_set_attr()` 96 | xml_attr(doc, "id") <- "one" 97 | xml_set_attr(doc, "id", "two") 98 | 99 | # Or set multiple attributes with `xml_attrs() <-` or `xml_set_attrs()` 100 | xml_attrs(doc) <- c("b:id" = "one", "f:id" = "two", "id" = "three") 101 
| xml_set_attrs(doc, c("b:id" = "one", "f:id" = "two", "id" = "three")) 102 | } 103 | -------------------------------------------------------------------------------- /man/xml_cdata.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/classes.R 3 | \name{xml_cdata} 4 | \alias{xml_cdata} 5 | \title{Construct a cdata node} 6 | \usage{ 7 | xml_cdata(content) 8 | } 9 | \arguments{ 10 | \item{content}{The CDATA content, does not include \verb{<![CDATA[}} 11 | } 12 | \description{ 13 | Construct a cdata node 14 | } 15 | \examples{ 16 | x <- xml_new_root("root") 17 | xml_add_child(x, xml_cdata("<d/>")) 18 | as.character(x) 19 | } 20 | -------------------------------------------------------------------------------- /man/xml_children.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/xml_children.R 3 | \name{xml_children} 4 | \alias{xml_children} 5 | \alias{xml_child} 6 | \alias{xml_contents} 7 | \alias{xml_parents} 8 | \alias{xml_siblings} 9 | \alias{xml_parent} 10 | \alias{xml_length} 11 | \alias{xml_root} 12 | \title{Navigate around the family tree.} 13 | \usage{ 14 | xml_children(x) 15 | 16 | xml_child(x, search = 1, ns = xml_ns(x)) 17 | 18 | xml_contents(x) 19 | 20 | xml_parents(x) 21 | 22 | xml_siblings(x) 23 | 24 | xml_parent(x) 25 | 26 | xml_length(x, only_elements = TRUE) 27 | 28 | xml_root(x) 29 | } 30 | \arguments{ 31 | \item{x}{A document, node, or node set.} 32 | 33 | \item{search}{For \code{xml_child}, either the child number to return (by 34 | position), or the name of the child node to return. If there are multiple 35 | child nodes with the same name, the first will be returned} 36 | 37 | \item{ns}{Optionally, a named vector giving prefix-url pairs, as produced 38 | by \code{\link[=xml_ns]{xml_ns()}}. 
If provided, all names will be explicitly 39 | qualified with the ns prefix, i.e. if the element \code{bar} is defined 40 | in namespace \code{foo}, it will be called \code{foo:bar}. (And 41 | similarly for attributes). Default namespaces must be given an explicit 42 | name. The ns is ignored when using \code{\link[=xml_name<-]{xml_name<-()}} and 43 | \code{\link[=xml_set_name]{xml_set_name()}}.} 44 | 45 | \item{only_elements}{For \code{xml_length}, should it count all children, 46 | or just children that are elements (the default)?} 47 | } 48 | \value{ 49 | A node or nodeset (possibly empty). Results are always de-duplicated. 50 | } 51 | \description{ 52 | \code{xml_children} returns only elements, \code{xml_contents} returns 53 | all nodes. \code{xml_length} returns the number of children. 54 | \code{xml_parent} returns the parent node, \code{xml_parents} 55 | returns all parents up to the root. \code{xml_siblings} returns all nodes 56 | at the same level. \code{xml_child} makes it easy to specify a specific 57 | child to return. 
58 | } 59 | \examples{ 60 | x <- read_xml("<foo> <bar><boo /></bar> <baz/> </foo>") 61 | xml_children(x) 62 | xml_children(xml_children(x)) 63 | xml_siblings(xml_children(x)[[1]]) 64 | 65 | # Note that each unique node only appears once in the output 66 | xml_parent(xml_children(x)) 67 | 68 | # Mixed content 69 | x <- read_xml("<foo> a <b/> c <d>e</d> f</foo>") 70 | # Children gets the elements, contents gets all node types 71 | xml_children(x) 72 | xml_contents(x) 73 | 74 | xml_length(x) 75 | xml_length(x, only_elements = FALSE) 76 | 77 | # xml_child makes it easier to select specific children 78 | xml_child(x) 79 | xml_child(x, 2) 80 | xml_child(x, "baz") 81 | } 82 | -------------------------------------------------------------------------------- /man/xml_comment.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/classes.R 3 | \name{xml_comment} 4 | \alias{xml_comment} 5 | \title{Construct a comment node} 6 | \usage{ 7 | xml_comment(content) 8 | } 9 | \arguments{ 10 | \item{content}{The comment content} 11 | } 12 | \description{ 13 | Construct a comment node 14 | } 15 | \examples{ 16 | x <- xml_new_document() 17 | r <- xml_add_child(x, "root") 18 | xml_add_child(r, xml_comment("Hello!")) 19 | as.character(x) 20 | } 21 | -------------------------------------------------------------------------------- /man/xml_dtd.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/classes.R 3 | \name{xml_dtd} 4 | \alias{xml_dtd} 5 | \title{Construct a document type definition} 6 | \usage{ 7 | xml_dtd(name = "", external_id = "", system_id = "") 8 | } 9 | \arguments{ 10 | \item{name}{The name of the declaration} 11 | 12 | \item{external_id}{The external ID of the declaration} 13 | 14 | \item{system_id}{The system ID of the declaration} 15 | } 16 | 
\description{ 17 | This is used to create simple document type definitions. If you need to 18 | create a more complicated definition with internal subsets it is recommended 19 | to parse a string directly with \code{read_xml()}. 20 | } 21 | \examples{ 22 | r <- xml_new_root( 23 | xml_dtd( 24 | "html", 25 | "-//W3C//DTD XHTML 1.0 Transitional//EN", 26 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd" 27 | ) 28 | ) 29 | 30 | # Use read_xml directly for more complicated DTD 31 | d <- read_xml( 32 | '<!DOCTYPE doc [ 33 | <!ELEMENT doc (#PCDATA)> 34 | <!ENTITY foo " test "> 35 | ]> 36 | <doc>This is a valid document &foo; !</doc>' 37 | ) 38 | } 39 | -------------------------------------------------------------------------------- /man/xml_find_all.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/xml_find.R 3 | \name{xml_find_all} 4 | \alias{xml_find_all} 5 | \alias{xml_find_all.xml_nodeset} 6 | \alias{xml_find_first} 7 | \alias{xml_find_num} 8 | \alias{xml_find_int} 9 | \alias{xml_find_chr} 10 | \alias{xml_find_lgl} 11 | \alias{xml_find_one} 12 | \title{Find nodes that match an xpath expression.} 13 | \usage{ 14 | xml_find_all(x, xpath, ns = xml_ns(x), ...) 15 | 16 | \method{xml_find_all}{xml_nodeset}(x, xpath, ns = xml_ns(x), flatten = TRUE, ...) 17 | 18 | xml_find_first(x, xpath, ns = xml_ns(x)) 19 | 20 | xml_find_num(x, xpath, ns = xml_ns(x)) 21 | 22 | xml_find_int(x, xpath, ns = xml_ns(x)) 23 | 24 | xml_find_chr(x, xpath, ns = xml_ns(x)) 25 | 26 | xml_find_lgl(x, xpath, ns = xml_ns(x)) 27 | } 28 | \arguments{ 29 | \item{x}{A document, node, or node set.} 30 | 31 | \item{xpath}{A string containing an xpath (1.0) expression.} 32 | 33 | \item{ns}{Optionally, a named vector giving prefix-url pairs, as produced 34 | by \code{\link[=xml_ns]{xml_ns()}}. If provided, all names will be explicitly 35 | qualified with the ns prefix, i.e. 
if the element \code{bar} is defined 36 | in namespace \code{foo}, it will be called \code{foo:bar}. (And 37 | similarly for attributes). Default namespaces must be given an explicit 38 | name. The ns is ignored when using \code{\link[=xml_name<-]{xml_name<-()}} and 39 | \code{\link[=xml_set_name]{xml_set_name()}}.} 40 | 41 | \item{...}{Further arguments passed to or from other methods.} 42 | 43 | \item{flatten}{A logical indicating whether to return a single, flattened 44 | nodeset or a list of nodesets.} 45 | } 46 | \value{ 47 | \code{xml_find_all} returns a nodeset if applied to a node, and a nodeset 48 | or a list of nodesets if applied to a nodeset. If there are no matches, 49 | the nodeset(s) will be empty. Within each nodeset, the result will always 50 | be unique; repeated nodes are automatically de-duplicated. 51 | 52 | \code{xml_find_first} returns a node if applied to a node, and a nodeset 53 | if applied to a nodeset. The output is \emph{always} the same size as 54 | the input. If there are no matches, \code{xml_find_first} will return a 55 | missing node; if there are multiple matches, it will return the first 56 | only. 57 | 58 | \code{xml_find_num}, \code{xml_find_chr}, \code{xml_find_lgl} return 59 | numeric, character and logical results respectively. 60 | } 61 | \description{ 62 | Xpath is like regular expressions for trees - it's worth learning if 63 | you're trying to extract nodes from arbitrary locations in a document. 64 | Use \code{xml_find_all} to find all matches - if there's no match you'll 65 | get an empty result. Use \code{xml_find_first} to find a specific match - 66 | if there's no match you'll get an \code{xml_missing} node. 67 | } 68 | \section{Deprecated functions}{ 69 | 70 | \code{xml_find_one()} has been deprecated. Instead use 71 | \code{xml_find_first()}. 
72 | } 73 | 74 | \examples{ 75 | x <- read_xml("<foo><bar><baz/></bar><baz/></foo>") 76 | xml_find_all(x, ".//baz") 77 | xml_path(xml_find_all(x, ".//baz")) 78 | 79 | # Note the difference between .// and // 80 | # // finds anywhere in the document (ignoring the current node) 81 | # .// finds anywhere beneath the current node 82 | (bar <- xml_find_all(x, ".//bar")) 83 | xml_find_all(bar, ".//baz") 84 | xml_find_all(bar, "//baz") 85 | 86 | # Find all vs find one ----------------------------------------------------- 87 | x <- read_xml("<body> 88 | <p>Some <b>text</b>.</p> 89 | <p>Some <b>other</b> <b>text</b>.</p> 90 | <p>No bold here!</p> 91 | </body>") 92 | para <- xml_find_all(x, ".//p") 93 | 94 | # By default, if you apply xml_find_all to a nodeset, it finds all matches, 95 | # de-duplicates them, and returns as a single nodeset. This means you 96 | # never know how many results you'll get 97 | xml_find_all(para, ".//b") 98 | 99 | # If you set flatten to FALSE, though, xml_find_all will return a list of 100 | # nodesets, where each nodeset contains the matches for the corresponding 101 | # node in the original nodeset. 102 | xml_find_all(para, ".//b", flatten = FALSE) 103 | 104 | # xml_find_first only returns the first match per input node. 
If there are 0 105 | # matches it will return a missing node 106 | xml_find_first(para, ".//b") 107 | xml_text(xml_find_first(para, ".//b")) 108 | 109 | # Namespaces --------------------------------------------------------------- 110 | # If the document uses namespaces, you'll need to use xml_ns to form 111 | # a unique mapping between full namespace url and a short prefix 112 | x <- read_xml(' 113 | <root xmlns:f = "http://foo.com" xmlns:g = "http://bar.com"> 114 | <f:doc><g:baz /></f:doc> 115 | <f:doc><g:baz /></f:doc> 116 | </root> 117 | ') 118 | xml_find_all(x, ".//f:doc") 119 | xml_find_all(x, ".//f:doc", xml_ns(x)) 120 | } 121 | \seealso{ 122 | \code{\link[=xml_ns_strip]{xml_ns_strip()}} to remove the default namespaces 123 | } 124 | -------------------------------------------------------------------------------- /man/xml_missing.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/xml_missing.R 3 | \name{xml_missing} 4 | \alias{xml_missing} 5 | \title{Construct a missing xml object} 6 | \usage{ 7 | xml_missing() 8 | } 9 | \description{ 10 | Construct a missing xml object 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /man/xml_name.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/xml_name.R 3 | \name{xml_name} 4 | \alias{xml_name} 5 | \alias{xml_name<-} 6 | \alias{xml_set_name} 7 | \title{The (tag) name of an xml element.} 8 | \usage{ 9 | xml_name(x, ns = character()) 10 | 11 | xml_name(x, ns = character()) <- value 12 | 13 | xml_set_name(x, value, ns = character()) 14 | } 15 | \arguments{ 16 | \item{x}{A document, node, or node set.} 17 | 18 | \item{ns}{Optionally, a named vector giving prefix-url pairs, as produced 19 | by 
\code{\link[=xml_ns]{xml_ns()}}. If provided, all names will be explicitly 20 | qualified with the ns prefix, i.e. if the element \code{bar} is defined 21 | in namespace \code{foo}, it will be called \code{foo:bar}. (And 22 | similarly for attributes). Default namespaces must be given an explicit 23 | name. The ns is ignored when using \code{\link[=xml_name<-]{xml_name<-()}} and 24 | \code{\link[=xml_set_name]{xml_set_name()}}.} 25 | 26 | \item{value}{a character vector with replacement name.} 27 | } 28 | \value{ 29 | A character vector. 30 | } 31 | \description{ 32 | The (tag) name of an xml element. 33 | 34 | Modify the (tag) name of an element 35 | } 36 | \examples{ 37 | x <- read_xml("<bar>123</bar>") 38 | xml_name(x) 39 | 40 | y <- read_xml("<bar><baz>1</baz>abc<foo /></bar>") 41 | z <- xml_children(y) 42 | xml_name(xml_children(y)) 43 | } 44 | -------------------------------------------------------------------------------- /man/xml_new_document.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/xml_modify.R 3 | \name{xml_new_document} 4 | \alias{xml_new_document} 5 | \alias{xml_new_root} 6 | \title{Create a new document, possibly with a root node} 7 | \usage{ 8 | xml_new_document(version = "1.0", encoding = "UTF-8") 9 | 10 | xml_new_root( 11 | .value, 12 | ..., 13 | .copy = inherits(.value, "xml_node"), 14 | .version = "1.0", 15 | .encoding = "UTF-8" 16 | ) 17 | } 18 | \arguments{ 19 | \item{version}{The version number of the document.} 20 | 21 | \item{encoding}{The character encoding to use in the document. The default 22 | encoding is \sQuote{UTF-8}. 
Available encodings are specified at 23 | \url{http://xmlsoft.org/html/libxml-encoding.html#xmlCharEncoding}.} 24 | 25 | \item{.value}{node to insert.} 26 | 27 | \item{...}{If named, attributes or namespaces to set on the node; if unnamed, 28 | text to assign to the node.} 29 | 30 | \item{.copy}{whether to copy the \code{.value} before replacing. If this is \code{FALSE} 31 | then the node will be moved from its current location.} 32 | 33 | \item{.version}{The version number of the document, passed to \code{xml_new_document(version)}.} 34 | 35 | \item{.encoding}{The encoding of the document, passed to \code{xml_new_document(encoding)}.} 36 | } 37 | \value{ 38 | An \code{xml_document} object. 39 | } 40 | \description{ 41 | \code{xml_new_document} creates only a new document without a root node. In 42 | most cases you should instead use \code{xml_new_root}, which creates a new 43 | document and assigns the root node in one step. 44 | } 45 | -------------------------------------------------------------------------------- /man/xml_ns.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/xml_namespaces.R 3 | \name{xml_ns} 4 | \alias{xml_ns} 5 | \alias{xml_ns_rename} 6 | \title{XML namespaces.} 7 | \usage{ 8 | xml_ns(x) 9 | 10 | xml_ns_rename(old, ...) 11 | } 12 | \arguments{ 13 | \item{x}{A document, node, or node set.} 14 | 15 | \item{old, ...}{An existing xml_namespace object followed by name-value 16 | (old prefix-new prefix) pairs to replace.} 17 | } 18 | \value{ 19 | A character vector with class \code{xml_namespace} so the 20 | default display is a little nicer. 21 | } 22 | \description{ 23 | \code{xml_ns} extracts all namespaces from a document, matching each 24 | unique namespace url with the prefix it was first associated with. Default 25 | namespaces are named \code{d1}, \code{d2} etc. Use \code{xml_ns_rename} 26 | to change the prefixes.
Once you have a namespace object, you can pass it to 27 | other functions to work with fully qualified names instead of local names. 28 | } 29 | \examples{ 30 | x <- read_xml(' 31 | <root> 32 | <doc1 xmlns = "http://foo.com"><baz /></doc1> 33 | <doc2 xmlns = "http://bar.com"><baz /></doc2> 34 | </root> 35 | ') 36 | xml_ns(x) 37 | 38 | # When there are default namespaces, it's a good idea to rename 39 | # them to give informative names: 40 | ns <- xml_ns_rename(xml_ns(x), d1 = "foo", d2 = "bar") 41 | ns 42 | 43 | # Now we can pass ns to other xml function to use fully qualified names 44 | baz <- xml_children(xml_children(x)) 45 | xml_name(baz) 46 | xml_name(baz, ns) 47 | 48 | xml_find_all(x, "//baz") 49 | xml_find_all(x, "//foo:baz", ns) 50 | 51 | str(as_list(x)) 52 | str(as_list(x, ns)) 53 | } 54 | -------------------------------------------------------------------------------- /man/xml_ns_strip.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/xml_modify.R 3 | \name{xml_ns_strip} 4 | \alias{xml_ns_strip} 5 | \title{Strip the default namespaces from a document} 6 | \usage{ 7 | xml_ns_strip(x) 8 | } 9 | \arguments{ 10 | \item{x}{A document, node, or node set.} 11 | } 12 | \description{ 13 | Strip the default namespaces from a document 14 | } 15 | \examples{ 16 | x <- read_xml( 17 | "<foo xmlns = 'http://foo.com'> 18 | <baz/> 19 | <bar xmlns = 'http://bar.com'> 20 | <baz/> 21 | </bar> 22 | </foo>" 23 | ) 24 | # Need to specify the default namespaces to find the baz nodes 25 | xml_find_all(x, "//d1:baz") 26 | xml_find_all(x, "//d2:baz") 27 | 28 | # After stripping the default namespaces you can find both baz nodes directly 29 | xml_ns_strip(x) 30 | xml_find_all(x, "//baz") 31 | } 32 | -------------------------------------------------------------------------------- /man/xml_path.Rd: 
-------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/xml_path.R 3 | \name{xml_path} 4 | \alias{xml_path} 5 | \title{Retrieve the xpath to a node} 6 | \usage{ 7 | xml_path(x) 8 | } 9 | \arguments{ 10 | \item{x}{A document, node, or node set.} 11 | } 12 | \value{ 13 | A character vector. 14 | } 15 | \description{ 16 | This is useful when you want to figure out where nodes matching an 17 | xpath expression live in a document. 18 | } 19 | \examples{ 20 | x <- read_xml("<foo><bar><baz /></bar><baz /></foo>") 21 | xml_path(xml_find_all(x, ".//baz")) 22 | } 23 | -------------------------------------------------------------------------------- /man/xml_replace.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/xml_modify.R 3 | \name{xml_replace} 4 | \alias{xml_replace} 5 | \alias{xml_add_sibling} 6 | \alias{xml_add_child} 7 | \alias{xml_add_parent} 8 | \alias{xml_remove} 9 | \title{Modify a tree by inserting, replacing or removing nodes} 10 | \usage{ 11 | xml_replace(.x, .value, ..., .copy = TRUE) 12 | 13 | xml_add_sibling(.x, .value, ..., .where = c("after", "before"), .copy = TRUE) 14 | 15 | xml_add_child(.x, .value, ..., .where = length(xml_children(.x)), .copy = TRUE) 16 | 17 | xml_add_parent(.x, .value, ...) 18 | 19 | xml_remove(.x, free = FALSE) 20 | } 21 | \arguments{ 22 | \item{.x}{a document, node or nodeset.} 23 | 24 | \item{.value}{node to insert.} 25 | 26 | \item{...}{If named attributes or namespaces to set on the node, if unnamed 27 | text to assign to the node.} 28 | 29 | \item{.copy}{whether to copy the \code{.value} before replacing. 
If this is \code{FALSE} 30 | then the node will be moved from its current location.} 31 | 32 | \item{.where}{to add the new node, for \code{xml_add_child} the position 33 | after which to add, use \code{0} for the first child. For 34 | \code{xml_add_sibling} either \sQuote{"before"} or \sQuote{"after"} 35 | indicating if the new node should be before or after \code{.x}.} 36 | 37 | \item{free}{When removing the node also free the memory used for that node. 38 | Note if you use this option you cannot use any existing objects pointing to 39 | the node or its children; doing so is likely to crash R or return garbage.} 40 | } 41 | \description{ 42 | \code{xml_add_sibling()} and \code{xml_add_child()} are used to insert a node 43 | as a sibling or a child. \code{xml_add_parent()} adds a new parent in 44 | between the input node and the current parent. \code{xml_replace()} 45 | replaces an existing node with a new node. \code{xml_remove()} removes a 46 | node from the tree. 47 | } 48 | \details{ 49 | Care needs to be taken when using \code{xml_remove()}: with \code{free = TRUE}, existing objects pointing to the removed node or its children must not be used afterwards. 50 | } 51 | -------------------------------------------------------------------------------- /man/xml_serialize.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/xml_serialize.R 3 | \name{xml_serialize} 4 | \alias{xml_serialize} 5 | \alias{xml_unserialize} 6 | \title{Serializing XML objects to connections.} 7 | \usage{ 8 | xml_serialize(object, connection, ...) 9 | 10 | xml_unserialize(connection, ...)
11 | } 12 | \arguments{ 13 | \item{object}{\R object to serialize.} 14 | 15 | \item{connection}{an open \link[base]{connection} or (for \code{serialize}) 16 | \code{NULL} or (for \code{unserialize}) a raw vector 17 | (see \sQuote{Details}).} 18 | 19 | \item{...}{Additional arguments passed to \code{\link[=read_xml]{read_xml()}}.} 20 | } 21 | \value{ 22 | For \code{serialize}, \code{NULL} unless \code{connection = NULL}, when 23 | the result is returned in a raw vector. 24 | 25 | For \code{unserialize} an \R object. 26 | } 27 | \description{ 28 | Serializing XML objects to connections. 29 | } 30 | \examples{ 31 | library(xml2) 32 | x <- read_xml("<a> 33 | <b><c>123</c></b> 34 | <b><c>456</c></b> 35 | </a>") 36 | 37 | b <- xml_find_all(x, "//b") 38 | out <- xml_serialize(b, NULL) 39 | xml_unserialize(out) 40 | } 41 | -------------------------------------------------------------------------------- /man/xml_set_namespace.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/xml_modify.R 3 | \name{xml_set_namespace} 4 | \alias{xml_set_namespace} 5 | \title{Set the node's namespace} 6 | \usage{ 7 | xml_set_namespace(.x, prefix = "", uri = "") 8 | } 9 | \arguments{ 10 | \item{.x}{a node} 11 | 12 | \item{prefix}{The namespace prefix to use} 13 | 14 | \item{uri}{The namespace URI to use} 15 | } 16 | \value{ 17 | the node (invisibly) 18 | } 19 | \description{ 20 | The namespace to be set must be already defined in one of the node's 21 | ancestors. 
22 | } 23 | -------------------------------------------------------------------------------- /man/xml_structure.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/xml_structure.R 3 | \name{xml_structure} 4 | \alias{xml_structure} 5 | \alias{html_structure} 6 | \title{Show the structure of an html/xml document.} 7 | \usage{ 8 | xml_structure(x, indent = 2, file = "") 9 | 10 | html_structure(x, indent = 2, file = "") 11 | } 12 | \arguments{ 13 | \item{x}{HTML/XML document (or part thereof)} 14 | 15 | \item{indent}{Number of spaces to indent} 16 | 17 | \item{file}{A \link[base]{connection}, or a character string naming the file 18 | to print to. If \code{""} (the default), \code{cat} prints to the 19 | standard output connection, the console unless redirected by 20 | \code{\link[base]{sink}}. 21 | If it is \code{"|cmd"}, the output is piped to the command given 22 | by \file{cmd}, by opening a pipe connection. 23 | } 24 | } 25 | \description{ 26 | Show the structure of an html/xml document without displaying any of 27 | the values. This is useful if you want to get a high-level view of the 28 | way a document is organised. Compared to \code{xml_structure}, 29 | \code{html_structure} prints the id and class attributes.
30 | } 31 | \examples{ 32 | xml_structure(read_xml("<a><b><c/><c/></b><d/></a>")) 33 | 34 | rproj <- read_html(system.file("extdata", "r-project.html", package = "xml2")) 35 | xml_structure(rproj) 36 | xml_structure(xml_find_all(rproj, ".//p")) 37 | 38 | h <- read_html("<body><p id = 'a'></p><p class = 'c d'></p></body>") 39 | html_structure(h) 40 | } 41 | -------------------------------------------------------------------------------- /man/xml_text.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/xml_text.R 3 | \name{xml_text} 4 | \alias{xml_text} 5 | \alias{xml_text<-} 6 | \alias{xml_set_text} 7 | \alias{xml_double} 8 | \alias{xml_integer} 9 | \title{Extract or modify the text} 10 | \usage{ 11 | xml_text(x, trim = FALSE) 12 | 13 | xml_text(x) <- value 14 | 15 | xml_set_text(x, value) 16 | 17 | xml_double(x) 18 | 19 | xml_integer(x) 20 | } 21 | \arguments{ 22 | \item{x}{A document, node, or node set.} 23 | 24 | \item{trim}{If \code{TRUE} will trim leading and trailing spaces.} 25 | 26 | \item{value}{character vector with replacement text.} 27 | } 28 | \value{ 29 | A character vector, the same length as x. 30 | } 31 | \description{ 32 | \code{xml_text} returns a character vector, \code{xml_double} returns a 33 | numeric vector, \code{xml_integer} returns an integer vector. 34 | } 35 | \examples{ 36 | x <- read_xml("<p>This is some text. This is <b>bold!</b></p>") 37 | xml_text(x) 38 | xml_text(xml_children(x)) 39 | 40 | x <- read_xml("<x>This is some text. 
<x>This is some nested text.</x></x>") 41 | xml_text(x) 42 | xml_text(xml_find_all(x, "//x")) 43 | 44 | x <- read_xml("<p> Some text </p>") 45 | xml_text(x, trim = TRUE) 46 | 47 | # xml_double() and xml_integer() are useful for extracting numeric attributes 48 | x <- read_xml("<plot><point x='1' y='2' /><point x='2' y='1' /></plot>") 49 | xml_integer(xml_find_all(x, "//@x")) 50 | } 51 | -------------------------------------------------------------------------------- /man/xml_type.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/xml_type.R 3 | \name{xml_type} 4 | \alias{xml_type} 5 | \title{Determine the type of a node.} 6 | \usage{ 7 | xml_type(x) 8 | } 9 | \arguments{ 10 | \item{x}{A document, node, or node set.} 11 | } 12 | \description{ 13 | Determine the type of a node. 14 | } 15 | \examples{ 16 | x <- read_xml("<foo> a <b /> <![CDATA[ blah]]></foo>") 17 | xml_type(x) 18 | xml_type(xml_contents(x)) 19 | } 20 | -------------------------------------------------------------------------------- /man/xml_url.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/xml_url.R 3 | \name{xml_url} 4 | \alias{xml_url} 5 | \title{The URL of an XML document} 6 | \usage{ 7 | xml_url(x) 8 | } 9 | \arguments{ 10 | \item{x}{A node or document.} 11 | } 12 | \value{ 13 | A character vector of length 1. Returns \code{NA} if the name is 14 | not set. 15 | } 16 | \description{ 17 | This is useful for interpreting relative urls with \code{\link[=url_relative]{url_relative()}}. 
18 | } 19 | \examples{ 20 | catalog <- read_xml(xml2_example("cd_catalog.xml")) 21 | xml_url(catalog) 22 | 23 | x <- read_xml("<foo/>") 24 | xml_url(x) 25 | } 26 | -------------------------------------------------------------------------------- /man/xml_validate.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/xml_schema.R 3 | \name{xml_validate} 4 | \alias{xml_validate} 5 | \title{Validate XML schema} 6 | \usage{ 7 | xml_validate(x, schema) 8 | } 9 | \arguments{ 10 | \item{x}{A document, node, or node set.} 11 | 12 | \item{schema}{an XML document containing the schema} 13 | } 14 | \value{ 15 | TRUE or FALSE 16 | } 17 | \description{ 18 | Validate an XML document against an XML 1.0 schema. 19 | } 20 | \examples{ 21 | # Example from https://msdn.microsoft.com/en-us/library/ms256129(v=vs.110).aspx 22 | doc <- read_xml(system.file("extdata/order-doc.xml", package = "xml2")) 23 | schema <- read_xml(system.file("extdata/order-schema.xml", package = "xml2")) 24 | xml_validate(doc, schema) 25 | } 26 | -------------------------------------------------------------------------------- /revdep/.gitignore: -------------------------------------------------------------------------------- 1 | **/ 2 | checks 3 | library 4 | checks.noindex 5 | library.noindex 6 | data.sqlite 7 | *.html 8 | -------------------------------------------------------------------------------- /revdep/README.md: -------------------------------------------------------------------------------- 1 | # Revdeps 2 | 3 | -------------------------------------------------------------------------------- /revdep/cran.md: -------------------------------------------------------------------------------- 1 | ## revdepcheck results 2 | 3 | We checked 2 reverse dependencies, comparing R CMD check results across CRAN and dev versions of this package. 
4 | 5 | * We saw 0 new problems 6 | * We failed to check 0 packages 7 | 8 | -------------------------------------------------------------------------------- /revdep/email.yml: -------------------------------------------------------------------------------- 1 | release_date: ??? 2 | rel_release_date: ??? 3 | my_news_url: ??? 4 | release_version: ??? 5 | release_details: ??? 6 | -------------------------------------------------------------------------------- /revdep/failures.md: -------------------------------------------------------------------------------- 1 | *Wow, no problems at all. :)* -------------------------------------------------------------------------------- /revdep/problems.md: -------------------------------------------------------------------------------- 1 | *Wow, no problems at all. :)* -------------------------------------------------------------------------------- /src/.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.so 3 | *.dll 4 | -------------------------------------------------------------------------------- /src/Makevars.in: -------------------------------------------------------------------------------- 1 | PKG_CPPFLAGS=-I../inst/include @cflags@ -DUCHAR_TYPE=wchar_t -DU_SHOW_CPLUSPLUS_API=0 -DSTRICT_R_HEADERS -DR_NO_REMAP 2 | PKG_CFLAGS=$(C_VISIBILITY) 3 | PKG_CXXFLAGS=$(CXX_VISIBILITY) 4 | PKG_LIBS=@libs@ 5 | -------------------------------------------------------------------------------- /src/Makevars.win: -------------------------------------------------------------------------------- 1 | PKG_CONFIG_NAME = libxml-2.0 2 | PKG_CONFIG ?= $(BINPREF)pkg-config 3 | PKG_LIBS := $(shell $(PKG_CONFIG) --libs $(PKG_CONFIG_NAME)) 4 | STATIC_CFLAGS = -DSTRICT_R_HEADERS -DR_NO_REMAP -DLIBXML_STATIC -I../inst/include 5 | 6 | ifneq ($(PKG_LIBS),) 7 | $(info using $(PKG_CONFIG_NAME) from Rtools) 8 | PKG_CPPFLAGS := $(shell $(PKG_CONFIG) --cflags $(PKG_CONFIG_NAME)) $(STATIC_CFLAGS) 9 | else 10 | 
RWINLIB = ../windows/libxml2 11 | PKG_CPPFLAGS = -I$(RWINLIB)/include -I$(RWINLIB)/include/libxml2 $(STATIC_CFLAGS) 12 | PKG_LIBS = -L$(RWINLIB)/lib$(subst gcc,,$(COMPILED_BY))$(R_ARCH) -L$(RWINLIB)/lib \ 13 | -lxml2 -liconv -lz -lws2_32 14 | endif 15 | 16 | all: $(SHLIB) 17 | 18 | $(OBJECTS): $(RWINLIB) 19 | 20 | $(RWINLIB): 21 | "${R_HOME}/bin${R_ARCH_BIN}/Rscript.exe" "../tools/winlibs.R" 22 | 23 | clean: 24 | rm -f $(SHLIB) $(OBJECTS) 25 | -------------------------------------------------------------------------------- /src/connection.cpp: -------------------------------------------------------------------------------- 1 | #include <Rinternals.h> 2 | #include <iterator> 3 | #include <vector> 4 | #include "xml2_utils.h" 5 | 6 | // Wrapper around R's read_bin function 7 | SEXP read_bin(SEXP con, size_t bytes) { 8 | SEXP e; 9 | SEXP raw_sxp = PROTECT(Rf_mkString("raw")); 10 | SEXP bytes_sxp = PROTECT(Rf_ScalarInteger(bytes)); 11 | PROTECT(e = Rf_lang4(Rf_install("readBin"), con, raw_sxp, bytes_sxp)); 12 | SEXP res = Rf_eval(e, R_GlobalEnv); 13 | UNPROTECT(3); 14 | return res; 15 | } 16 | 17 | // Wrapper around R's write_bin function 18 | SEXP write_bin(SEXP data, SEXP con) { 19 | SEXP e; 20 | PROTECT(e = Rf_lang3(Rf_install("writeBin"), data, con)); 21 | SEXP res = Rf_eval(e, R_GlobalEnv); 22 | UNPROTECT(1); 23 | return res; 24 | } 25 | 26 | // Read data from a connection in chunks and then combine into a single 27 | // raw vector. 
28 | // 29 | // [[export]] 30 | extern "C" SEXP read_connection_(SEXP con_sxp, SEXP read_size_sxp) { 31 | 32 | BEGIN_CPP 33 | std::vector<char> buffer; 34 | size_t read_size = REAL(read_size_sxp)[0]; 35 | 36 | SEXP chunk = read_bin(con_sxp, read_size); 37 | R_xlen_t chunk_size = Rf_xlength(chunk); 38 | while(chunk_size > 0) { 39 | std::copy(RAW(chunk), RAW(chunk) + chunk_size, std::back_inserter(buffer)); 40 | chunk = read_bin(con_sxp, read_size); 41 | chunk_size = Rf_xlength(chunk); 42 | } 43 | 44 | size_t size = buffer.size(); 45 | 46 | SEXP out = PROTECT(Rf_allocVector(RAWSXP, size)); 47 | std::copy(buffer.begin(), buffer.end(), RAW(out)); 48 | 49 | UNPROTECT(1); 50 | 51 | return out; 52 | 53 | END_CPP 54 | } 55 | -------------------------------------------------------------------------------- /src/connection.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include <Rinternals.h> 4 | #include <algorithm> 5 | #include <cstring> 6 | 7 | SEXP read_bin(SEXP con, size_t bytes = 64 * 1024); 8 | SEXP write_bin(SEXP data, SEXP con); 9 | 10 | inline SEXP R_GetConnection(SEXP con) { return con; } 11 | 12 | inline size_t R_ReadConnection(SEXP con, void* buf, size_t n) { 13 | SEXP res = PROTECT(read_bin(con, n)); 14 | 15 | R_xlen_t size = Rf_xlength(res); 16 | 17 | memcpy(buf, RAW(res), size); 18 | 19 | UNPROTECT(1); 20 | 21 | return Rf_xlength(res); 22 | } 23 | 24 | inline size_t R_WriteConnection(SEXP con, void* buf, size_t n) { 25 | SEXP payload = PROTECT(Rf_allocVector(RAWSXP, n)); 26 | 27 | memcpy(RAW(payload), buf, n); 28 | 29 | write_bin(payload, con); 30 | 31 | UNPROTECT(1); 32 | 33 | return n; 34 | } 35 | -------------------------------------------------------------------------------- /src/xml2_init.c: -------------------------------------------------------------------------------- 1 | #include <Rinternals.h> 2 | #include <libxml/xmlversion.h> 3 | #include <libxml/xmlerror.h> 4 | #include 
<libxml/parser.h> 5 | #include <string.h> 6 | 7 | /* * * 8 | * Author: Nick Wellnhofer <wellnhofer@aevum.de> 9 | * Date: Tue, 24 Oct 2023 15:02:36 +0200 10 | * https://github.com/GNOME/libxml2/commit/61034116d0a3c8b295c6137956adc3ae55720711 11 | * 12 | * error: Make more xmlError structs constant 13 | */ 14 | #if defined(LIBXML_VERSION) && (LIBXML_VERSION >= 21200) 15 | void handleStructuredError(void* userData, const xmlError* error) { 16 | #else 17 | void handleStructuredError(void* userData, xmlError* error) { 18 | #endif 19 | 20 | int len = strlen(error->message); 21 | if(len > 2){ 22 | error->message[len-1] = '\0'; 23 | } 24 | 25 | if (error->level <= 2) { 26 | Rf_warning("%s [%i]", error->message, (int) error->code); 27 | } else { 28 | Rf_error("%s [%i]", error->message, (int) error->code); 29 | } 30 | } 31 | 32 | void handleGenericError(void *ctx, const char *fmt, ...){ 33 | char buffer[BUFSIZ]; 34 | va_list arg; 35 | 36 | if (fmt == NULL) fmt = "(null)"; 37 | 38 | va_start(arg, fmt); 39 | vsnprintf(buffer, BUFSIZ, fmt, arg); 40 | Rf_error("%s", buffer); 41 | } 42 | 43 | void init_libxml2_library(void) { 44 | // Check that header and libs are compatible 45 | LIBXML_TEST_VERSION 46 | 47 | xmlInitParser(); 48 | xmlSetStructuredErrorFunc(NULL, handleStructuredError); 49 | xmlSetGenericErrorFunc(NULL, handleGenericError); 50 | } 51 | 52 | -------------------------------------------------------------------------------- /src/xml2_namespace.cpp: -------------------------------------------------------------------------------- 1 | #include <Rinternals.h> 2 | #include <libxml/tree.h> 3 | 4 | #include "xml2_types.h" 5 | #include "xml2_utils.h" 6 | 7 | // [[export]] 8 | extern "C" SEXP unique_ns(SEXP ns) { 9 | BEGIN_CPP 10 | return NsMap(ns).out(); 11 | END_CPP 12 | } 13 | 14 | void cache_namespace(xmlNode* node, NsMap* nsMap) { 15 | // Iterate over namespace definitions 16 | for(xmlNs* cur = node->nsDef; cur != NULL; cur = cur->next) { 17 | nsMap->add(cur->prefix, 
cur->href); 18 | } 19 | 20 | // Iterate over children, calling this function recursively 21 | //for(xmlNode* cur = node->children; cur != NULL; cur = cur->next) 22 | for(xmlNode* cur = node->children; cur != NULL && cur->type != XML_ENTITY_DECL; cur = cur->next) 23 | cache_namespace(cur, nsMap); 24 | } 25 | 26 | // [[export]] 27 | extern "C" SEXP doc_namespaces(SEXP doc_sxp) { 28 | BEGIN_CPP 29 | XPtrDoc doc(doc_sxp); 30 | 31 | NsMap nsMap; 32 | 33 | xmlNode* root = xmlDocGetRootElement(doc.checked_get()); 34 | cache_namespace(root, &nsMap); 35 | 36 | return nsMap.out(); 37 | END_CPP 38 | } 39 | 40 | // [[export]] 41 | extern "C" SEXP ns_lookup_uri(SEXP doc_sxp, SEXP node_sxp, SEXP uri_sxp) { 42 | BEGIN_CPP 43 | XPtrDoc doc(doc_sxp); 44 | XPtrNode node(node_sxp); 45 | 46 | xmlNsPtr ns = xmlSearchNsByHref(doc.checked_get(), node.checked_get(), asXmlChar(uri_sxp)); 47 | if (ns == NULL) { 48 | Rf_error("No namespace with URI `%s` found", CHAR(STRING_ELT(uri_sxp, 0))); 49 | } 50 | XPtrNs out(ns); 51 | return SEXP(out); 52 | END_CPP 53 | } 54 | 55 | // [[export]] 56 | extern "C" SEXP ns_lookup(SEXP doc_sxp, SEXP node_sxp, SEXP prefix_sxp) { 57 | BEGIN_CPP 58 | XPtrDoc doc(doc_sxp); 59 | XPtrNode node(node_sxp); 60 | 61 | xmlNsPtr ns = NULL; 62 | if (Rf_xlength(STRING_ELT(prefix_sxp, 0)) == 0) { 63 | ns = xmlSearchNs(doc.checked_get(), node.checked_get(), NULL); 64 | } else { 65 | ns = xmlSearchNs(doc.checked_get(), node.checked_get(), asXmlChar(prefix_sxp)); 66 | if (ns == NULL) { 67 | Rf_error("No namespace with prefix `%s` found", CHAR(STRING_ELT(prefix_sxp, 0))); 68 | } 69 | } 70 | 71 | XPtrNs out(ns); 72 | return SEXP(out); 73 | END_CPP 74 | } 75 | 76 | // [[export]] 77 | extern "C" SEXP libxml2_version_(){ 78 | return Rf_mkString(LIBXML_DOTTED_VERSION); 79 | } 80 | -------------------------------------------------------------------------------- /src/xml2_schema.cpp: -------------------------------------------------------------------------------- 1 | #include 
<Rinternals.h> 2 | #include <libxml/xmlschemas.h> 3 | #include <vector> 4 | #include <string> 5 | 6 | #include "xml2_types.h" 7 | #include "xml2_utils.h" 8 | 9 | /* * * 10 | * Author: Nick Wellnhofer <wellnhofer@aevum.de> 11 | * Date: Tue, 24 Oct 2023 15:02:36 +0200 12 | * https://github.com/GNOME/libxml2/commit/61034116d0a3c8b295c6137956adc3ae55720711 13 | * 14 | * error: Make more xmlError structs constant 15 | */ 16 | #if defined(LIBXML_VERSION) && (LIBXML_VERSION >= 21200) 17 | void handleSchemaError(void* userData, const xmlError* error) { 18 | #else 19 | void handleSchemaError(void* userData, xmlError* error) { 20 | #endif 21 | std::vector<std::string> * vec = (std::vector<std::string> *) userData; 22 | std::string message = std::string(error->message); 23 | message.resize(message.size() - 1); 24 | vec->push_back(message); 25 | } 26 | 27 | // [[export]] 28 | extern "C" SEXP doc_validate(SEXP doc_sxp, SEXP schema_sxp) { 29 | 30 | XPtrDoc doc(doc_sxp); 31 | XPtrDoc schema(schema_sxp); 32 | 33 | BEGIN_CPP 34 | 35 | std::vector<std::string> vec; 36 | 37 | xmlSchemaParserCtxtPtr cptr = xmlSchemaNewDocParserCtxt(schema.checked_get()); 38 | 39 | xmlSchemaSetParserStructuredErrors(cptr, handleSchemaError, &vec); 40 | 41 | xmlSchemaPtr sptr = xmlSchemaParse(cptr); 42 | 43 | xmlSchemaValidCtxtPtr vptr = xmlSchemaNewValidCtxt(sptr); 44 | 45 | xmlSchemaSetValidStructuredErrors(vptr, handleSchemaError, &vec); 46 | 47 | SEXP out = PROTECT(Rf_allocVector(LGLSXP, 1)); 48 | 49 | LOGICAL(out)[0] = xmlSchemaValidateDoc(vptr, doc.checked_get()) == 0; 50 | 51 | xmlSchemaFreeParserCtxt(cptr); 52 | xmlSchemaFreeValidCtxt(vptr); 53 | xmlSchemaFree(sptr); 54 | 55 | SEXP errors = PROTECT(Rf_allocVector(STRSXP, vec.size())); 56 | for (size_t i = 0; i < vec.size(); ++i) { 57 | SET_STRING_ELT(errors, i, Rf_mkCharLenCE(vec[i].c_str(), vec[i].size(), CE_UTF8)); 58 | } 59 | Rf_setAttrib(out, Rf_install("errors"), errors); 60 | 61 | 62 | UNPROTECT(2); 63 | return out; 64 | 65 | END_CPP 66 | 
} 67 | -------------------------------------------------------------------------------- /src/xml2_xpath.cpp: -------------------------------------------------------------------------------- 1 | #include <Rinternals.h> 2 | #include <libxml/xpath.h> 3 | #include <libxml/xpathInternals.h> 4 | #include <libxml/tree.h> 5 | #include "xml2_types.h" 6 | #include <algorithm> 7 | 8 | class XmlSeeker { 9 | xmlXPathContext* context_; 10 | xmlXPathObject* result_; 11 | XPtrDoc doc_; 12 | 13 | public: 14 | 15 | XmlSeeker(XPtrDoc doc, xmlNode* node) : result_(NULL), doc_(doc) { 16 | context_ = xmlXPathNewContext(doc.checked_get()); 17 | // Set context to current node 18 | context_->node = node; 19 | } 20 | 21 | void registerNamespace(SEXP nsMap) { 22 | R_xlen_t n = Rf_xlength(nsMap); 23 | if (n == 0) { 24 | return; 25 | } 26 | 27 | SEXP prefix = Rf_getAttrib(nsMap, R_NamesSymbol); 28 | 29 | for (int i = 0; i < n; ++i) { 30 | xmlChar* prefixI = (xmlChar*) CHAR(STRING_ELT(prefix, i)); 31 | xmlChar* urlI = (xmlChar*) CHAR(STRING_ELT(nsMap, i)); 32 | 33 | if (xmlXPathRegisterNs(context_, prefixI, urlI) != 0) 34 | Rf_error("Failed to register namespace (%s <-> %s)", prefixI, urlI); 35 | } 36 | } 37 | 38 | SEXP search(const char* xpath, int num_results) { 39 | result_ = xmlXPathEval((const xmlChar*)xpath, context_); 40 | if (result_ == NULL) { 41 | SEXP ret = PROTECT(Rf_allocVector(VECSXP, 0)); 42 | Rf_setAttrib(ret, R_ClassSymbol, Rf_mkString("xml_missing")); 43 | UNPROTECT(1); 44 | return ret; 45 | } 46 | 47 | switch (result_->type) { 48 | case XPATH_NODESET: 49 | { 50 | xmlNodeSet* nodes = result_->nodesetval; 51 | if (nodes == NULL || nodes->nodeNr == 0) { 52 | SEXP ret = PROTECT(Rf_allocVector(VECSXP, 0)); 53 | Rf_setAttrib(ret, R_ClassSymbol, Rf_mkString("xml_missing")); 54 | UNPROTECT(1); 55 | return ret; 56 | } 57 | int n = std::min(result_->nodesetval->nodeNr, num_results); 58 | 59 | SEXP out = PROTECT(Rf_allocVector(VECSXP, n)); 60 | 61 | SEXP names = 
PROTECT(Rf_allocVector(STRSXP, 2)); 62 | SET_STRING_ELT(names, 0, Rf_mkChar("node")); 63 | SET_STRING_ELT(names, 1, Rf_mkChar("doc")); 64 | 65 | for (int i = 0; i < n; i++) { 66 | SEXP ret = PROTECT(Rf_allocVector(VECSXP, 2)); 67 | 68 | SET_VECTOR_ELT(ret, 0, XPtrNode(nodes->nodeTab[i])); 69 | SET_VECTOR_ELT(ret, 1, doc_); 70 | 71 | Rf_setAttrib(ret, R_NamesSymbol, names); 72 | Rf_setAttrib(ret, R_ClassSymbol, Rf_mkString("xml_node")); 73 | 74 | SET_VECTOR_ELT(out, i, ret); 75 | 76 | UNPROTECT(1); 77 | } 78 | 79 | UNPROTECT(2); 80 | return out; 81 | } 82 | case XPATH_NUMBER: { return Rf_ScalarReal(result_->floatval); } 83 | case XPATH_BOOLEAN: { return Rf_ScalarLogical(result_->boolval); } 84 | case XPATH_STRING: { return Rf_ScalarString(Rf_mkCharCE((char *) result_->stringval, CE_UTF8)); } 85 | default: 86 | Rf_error("XPath result type: %d not supported", result_->type); 87 | } 88 | 89 | return R_NilValue; 90 | } 91 | 92 | ~XmlSeeker() { 93 | try { 94 | xmlXPathFreeContext(context_); 95 | if (result_ != NULL) 96 | xmlXPathFreeObject(result_); 97 | } catch (...) 
{} 98 | } 99 | 100 | }; 101 | 102 | // [[export]] 103 | extern "C" SEXP xpath_search(SEXP node_sxp, SEXP doc_sxp, SEXP xpath_sxp, SEXP nsMap_sxp, SEXP num_results_sxp) { 104 | 105 | XPtrNode node(node_sxp); 106 | XPtrDoc doc(doc_sxp); 107 | if (TYPEOF(xpath_sxp) != STRSXP) { 108 | Rf_error("XPath must be a string, received %s", Rf_type2char(TYPEOF(xpath_sxp))); 109 | } 110 | const char* xpath = CHAR(STRING_ELT(xpath_sxp, 0)); 111 | 112 | double num_results = REAL(num_results_sxp)[0]; 113 | 114 | if (num_results == R_PosInf) { 115 | num_results = INT_MAX; 116 | } 117 | XmlSeeker seeker(doc, node.checked_get()); 118 | seeker.registerNamespace(nsMap_sxp); 119 | return seeker.search(xpath, num_results); 120 | } 121 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | # This file is part of the standard setup for testthat. 2 | # It is recommended that you do not modify it. 3 | # 4 | # Where should you do additional test configuration? 5 | # Learn more about the roles of various files in: 6 | # * https://r-pkgs.org/testing-design.html#sec-tests-files-overview 7 | # * https://testthat.r-lib.org/articles/special-files.html 8 | 9 | library(testthat) 10 | library(xml2) 11 | 12 | is_solaris <- tolower(Sys.info()[["sysname"]]) == "sunos" 13 | 14 | if (!is_solaris) { 15 | test_check("xml2") 16 | } 17 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/xml_attr.md: -------------------------------------------------------------------------------- 1 | # xml_attrs<- modifies all attributes 2 | 3 | Code 4 | xml_attrs(docs) <- "test" 5 | Condition 6 | Error in `xml_attrs<-`: 7 | ! `test` must be a list of named character vectors. 
8 | 9 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/xml_children.md: -------------------------------------------------------------------------------- 1 | # xml_child() errors if more than one search is given 2 | 3 | Code 4 | xml_child(x, 1:2) 5 | Condition 6 | Error in `xml_child()`: 7 | ! `1` and `2` must be of length 1. 8 | 9 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/xml_document.md: -------------------------------------------------------------------------------- 1 | # print method is correct 2 | 3 | Code 4 | print(x) 5 | Output 6 | {html_document} 7 | <html xmlns:og="http://ogp.me/ns#" xmlns:fb="http://www.facebook.com/2008/fbml"> 8 | [1] <head>\n<script type="text/javascript">var ue_t0=window.ue_t0||+new Date( ... 9 | [2] <body id="styleguide-v2" class="fixed">\n<script>\n if (typeof uet == ... 10 | 11 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/xml_find.md: -------------------------------------------------------------------------------- 1 | # xml_find_num errors with non numeric results 2 | 3 | Code 4 | xml_find_num(x, "//z") 5 | Condition 6 | Error in `xml_find_num()`: 7 | ! Element at path `//z` must be a number, not a <xml_missing> object. 8 | Code 9 | xml_find_num(x, "//y") 10 | Condition 11 | Error in `xml_find_num()`: 12 | ! Element at path `//y` must be a number, not a list. 13 | Code 14 | xml_find_num(x, "1=1") 15 | Condition 16 | Error in `xml_find_num()`: 17 | ! Element at path `1=1` must be a number, not `TRUE`. 18 | Code 19 | xml_find_num(x, "string(5)") 20 | Condition 21 | Error in `xml_find_num()`: 22 | ! Element at path `string(5)` must be a number, not the string "5". 23 | 24 | # xml_find_int errors with non integer results 25 | 26 | Code 27 | xml_find_int(x, "//z") 28 | Condition 29 | Error in `xml_find_int()`: 30 | ! 
Element at path `//z` must be a whole number, not a <xml_missing> object. 31 | Code 32 | xml_find_int(x, "//y") 33 | Condition 34 | Error in `xml_find_int()`: 35 | ! Element at path `//y` must be a whole number, not a list. 36 | Code 37 | xml_find_int(x, "number(1.1)") 38 | Condition 39 | Error in `xml_find_int()`: 40 | ! Element at path `number(1.1)` must be a whole number, not the number 1.1. 41 | 42 | # xml_find_chr errors with non character results 43 | 44 | Code 45 | xml_find_chr(x, "//z") 46 | Condition 47 | Error in `xml_find_chr()`: 48 | ! Element at path `//z` must be a single string, not a <xml_missing> object. 49 | Code 50 | xml_find_chr(x, "//y") 51 | Condition 52 | Error in `xml_find_chr()`: 53 | ! Element at path `//y` must be a single string, not a list. 54 | Code 55 | xml_find_chr(x, "1=1") 56 | Condition 57 | Error in `xml_find_chr()`: 58 | ! Element at path `1=1` must be a single string, not `TRUE`. 59 | Code 60 | xml_find_chr(x, "1+1") 61 | Condition 62 | Error in `xml_find_chr()`: 63 | ! Element at path `1+1` must be a single string, not the number 2. 64 | 65 | # xml_find_lgl errors with non logical results 66 | 67 | Code 68 | xml_find_lgl(x, "//z") 69 | Condition 70 | Error in `xml_find_lgl()`: 71 | ! Element at path `//z` must be `TRUE` or `FALSE`, not a <xml_missing> object. 72 | Code 73 | xml_find_lgl(x, "//y") 74 | Condition 75 | Error in `xml_find_lgl()`: 76 | ! Element at path `//y` must be `TRUE` or `FALSE`, not a list. 77 | Code 78 | xml_find_lgl(x, "string(5)") 79 | Condition 80 | Error in `xml_find_lgl()`: 81 | ! Element at path `string(5)` must be `TRUE` or `FALSE`, not the string "5". 82 | Code 83 | xml_find_lgl(x, "1+1") 84 | Condition 85 | Error in `xml_find_lgl()`: 86 | ! Element at path `1+1` must be `TRUE` or `FALSE`, not the number 2. 
87 | 88 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/xml_name.md: -------------------------------------------------------------------------------- 1 | # error if missing ns spec 2 | 3 | Code 4 | xml_name(bars, ns) 5 | Condition 6 | Error in `xml_name()`: 7 | ! Couldn't find prefix for url http://bar.com 8 | 9 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/xml_node.md: -------------------------------------------------------------------------------- 1 | # print method is correct 2 | 3 | Code 4 | print(body) 5 | Output 6 | {html_node} 7 | <body id="styleguide-v2" class="fixed"> 8 | [1] <script>\n if (typeof uet == 'function') {\n uet("bb");\n }\n ... 9 | [2] <script>\n if ('csm' in window) {\n csm.measure('csm_body_delive ... 10 | [3] <div id="wrapper">\n <div id="root" class="redesign">\n<scrip ... 11 | [4] <script type="text/javascript" src="http://ia.media-imdb.com/images/G/01 ... 12 | [5] <script type="text/imdblogin-js" id="login">\njQuery(document).ready(fun ... 13 | [6] <script type="text/javascript">\n jQuery(\n ... 14 | [7] <iframe id="sis_pixel_sitewide" width="1" height="1" frameborder="0" mar ... 15 | [8] <script>\n setTimeout(function(){\n try{\n //sis3.0 ... 16 | [9] <script type="text/javascript" src="http://ia.media-imdb.com/images/G/01 ... 17 | [10] <script type="text/javascript">\nif(window.COMSCORE){\nCOMSCORE.beacon({ ... 18 | [11] <noscript>\n<img src="http://b.scorecardresearch.com/p?c1=2&c2=60349 ... 19 | [12] <script>\n doWithAds(function(){\n (new Image()).src = "http:/ ... 20 | [13] <script>\n(function(){\n var readyTimeout = setInterval(function(){\n ... 21 | [14] <div id="servertime" time="235"></div> 22 | [15] <script>\n if (typeof uet == 'function') {\n uet("be");\n }\n ... 
23 | 24 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/xml_nodeset.md: -------------------------------------------------------------------------------- 1 | # print method is correct 2 | 3 | Code 4 | print(divs) 5 | Output 6 | {xml_nodeset (10)} 7 | [1] <div id="wrapper">\n <div id="root" class="redesign">\n<scrip ... 8 | [2] <div id="root" class="redesign">\n<script>\n if (typeof uet == 'funct ... 9 | [3] <div id="nb20" class="navbarSprite">\n<div id="supertab">\t\n\t<!-- begi ... 10 | [4] <div id="supertab">\t\n\t<!-- begin TOP_AD -->\n<div id="top_ad_wrapper" ... 11 | [5] <div id="top_ad_wrapper" class="dfp_slot">\n<script type="text/javascrip ... 12 | [6] <div id="top_ad_reflow_helper"></div> 13 | [7] <div id="navbar" class="navbarSprite">\n<noscript>\n <link rel="stylesh ... 14 | [8] <div id="nb_search">\n <noscript><div id="more_if_no_javascript"><a h ... 15 | [9] <div id="more_if_no_javascript"><a href="/search/">More</a></div> 16 | [10] <div class="magnifyingglass navbarSprite"></div> 17 | 18 | --- 19 | 20 | Code 21 | print(x, width = 13L) 22 | Output 23 | {xml_document} 24 | <doc> 25 | [1] <a>123 ... 26 | [2] <b>123 ... 27 | [3] <c>12\ ... 28 | Code 29 | print(x, width = 14L) 30 | Output 31 | {xml_document} 32 | <doc> 33 | [1] <a>1234 ... 34 | [2] <b>1234 ... 35 | [3] <c>12\\ ... 36 | 37 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/xml_parse.md: -------------------------------------------------------------------------------- 1 | # read_xml errors with an empty document 2 | 3 | Code 4 | read_xml(character()) 5 | Condition 6 | Error in `read_xml()`: 7 | ! `x` must be a single string, not an empty character vector. 
8 | 9 | # parse_options errors when given an invalid option 10 | 11 | Code 12 | read_html(test_path("lego.html.bz2"), options = "INVALID") 13 | Condition 14 | Error in `read_html()`: 15 | x `options` "INVALID" is not a valid option. 16 | i Valid options are one of "RECOVER", "NOENT", "DTDLOAD", "DTDATTR", "DTDVALID", "NOERROR", "NOWARNING", "PEDANTIC", "NOBLANKS", "SAX1", "XINCLUDE", "NONET", "NODICT", "NSCLEAN", "NOCDATA", "NOXINCNODE", "COMPACT", "OLD10", ..., "IGNORE_ENC", or "BIG_LINES". 17 | i See read_html (`?xml2::read_html()`) for all options. 18 | 19 | # read_xml and read_html fail with > 1 input 20 | 21 | Code 22 | read_xml(c("foo", "bar")) 23 | Condition 24 | Error in `read_xml()`: 25 | ! `x` must be a single string, not a character vector. 26 | Code 27 | read_html(c("foo", "bar")) 28 | Condition 29 | Error in `read_xml()`: 30 | ! `x` must be a single string, not a character vector. 31 | 32 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/xml_write.md: -------------------------------------------------------------------------------- 1 | # write_xml errors for incorrect directory and with invalid inputs 2 | 3 | Code 4 | write_xml(x, c("test.xml", "foo")) 5 | Condition 6 | Error in `write_xml()`: 7 | ! `file` must be a single string, not a character vector. 8 | 9 | # write_xml works with nodeset input and connections 10 | 11 | Code 12 | write_xml(y[1], c(filename, "foo")) 13 | Condition 14 | Error in `write_xml()`: 15 | ! `file` must be a single string, not a character vector. 16 | 17 | # write_xml works with node input and files 18 | 19 | Code 20 | write_xml(y, c(filename, "foo")) 21 | Condition 22 | Error in `write_xml()`: 23 | ! `file` must be a single string, not a character vector. 
24 | 25 | -------------------------------------------------------------------------------- /tests/testthat/helper.R: -------------------------------------------------------------------------------- 1 | maybe_error <- function(code, ...) { 2 | tryCatch(code, error = function(e) expect_error(stop(e), ...)) 3 | } 4 | -------------------------------------------------------------------------------- /tests/testthat/lego.html.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-lib/xml2/bf5619bbb6452d1f23cd88a9e0960d77e98a0d7b/tests/testthat/lego.html.bz2 -------------------------------------------------------------------------------- /tests/testthat/ns-multiple-aliases.xml: -------------------------------------------------------------------------------- 1 | <root> 2 | <doc1 xmlns:b="http://bar.com"><b:bar /></doc1> 3 | <doc2 xmlns:c="http://bar.com"><c:bar /></doc2> 4 | </root> 5 | -------------------------------------------------------------------------------- /tests/testthat/ns-multiple-default.xml: -------------------------------------------------------------------------------- 1 | <root> 2 | <doc1 xmlns = "http://foo.com"><bar /></doc1> 3 | <doc2 xmlns = "http://bar.com"><bar /></doc2> 4 | </root> 5 | -------------------------------------------------------------------------------- /tests/testthat/ns-multiple-prefix.xml: -------------------------------------------------------------------------------- 1 | <root> 2 | <doc1 xmlns:b="http://baz.com"><b:bar /></doc1> 3 | <doc2 xmlns:b="http://bar.com"><b:bar /></doc2> 4 | </root> 5 | -------------------------------------------------------------------------------- /tests/testthat/ns-multiple.xml: -------------------------------------------------------------------------------- 1 | <root xmlns:f="http://foo.com" xmlns:g = "http://bar.com"> 2 | <doc1><f:bar f:id="a" /></doc1> 3 | <doc2><g:bar g:id="b" /></doc2> 4 | </root> 5 | 
-------------------------------------------------------------------------------- /tests/testthat/records.dtd: -------------------------------------------------------------------------------- 1 | <!ENTITY author "foo bar"> 2 | <!ENTITY hoqc "&author; Quantitative Consultancy"> 3 | <!ELEMENT records (record+)> 4 | <!ELEMENT record (field1)> 5 | <!ELEMENT field1 (#PCDATA)> 6 | -------------------------------------------------------------------------------- /tests/testthat/records.xml: -------------------------------------------------------------------------------- 1 | <?xml version="1.0" encoding="UTF-8"?> 2 | <!DOCTYPE records SYSTEM "records.dtd"> 3 | <records> 4 | <record> 5 | <field1>&hoqc;</field1> 6 | </record> 7 | </records> 8 | -------------------------------------------------------------------------------- /tests/testthat/setup.R: -------------------------------------------------------------------------------- 1 | cat("This is libxml2 version", as.character(xml2:::libxml2_version()), "\n") 2 | -------------------------------------------------------------------------------- /tests/testthat/test-as_list.R: -------------------------------------------------------------------------------- 1 | list_xml <- function(x) as_list(read_xml(x)) 2 | 3 | test_that("empty elements become empty lists", { 4 | expect_equal(list_xml("<x></x>"), list(x = list())) 5 | expect_equal(list_xml("<x><y/></x>"), list(x = list(y = list()))) 6 | expect_equal(list_xml("<x><y><z/></y></x>"), list(x = list(y = list(z = list())))) 7 | }) 8 | 9 | test_that("text nodes become character vectors", { 10 | expect_equal(list_xml("<x>a</x>"), list(x = list("a"))) 11 | expect_equal(list_xml("<x><y>a</y></x>"), list(x = list(y = list("a")))) 12 | }) 13 | 14 | test_that("cdata nodes become character vectors", { 15 | expect_equal(list_xml("<x><![CDATA[<y/>]]></x>"), list(x = list("<y/>"))) 16 | }) 17 | # Each XML attribute should surface as an R attribute on the element's list. 18 | test_that("xml attributes become R attributes", { 19 | expect_equal(list_xml("<x a='1' b='2'></x>"), 
list(x = structure(list(), a = "1", b = "2"))) 20 | }) 21 | 22 | test_that("xml names are preserved when attributes exist", { 23 | expect_equal( 24 | list_xml("<x a='1' b='2'><y>3</y><z>4</z></x>"), 25 | list(x = structure(list(y = list("3"), z = list("4")), a = "1", b = "2")) 26 | ) 27 | }) 28 | 29 | test_that("special attributes are escaped", { 30 | expect_equal( 31 | list_xml("<x a='1' b='2' names='esc'><y>3</y><z>4</z></x>"), 32 | list(x = structure(list(y = list("3"), z = list("4")), a = "1", b = "2", .names = "esc")) 33 | ) 34 | }) 35 | 36 | test_that("attributes in child nodes", { 37 | expect_equal( 38 | list_xml("<w aa = '0'><x a='1' b='2' names='esc'><y>3</y><z>4</z></x></w>"), 39 | list(w = structure(list(x = structure(list(y = list("3"), z = list("4")), a = "1", b = "2", .names = "esc")), aa = "0")) 40 | ) 41 | }) 42 | -------------------------------------------------------------------------------- /tests/testthat/test-as_xml_document.R: -------------------------------------------------------------------------------- 1 | roundtrip_xml <- function(x) { 2 | xml <- read_xml(x) 3 | lst <- as_list(xml) 4 | xml2 <- as_xml_document(lst) 5 | expect_equal(as.character(xml), as.character(xml2)) 6 | } 7 | 8 | test_that("roundtrips with single children", { 9 | roundtrip_xml("<a><b/></a>") 10 | 11 | roundtrip_xml("<a><b><c/></b></a>") 12 | 13 | roundtrip_xml("<a><b>foo<c/></b></a>") 14 | 15 | roundtrip_xml("<a><b>foo<c>bar</c></b></a>") 16 | 17 | roundtrip_xml("<a x = '1'><b y = '2'>foo<c z = '3'>bar</c></b></a>") 18 | }) 19 | 20 | test_that("roundtrips with multi children", { 21 | roundtrip_xml("<a><b1/><b2/></a>") 22 | 23 | roundtrip_xml("<a><b><c1/><c2/></b></a>") 24 | 25 | roundtrip_xml("<a><b1>foo<c/></b1><b2>bar<c/></b2></a>") 26 | 27 | roundtrip_xml("<a><b>foo<c>bar</c><c>baz</c></b></a>") 28 | 29 | roundtrip_xml("<a x = '1'><b y = '2'>foo<c z = '3'>bar</c></b></a>") 30 | roundtrip_xml("<a x = '1'><b y = '2'>foo<c z = '3'>bar</c></b><c zz = '4'>baz</c></a>") 
31 | }) 32 | # An XML attribute called `names` collides with R's own `names` attribute, so it must survive the list round trip. 33 | test_that("roundtrips with special attributes", { 34 | roundtrip_xml("<a names = 'test'><b/></a>") 35 | }) 36 | 37 | test_that("more than one root node is an error", { 38 | expect_error(as_xml_document(list(a = list(), b = list())), "Root nodes must be of length 1") 39 | }) 40 | 41 | test_that("Can convert nodes with leading and trailing text", { 42 | roundtrip_xml("<a>foo<b>bar</b>baz</a>") 43 | }) 44 | -------------------------------------------------------------------------------- /tests/testthat/test-classes.R: -------------------------------------------------------------------------------- 1 | test_that("CDATA creation works", { 2 | x <- xml_new_root("root") 3 | xml_add_child(x, xml_cdata("<d/>")) 4 | expect_identical(as.character(x), "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<root><![CDATA[<d/>]]></root>\n") 5 | }) 6 | 7 | test_that("Comment creation works", { 8 | x <- xml_new_root("root") 9 | xml_add_child(x, xml_comment("Hello!")) 10 | expect_identical("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<root><!--Hello!--></root>\n", as.character(x, options = "")) 11 | }) 12 | 13 | test_that("xml_dtd works", { 14 | r <- xml_new_root(xml_dtd(name = "html", external_id = "-//W3C//DTD XHTML 1.0 Transitional//EN", system_id = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd")) 15 | expect_identical("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n", as.character(r)) 16 | 17 | no_name <- xml_new_root(xml_dtd(external_id = "-//W3C//DTD XHTML 1.0 Transitional//EN", system_id = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd")) 18 | expect_identical("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n", as.character(no_name)) 19 | 20 | no_name_external_id <- xml_new_root(xml_dtd(system_id = 
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd")) 21 | expect_identical("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE SYSTEM \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n", as.character(no_name_external_id)) 22 | 23 | no_name_external_id_internal_id <- xml_new_root(xml_dtd()) 24 | expect_identical("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE >\n", as.character(no_name_external_id_internal_id)) 25 | }) 26 | -------------------------------------------------------------------------------- /tests/testthat/test-format.R: -------------------------------------------------------------------------------- 1 | test_that("format.xml_node prints attributes for root nodes", { 2 | x <- read_xml("<parent foo = 'bar' />") 3 | expect_equal(format(x), "<parent foo=\"bar\">") 4 | }) 5 | test_that("format.xml_node prints namespaces for root nodes", { 6 | x <- read_xml("<parent/>") 7 | expect_equal(format(x), "<parent>") 8 | 9 | y <- read_xml("<parent xmlns = 'tag:james.f.hester@gmail.com,2016:bar' />") 10 | expect_equal(format(y), "<parent xmlns=\"tag:james.f.hester@gmail.com,2016:bar\">") 11 | 12 | z <- read_xml("<parent xmlns:foo = 'tag:james.f.hester@gmail.com,2016:bar' />") 13 | expect_equal(format(z), "<parent xmlns:foo=\"tag:james.f.hester@gmail.com,2016:bar\">") 14 | }) 15 | -------------------------------------------------------------------------------- /tests/testthat/test-null.R: -------------------------------------------------------------------------------- 1 | data <- read_xml(test_path("ns-multiple.xml")) 2 | tf <- tempfile() 3 | on.exit(unlink(tf)) 4 | saveRDS(data, file = tf) 5 | x <- readRDS(tf) 6 | 7 | test_that("accessors all fail rather than crash with NULL Xptrs", { 8 | expect_error(as_list(x), "external pointer is not valid") 9 | 10 | expect_error(html_structure(x), "external pointer is not valid") 11 | 12 | expect_error(xml_add_child(x, x), "external pointer is not valid") 13 | expect_error(xml_add_sibling(x, x), 
"external pointer is not valid") 14 | 15 | expect_error(xml_attr(x, "foo"), "external pointer is not valid") 16 | expect_error(xml_attr(x, "foo") <- "bar", "external pointer is not valid") 17 | 18 | expect_error(xml_attrs(x), "external pointer is not valid") 19 | expect_error(xml_attrs(x) <- list(), "external pointer is not valid") 20 | 21 | expect_error(xml_child(x), "external pointer is not valid") 22 | expect_error(xml_children(x), "external pointer is not valid") 23 | 24 | expect_error(xml_contents(x), "external pointer is not valid") 25 | 26 | expect_error(xml_double(x), "external pointer is not valid") 27 | 28 | expect_error(xml_find_all(x, ""), "external pointer is not valid") 29 | expect_error(xml_find_chr(x, ""), "external pointer is not valid") 30 | expect_error(xml_find_first(x, ""), "external pointer is not valid") 31 | expect_error(xml_find_lgl(x, ""), "external pointer is not valid") 32 | expect_error(xml_find_num(x, ""), "external pointer is not valid") 33 | 34 | expect_error(xml_has_attr(x, ""), "external pointer is not valid") 35 | 36 | expect_error(xml_integer(x), "external pointer is not valid") 37 | 38 | expect_error(xml_length(x), "external pointer is not valid") 39 | 40 | expect_error(xml_name(x), "external pointer is not valid") 41 | expect_error(xml_name(x) <- "foo", "external pointer is not valid") 42 | 43 | expect_error(xml_ns(x), "external pointer is not valid") 44 | expect_error(xml_ns_strip(x), "external pointer is not valid") 45 | 46 | expect_error(xml_parent(x), "external pointer is not valid") 47 | expect_error(xml_parents(x), "external pointer is not valid") 48 | 49 | expect_error(xml_path(x), "external pointer is not valid") 50 | 51 | expect_error(xml_remove(x), "external pointer is not valid") 52 | 53 | expect_error(xml_replace(x, x), "external pointer is not valid") 54 | 55 | expect_error(xml_set_namespace(x, "foo"), "external pointer is not valid") 56 | 57 | expect_error(xml_siblings(x), "external pointer is not valid") 58 | 59 
| expect_error(xml_structure(x), "external pointer is not valid") 60 | 61 | expect_error(xml_text(x), "external pointer is not valid") 62 | expect_error(xml_text(x) <- "test", "external pointer is not valid") 63 | 64 | expect_error(xml_type(x), "external pointer is not valid") 65 | 66 | expect_error(xml_url(x), "external pointer is not valid") 67 | }) 68 | -------------------------------------------------------------------------------- /tests/testthat/test-xml_children.R: -------------------------------------------------------------------------------- 1 | x <- read_xml("<foo> <bar><boo /></bar> <baz/> </foo>") 2 | 3 | test_that("xml_child() returns the proper child", { 4 | expect_identical(xml_child(x), xml_children(x)[[1L]]) 5 | 6 | expect_identical(xml_child(x, 2), xml_children(x)[[2L]]) 7 | }) 8 | 9 | test_that("xml_child() returns child by name", { 10 | expect_identical(xml_child(x, "baz"), xml_find_first(x, "./baz")) 11 | }) 12 | 13 | test_that("xml_child() errors if more than one search is given", { 14 | expect_snapshot(error = TRUE, xml_child(x, 1:2)) 15 | }) 16 | 17 | test_that("xml_child() errors if search is not numeric or character", { 18 | expect_error(xml_child(x, TRUE), "`search` must be `numeric` or `character`") 19 | expect_error(xml_child(x, as.factor("test")), "`search` must be `numeric` or `character`") 20 | expect_error(xml_child(x, raw(1)), "`search` must be `numeric` or `character`") 21 | expect_error(xml_child(x, list(1)), "`search` must be `numeric` or `character`") 22 | }) 23 | 24 | test_that("xml_length", { 25 | expect_equal(xml_length(x), 2) 26 | all <- xml_find_all(x, "//*") 27 | expect_equal(xml_length(all), c(2, 1, 0, 0)) 28 | }) 29 | 30 | test_that("xml_parent", { 31 | expect_identical(unclass(xml_parent(xml_child(x))), unclass(x)) 32 | }) 33 | 34 | test_that("xml_parents", { 35 | expect_equal( 36 | xml_name(xml_parents(xml_find_first(x, "//boo"))), 37 | c("bar", "foo") 38 | ) 39 | }) 40 | 41 | test_that("xml_root", { 42 | doc <- 
xml_new_document() 43 | 44 | expect_s3_class(xml_root(doc), "xml_missing") 45 | 46 | a <- xml_add_child(doc, "a") 47 | b <- xml_add_child(doc, "b") 48 | 49 | expect_equal(xml_name(xml_root(b)), "a") 50 | expect_equal(xml_name(xml_root(doc)), "a") 51 | }) 52 | -------------------------------------------------------------------------------- /tests/testthat/test-xml_document.R: -------------------------------------------------------------------------------- 1 | test_that("print method is correct", { 2 | x <- read_html(test_path("lego.html.bz2")) 3 | 4 | expect_snapshot(print(x)) 5 | }) 6 | -------------------------------------------------------------------------------- /tests/testthat/test-xml_missing.R: -------------------------------------------------------------------------------- 1 | x <- read_xml("<body> 2 | <p>Some <b>text</b>.</p> 3 | <p>Some <b>other</b>.</p> 4 | <p>No bold text</p> 5 | </body>") 6 | para <- xml_find_all(x, ".//p") 7 | b <- xml_find_first(para, ".//b") 8 | mss <- b[[3]] 9 | 10 | test_that("xml_find returns nodes of class 'xml_missing' for missing nodes", { 11 | expect_length(b, 3L) 12 | expect_equal(lengths(b), c(2L, 2L, 0L)) 13 | expect_s3_class(mss, "xml_missing") 14 | }) 15 | 16 | test_that("xml_missing methods return properly for all S3 methods", { 17 | expect_equal(as.character(mss), NA_character_) 18 | expect_equal(as_list(mss), list()) 19 | expect_equal(nodeset_apply(mss), xml_nodeset()) 20 | expect_output(print(mss), "\\{xml_missing\\}\n<NA>") 21 | expect_equal(tree_structure(mss), NA_character_) 22 | expect_error(write_xml(mss), "Missing data cannot be written") 23 | expect_error(write_html(mss), "Missing data cannot be written") 24 | expect_equal(xml_attr(mss, "dummy_attr"), NA_character_) 25 | expect_equal(xml_attrs(mss), NA_character_) 26 | expect_equal(xml_find_all(mss), xml_nodeset()) 27 | expect_equal(xml_find_chr(mss), character()) 28 | expect_equal(xml_find_lgl(mss), logical()) 29 | expect_equal(xml_find_num(mss), numeric()) 
30 | expect_equal(xml_find_first(mss), xml_missing()) 31 | expect_equal(xml_length(mss), 0L) 32 | expect_equal(xml_name(mss), NA_character_) 33 | expect_equal(xml_parent(mss), xml_missing()) 34 | expect_equal(xml_path(mss), NA_character_) 35 | expect_equal(xml_text(mss), NA_character_) 36 | expect_equal(xml_url(mss), NA_character_) 37 | }) 38 | 39 | test_that("is.na() should return TRUE for xml_missing", { 40 | expect_true(is.na(xml_missing())) 41 | }) 42 | -------------------------------------------------------------------------------- /tests/testthat/test-xml_name.R: -------------------------------------------------------------------------------- 1 | test_that("xml_name() returns the name", { 2 | x <- read_xml("<body> 3 | <p>Some <b>text</b>.</p> 4 | <p>Some <i>other</i>.</p> 5 | <p>No bold text</p> 6 | </body>") 7 | 8 | children <- xml_children(x) 9 | x <- xml_find_first(children, ".//b|.//i") 10 | 11 | expect_equal(xml_name(x[[1]]), "b") 12 | expect_equal(xml_name(x[[2]]), "i") 13 | expect_equal(xml_name(x[[3]]), NA_character_) 14 | 15 | expect_equal(xml_name(x), c("b", "i", NA_character_)) 16 | }) 17 | 18 | test_that("qualified names returned when ns given", { 19 | x <- read_xml(test_path("ns-multiple-default.xml")) 20 | ns <- xml_ns(x) 21 | 22 | bars <- xml_children(xml_children(x)) 23 | expect_equal(xml_name(bars), c("bar", "bar")) 24 | expect_equal(xml_name(bars, ns), c("d1:bar", "d2:bar")) 25 | }) 26 | 27 | test_that("error if missing ns spec", { 28 | x <- read_xml(test_path("ns-multiple-default.xml")) 29 | ns <- xml_ns(x)[1] 30 | 31 | bars <- xml_children(xml_children(x)) 32 | expect_snapshot(error = TRUE, xml_name(bars, ns)) 33 | }) 34 | 35 | test_that("xml_name<- modifies the name", { 36 | x <- read_xml(test_path("ns-multiple-default.xml")) 37 | ns <- xml_ns(x) 38 | 39 | bars <- xml_children(xml_children(x)) 40 | bar <- bars[[1]] 41 | 42 | xml_name(bar) <- "foo" 43 | expect_equal(xml_name(bar), "foo") 44 | expect_equal(xml_name(bar, ns), "d1:foo") 45 | 
46 | # ns is ignored 47 | xml_name(bar, ns) <- "bar" 48 | expect_equal(xml_name(bar), "bar") 49 | expect_equal(xml_name(bar, ns), "d1:bar") 50 | 51 | xml_name(bars) <- "foo" 52 | expect_equal(xml_name(bars), c("foo", "foo")) 53 | 54 | old_mss <- mss <- xml_missing() 55 | xml_name(mss) <- "foo" 56 | expect_identical(old_mss, mss) 57 | }) 58 | 59 | test_that("xml_set_name modifies the name", { 60 | x <- read_xml(test_path("ns-multiple-default.xml")) 61 | ns <- xml_ns(x) 62 | 63 | bars <- xml_children(xml_children(x)) 64 | bar <- bars[[1]] 65 | 66 | xml_set_name(bar, "foo") 67 | expect_equal(xml_name(bar), "foo") 68 | expect_equal(xml_name(bar, ns), "d1:foo") 69 | 70 | # ns is ignored 71 | xml_set_name(bar, "bar", ns) 72 | expect_equal(xml_name(bar), "bar") 73 | expect_equal(xml_name(bar, ns), "d1:bar") 74 | 75 | xml_set_name(bars, "foo") 76 | expect_equal(xml_name(bars), c("foo", "foo")) 77 | 78 | old_mss <- mss <- xml_missing() 79 | xml_set_name(mss, "foo") 80 | expect_identical(old_mss, mss) 81 | }) 82 | -------------------------------------------------------------------------------- /tests/testthat/test-xml_namespaces.R: -------------------------------------------------------------------------------- 1 | # XML parsing tests ------------------------------------------------------------ 2 | 3 | test_that("multiple default namespaces given unique names", { 4 | ns <- unclass(xml_ns(read_xml(test_path("ns-multiple-default.xml")))) 5 | expect_equal(ns, c(d1 = "http://foo.com", d2 = "http://bar.com")) 6 | }) 7 | 8 | test_that("repeated prefixes given unique names", { 9 | ns <- unclass(xml_ns(read_xml(test_path("ns-multiple-prefix.xml")))) 10 | expect_equal(ns, c(b = "http://baz.com", b1 = "http://bar.com")) 11 | }) 12 | 13 | test_that("aliased prefixes retained", { 14 | ns <- unclass(xml_ns(read_xml(test_path("ns-multiple-aliases.xml")))) 15 | expect_equal(ns, c(b = "http://bar.com", c = "http://bar.com")) 16 | }) 17 | 18 | 19 | # Low-level character vector tests 
--------------------------------------------- 20 | 21 | test_that("unique prefix-url combo unchanged", { 22 | x <- c(blah = "http://blah.com", rah = "http://rah.com") 23 | expect_equal(.Call(unique_ns, x), x) 24 | }) 25 | # Two prefixes bound to the same URL must both survive de-duplication. 26 | test_that("all prefixes kept", { 27 | x <- c(blah = "http://blah.com", rah = "http://blah.com") 28 | expect_named(.Call(unique_ns, x), c("blah", "rah")) 29 | }) 30 | 31 | test_that("multiple default namespaces can be stripped", { 32 | x <- read_xml(test_path("ns-multiple-default.xml")) 33 | ns <- unclass(xml_ns(x)) 34 | expect_equal(ns, c(d1 = "http://foo.com", d2 = "http://bar.com")) 35 | expect_length(xml_find_all(x, "//bar"), 0) 36 | 37 | xml_ns_strip(x) 38 | ns <- unclass(xml_ns(x)) 39 | 40 | expect_equal(unname(ns), character()) 41 | expect_length(xml_find_all(x, "//bar"), 2) 42 | }) 43 | -------------------------------------------------------------------------------- /tests/testthat/test-xml_node.R: -------------------------------------------------------------------------------- 1 | test_that("print method is correct", { 2 | x <- read_html(test_path("lego.html.bz2")) 3 | body <- xml_find_first(x, "//body") 4 | expect_snapshot(print(body)) 5 | }) 6 | -------------------------------------------------------------------------------- /tests/testthat/test-xml_nodeset.R: -------------------------------------------------------------------------------- 1 | test_that("methods work on empty nodesets", { 2 | x <- read_xml("<a><b/></a>") 3 | empty <- xml_find_all(x, "//c") 4 | 5 | expect_error(empty[[1]], "subscript out of bounds") 6 | expect_identical(empty[1], empty) 7 | test <- empty 8 | 9 | xml_attr(test, "test") <- 1 10 | expect_identical(test, empty) 11 | 12 | xml_attrs(test) <- c("test" = 1) 13 | expect_identical(test, empty) 14 | 15 | xml_name(test) <- "test" 16 | expect_identical(test, empty) 17 | 18 | xml_text(test) <- "test" 19 | expect_identical(test, empty) 20 | 21 | expect_identical(as.character(empty), character(0)) 22 | 
expect_identical(as_list(empty), list()) 23 | expect_identical(nodeset_apply(empty, identical), empty) 24 | expect_output(print(empty), "\\{xml_nodeset \\(0\\)\\}") 25 | expect_silent(tree_structure(empty)) 26 | 27 | xml_add_child(test, "test") 28 | expect_identical(test, empty) 29 | 30 | xml_add_sibling(test, "test") 31 | expect_identical(test, empty) 32 | 33 | expect_identical(xml_attr(empty, "test"), character()) 34 | expect_identical(xml_attrs(empty), list()) 35 | expect_identical(xml_double(empty), numeric()) 36 | expect_identical(xml_find_all(empty), empty) 37 | expect_identical(xml_find_chr(empty), character()) 38 | expect_identical(xml_find_first(empty), empty) 39 | expect_identical(xml_find_lgl(empty), logical()) 40 | expect_identical(xml_find_num(empty), numeric()) 41 | expect_identical(xml_integer(empty), integer()) 42 | expect_identical(xml_length(empty), 0L) 43 | expect_identical(xml_name(empty), character()) 44 | expect_identical(xml_ns(empty), character()) 45 | expect_identical(xml_parent(empty), empty) 46 | expect_identical(xml_path(empty), character()) 47 | 48 | xml_remove(test) 49 | expect_identical(test, empty) 50 | 51 | xml_replace(test) 52 | expect_identical(test, empty) 53 | 54 | xml_set_attr(test, "test", 1) 55 | expect_identical(test, empty) 56 | 57 | xml_set_attrs(test, c("test" = 1)) 58 | expect_identical(test, empty) 59 | 60 | xml_set_name(test, "test") 61 | expect_identical(test, empty) 62 | 63 | xml_set_text(test, "test") 64 | expect_identical(test, empty) 65 | 66 | expect_identical(xml_siblings(empty), empty) 67 | expect_silent(xml_structure(empty)) 68 | 69 | expect_identical(xml_text(empty), character()) 70 | expect_identical(xml_url(empty), character()) 71 | }) 72 | 73 | test_that("print method is correct", { 74 | skip_if(getOption("width") < 20L, "Screen too narrow") 75 | 76 | x <- read_html(test_path("lego.html.bz2")) 77 | body <- xml_find_first(x, "//body") 78 | divs <- xml_find_all(body, ".//div")[1:10] 79 | 
expect_snapshot(print(divs)) 80 | 81 | # double-substring() logic 82 | s <- c( 83 | "123456789\\", # always too wide, '\' never encoded 84 | "12345", # always fits 85 | "12\\45" # doesn't fit when '\' is encoded 86 | ) 87 | # embed as text on nodes <a>,<b>,<c> 88 | s <- sprintf("<%1$s>%2$s</%1$s>", letters[1:3], s) 89 | x <- read_xml(sprintf("<doc>%s</doc>", paste(s, collapse=""))) 90 | expect_snapshot({ 91 | print(x, width = 13L) 92 | print(x, width = 14L) 93 | }) 94 | }) 95 | -------------------------------------------------------------------------------- /tests/testthat/test-xml_parse.R: -------------------------------------------------------------------------------- 1 | test_that("download_xml fails if curl is not installed", { 2 | skip("how to test error with `check_installed()`?") 3 | mockery::stub(download_xml, "requireNamespace", function(...) FALSE) 4 | 5 | expect_error( 6 | download_xml("http://httpbin.org/xml"), 7 | "`curl` must be installed to use `download_xml\\(\\)`" 8 | ) 9 | }) 10 | 11 | test_that("read_xml errors with an empty document", { 12 | expect_snapshot(error = TRUE, { 13 | read_xml(character()) 14 | }) 15 | 16 | tf <- tempfile() 17 | file.create(tf) 18 | on.exit(unlink(tf)) 19 | 20 | expect_error(read_xml(tf), "Document is empty") 21 | }) 22 | 23 | test_that("read_html correctly parses malformed document", { 24 | lego <- read_html(test_path("lego.html.bz2")) 25 | expect_length(xml_find_all(lego, ".//p"), 39) 26 | }) 27 | 28 | test_that("parse_options errors when given an invalid option", { 29 | expect_error( 30 | parse_options("INVALID", xml_parse_options()), 31 | '`options` "INVALID" is not a valid option' 32 | ) 33 | 34 | expect_snapshot(error = TRUE, 35 | read_html(test_path("lego.html.bz2"), options = "INVALID") 36 | ) 37 | 38 | # Empty inputs returned as 0 39 | expect_identical(0L, parse_options("", xml_parse_options())) 40 | expect_identical(0L, parse_options(NULL, xml_parse_options())) 41 | 42 | # Numerics returned as integers 43 | 
expect_identical(12L, parse_options(12L, xml_parse_options())) 44 | expect_identical(12L, parse_options(12, xml_parse_options())) 45 | 46 | # Multiple inputs summed 47 | expect_identical(3L, parse_options(c("RECOVER", "NOENT"), xml_parse_options())) 48 | }) 49 | 50 | test_that("read_html properly passes parser arguments", { 51 | skip_if_not(libxml2_version() >= "2.9.2") 52 | 53 | blanks <- read_html(xml2_example("cd_catalog.xml"), options = c("RECOVER", "NOERROR")) 54 | expect_equal( 55 | as_list(blanks)$html$body$catalog$cd[[1]], 56 | "\r\n " 57 | ) 58 | 59 | no_blanks <- read_html(xml2_example("cd_catalog.xml"), options = c("RECOVER", "NOERROR", "NOBLANKS")) 60 | 61 | expect_equal( 62 | as_list(no_blanks)$html$body$catalog$cd[[1]], 63 | list("Empire Burlesque") 64 | ) 65 | }) 66 | 67 | test_that("read_xml works with httr response objects", { 68 | skip("httpbin is unreliable") 69 | x <- read_xml(httr::GET("http://httpbin.org/xml")) 70 | 71 | expect_s3_class(x, "xml_document") 72 | 73 | expect_length(xml_find_all(x, "//slide"), 2) 74 | }) 75 | 76 | test_that("read_xml and read_html fail for bad status codes", { 77 | skip("httpbin is unreliable") 78 | 79 | expect_error( 80 | read_xml(httr::GET("http://httpbin.org/status/404")), 81 | class = "http_404" 82 | ) 83 | 84 | expect_error( 85 | read_html(httr::GET("http://httpbin.org/status/404")), 86 | class = "http_404" 87 | ) 88 | }) 89 | 90 | test_that("read_xml works with raw inputs", { 91 | x <- read_xml("<foo/>") 92 | expect_equal(xml_url(x), NA_character_) 93 | }) 94 | 95 | test_that("read_html works with non-ASCII encodings", { 96 | tmp <- tempfile() 97 | on.exit(unlink(tmp)) 98 | 99 | writeLines("<html><body>\U2019</body></html>", tmp, useBytes = TRUE) 100 | res <- read_html(tmp, encoding = "UTF-8") 101 | 102 | expect_equal( 103 | as.character(res, options = ""), 104 | "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" 
\"http://www.w3.org/TR/REC-html40/loose.dtd\">\n<html><body>\U2019</body></html>\n" 105 | ) 106 | }) 107 | 108 | test_that("read_xml and read_html fail with > 1 input", { 109 | expect_snapshot(error = TRUE, { 110 | read_xml(c("foo", "bar")) 111 | read_html(c("foo", "bar")) 112 | }) 113 | }) 114 | -------------------------------------------------------------------------------- /tests/testthat/test-xml_schema.R: -------------------------------------------------------------------------------- 1 | test_that("xml schema validates", { 2 | doc <- read_xml(system.file("extdata/order-doc.xml", package = "xml2")) 3 | schema <- read_xml(system.file("extdata/order-schema.xml", package = "xml2")) 4 | expect_true(xml_validate(doc, schema)) 5 | }) 6 | 7 | test_that("xml schema errors", { 8 | str <- readLines(system.file("extdata/order-doc.xml", package = "xml2")) 9 | str <- sub("<quantity>1", "<quantity>", str) 10 | str <- sub("95819", "ABC95819", str) 11 | str <- sub('partNum="926-AA"', "", str) 12 | doc <- read_xml(paste(str, collapse = "\n")) 13 | schema <- read_xml(system.file("extdata/order-schema.xml", package = "xml2")) 14 | out <- xml_validate(doc, schema) 15 | expect_false(out) 16 | errors <- attr(out, "errors") 17 | expect_type(errors, "character") 18 | expect_length(errors, 4) 19 | }) 20 | -------------------------------------------------------------------------------- /tests/testthat/test-xml_serialize.R: -------------------------------------------------------------------------------- 1 | x <- read_xml("<a> 2 | <b><c>123</c></b> 3 | <b><c>456</c></b> 4 | </a>") 5 | 6 | test_that("xml_serialize and xml_unserialize work with xml_document input", { 7 | out <- xml_unserialize(xml_serialize(x, NULL)) 8 | expect_identical(as.character(x), as.character(out)) 9 | 10 | f <- tempfile() 11 | on.exit(unlink(f)) 12 | 13 | xml_serialize(x, f) 14 | expect_identical(as.character(xml_unserialize(f)), as.character(x)) 15 | }) 16 | 17 | test_that("xml_serialize and xml_unserialize work 
with xml_node input", { 18 | b <- xml_find_first(x, "//b") 19 | out <- xml_unserialize(xml_serialize(b, NULL)) 20 | expect_identical(as.character(b), as.character(out)) 21 | 22 | f <- tempfile() 23 | on.exit(unlink(f)) 24 | 25 | xml_serialize(b, f) 26 | expect_identical(as.character(xml_unserialize(f)), as.character(b)) 27 | }) 28 | 29 | test_that("xml_serialize and xml_unserialize work with xml_nodeset input", { 30 | b <- xml_find_all(x, "//b") 31 | out <- xml_unserialize(xml_serialize(b, NULL)) 32 | expect_identical(as.character(b), as.character(out)) 33 | 34 | f <- tempfile() 35 | on.exit(unlink(f)) 36 | 37 | xml_serialize(b, f) 38 | expect_identical(as.character(xml_unserialize(f)), as.character(b)) 39 | }) 40 | 41 | test_that("xml_serialize and xml_unserialize work with HTML-based xml_document input", { 42 | file <- system.file("extdata", "r-project.html", package = "xml2") 43 | x <- read_html(file) 44 | 45 | out <- xml_unserialize(xml_serialize(x, NULL)) 46 | expect_identical(as.character(x), as.character(out)) 47 | 48 | f <- tempfile() 49 | on.exit(unlink(f)) 50 | 51 | xml_serialize(x, f) 52 | expect_identical(as.character(xml_unserialize(f)), as.character(x)) 53 | }) 54 | 55 | test_that("xml_unserialize throws an error if given a invalid object", { 56 | expect_error(xml_unserialize(serialize(1, NULL)), "Not a serialized xml2 object") 57 | }) 58 | -------------------------------------------------------------------------------- /tests/testthat/test-xml_structure.R: -------------------------------------------------------------------------------- 1 | test_that("xml_structure", { 2 | expect_output( 3 | xml_structure(read_xml("<a><b><c/><c/></b><d/></a>")), 4 | "<a> 5 | <b> 6 | <c> 7 | <c> 8 | <d>" 9 | ) 10 | 11 | expect_output( 12 | xml_structure(read_xml("<a><b><c/><c/></b><d/></a>"), indent = 0L), 13 | "<a> 14 | <b> 15 | <c> 16 | <c> 17 | <d>" 18 | ) 19 | }) 20 | 21 | test_that("xml_structure can write to a file (#244)", { 22 | tmp <- tempfile() 23 | 
xml_structure(read_xml("<a><b><c/><c/></b><d/></a>"), file = tmp) 24 | expect_equal(readLines(tmp), c("<a>", " <b>", " <c>", " <c>", " <d>")) 25 | 26 | # repeated calls erase existing content 27 | xml_structure(read_xml("<a><b><c/><c/></b><d/></a>"), file = tmp) 28 | expect_equal(readLines(tmp), c("<a>", " <b>", " <c>", " <c>", " <d>")) 29 | }) 30 | 31 | test_that("xml_structure is correct", { 32 | x <- read_html(test_path("lego.html.bz2")) 33 | 34 | quicklinks <- xml_find_first(x, "//div[contains(@div, 'quicklinks')]") 35 | expect_snapshot(html_structure(quicklinks)) 36 | }) 37 | -------------------------------------------------------------------------------- /tests/testthat/test-xml_text.R: -------------------------------------------------------------------------------- 1 | test_that("xml_text returns only text without markup", { 2 | x <- read_xml("<p>This is some text. This is <b>bold!</b></p>") 3 | 4 | expect_identical(xml_text(x), "This is some text. This is bold!") 5 | 6 | expect_identical(xml_text(xml_children(x)), "bold!") 7 | }) 8 | 9 | test_that("xml_text works properly with xml_nodeset objects", { 10 | x <- read_xml("<body> 11 | <p>Some <b>text</b>.</p> 12 | <p>Some <i>other</i>.</p> 13 | <p>No bold text</p> 14 | </body>") 15 | 16 | children <- xml_children(x) 17 | x <- xml_find_first(children, ".//b|.//i") 18 | 19 | expect_identical( 20 | xml_text(x), 21 | c("text", "other", NA) 22 | ) 23 | }) 24 | 25 | test_that("xml_text<- and xml_set_text work properly with xml_nodeset objects", { 26 | x <- read_xml("<x>This is some text. <x>This is some nested text.</x></x>") 27 | 28 | expect_identical(xml_text(x), "This is some text. 
This is some nested text.") 29 | 30 | xml_text(x) <- "test" 31 | expect_identical(xml_text(x), "testThis is some nested text.") 32 | xml_set_text(x, "test2") 33 | expect_identical(xml_text(x), "test2This is some nested text.") 34 | }) 35 | 36 | test_that("xml_text trims whitespace if requested, including non-breaking spaces", { 37 | x <- read_html("<p> Some text €  </p>") 38 | expect_identical( 39 | xml_text(x), 40 | " Some text \u20ac \u00a0" 41 | ) 42 | 43 | expect_identical( 44 | xml_text(x, trim = TRUE), 45 | "Some text \u20ac" 46 | ) 47 | 48 | x2 <- read_html("<body><p> Some text €  </p><p> and more € text  </body>") 49 | expect_identical( 50 | xml_text(xml_find_all(x2, ".//p"), trim = TRUE), 51 | c("Some text \u20ac", "and more \u20ac text") 52 | ) 53 | }) 54 | 55 | test_that("xml_integer() returns an integer vector", { 56 | x <- read_xml("<plot><point x='1' y='2' /><point x='2' y='1' /></plot>") 57 | 58 | expect_identical( 59 | xml_integer(xml_find_all(x, "//@x")), 60 | c(1L, 2L) 61 | ) 62 | }) 63 | 64 | 65 | test_that("xml_double() returns a numeric vector", { 66 | x <- read_xml("<earth><point latitude = '42.3466456' longitude = '-71.0390351' /><point latitude = '-36.8523378' longitude = '174.7691073' /></earth>") 67 | 68 | expect_identical(xml_double(xml_find_all(x, "//@latitude")), c(42.3466456, -36.8523378)) 69 | }) 70 | -------------------------------------------------------------------------------- /tests/testthat/test-xml_type.R: -------------------------------------------------------------------------------- 1 | test_that("xml_type() works", { 2 | x <- read_xml("<body> 3 | <p>Some <b>text</b>.</p> 4 | <p>Some <i>other</i>.</p> 5 | <p>No bold text</p> 6 | </body>") 7 | 8 | children <- xml_children(x) 9 | x <- xml_find_first(children, ".//b|.//i") 10 | 11 | expect_equal(xml_type(x[[1]]), "element") 12 | expect_equal(xml_type(x[[3]]), NA_character_) 13 | 14 | expect_equal(xml_type(x), c("element", "element", NA)) 15 | 16 | empty <- xml_children(x) 17 | 
expect_identical(xml_type(empty), character()) 18 | }) 19 | -------------------------------------------------------------------------------- /tests/testthat/test-xml_url.R: -------------------------------------------------------------------------------- 1 | test_that("url_absolute", { 2 | expect_equal( 3 | url_absolute(c(".", "..", "/", "/x"), "http://hadley.nz/a/b/c/d"), 4 | c("http://hadley.nz/a/b/c/", "http://hadley.nz/a/b/", "http://hadley.nz/", "http://hadley.nz/x") 5 | ) 6 | 7 | expect_error( 8 | url_absolute(c(".", "..", "/", "/x"), c("http://hadley.nz/a/b/c/d", "http://foo.bar")), 9 | "Base URL must be length 1" 10 | ) 11 | }) 12 | 13 | test_that("url_relative", { 14 | # The behavior of libxml2 with relative paths is fragile so we skip this test 15 | skip("libxml2-dependent") 16 | 17 | expect_equal( 18 | url_relative("http://hadley.nz/a/c", "http://hadley.nz"), 19 | "/a/c" 20 | ) 21 | 22 | expect_equal( 23 | url_relative("http://hadley.nz/a/c", "http://hadley.nz/"), 24 | "a/c" 25 | ) 26 | 27 | expect_equal( 28 | url_relative("http://hadley.nz/a/c", "http://hadley.nz/a/b"), 29 | "c" 30 | ) 31 | 32 | expect_equal( 33 | url_relative("http://hadley.nz/a/c", "http://hadley.nz/a/b/"), 34 | "../c" 35 | ) 36 | 37 | expect_error( 38 | url_relative("http://hadley.nz/a/c", c("http://hadley.nz/a/b/c/d", "http://foo.bar")), 39 | "Base URL must be length 1" 40 | ) 41 | }) 42 | 43 | test_that("url_parse", { 44 | expect_equal( 45 | url_parse("http://had.co.nz/"), 46 | data.frame( 47 | scheme = "http", server = "had.co.nz", port = NA_integer_, 48 | user = "", path = "/", query = "", fragment = "", stringsAsFactors = FALSE 49 | ) 50 | ) 51 | 52 | expect_equal( 53 | url_parse("http://had.co.nz:1234/"), 54 | data.frame( 55 | scheme = "http", server = "had.co.nz", port = 1234L, 56 | user = "", path = "/", query = "", fragment = "", stringsAsFactors = FALSE 57 | ) 58 | ) 59 | 60 | expect_equal( 61 | url_parse("http://had.co.nz:1234/?a=1&b=2"), 62 | data.frame( 63 | scheme = 
"http", server = "had.co.nz", port = 1234L, 64 | user = "", path = "/", query = "a=1&b=2", fragment = "", stringsAsFactors = FALSE 65 | ) 66 | ) 67 | 68 | expect_equal( 69 | url_parse("http://had.co.nz:1234/?a=1&b=2#def"), 70 | data.frame( 71 | scheme = "http", server = "had.co.nz", port = 1234L, 72 | user = "", path = "/", query = "a=1&b=2", fragment = "def", stringsAsFactors = FALSE 73 | ) 74 | ) 75 | }) 76 | 77 | test_that("url_escape", { 78 | expect_error( 79 | url_escape("a b c", reserved = c("a", "b")), 80 | "`reserved` must be character vector of length 1" 81 | ) 82 | 83 | expect_equal( 84 | url_escape("a b c"), 85 | "a%20b%20c" 86 | ) 87 | 88 | expect_equal( 89 | url_escape("a b c", " "), 90 | "a b c" 91 | ) 92 | 93 | expect_equal( 94 | url_unescape("a%20b%2fc"), 95 | "a b/c" 96 | ) 97 | 98 | expect_equal( 99 | url_unescape("%C2%B5"), 100 | "\u00B5" 101 | ) 102 | }) 103 | -------------------------------------------------------------------------------- /tests/testthat/test-xml_write.R: -------------------------------------------------------------------------------- 1 | test_that("write_xml errors for incorrect directory and with invalid inputs", { 2 | x <- read_xml("<x/>") 3 | filename <- "does_not_exist/test.xml" 4 | expect_error(write_xml(x, filename), "'does_not_exist' does not exist in current working directory") 5 | 6 | expect_snapshot(error = TRUE, { 7 | write_xml(x, c("test.xml", "foo")) 8 | }) 9 | }) 10 | 11 | test_that("write_xml works with relative file paths", { 12 | x <- read_xml("<x/>") 13 | 14 | filename <- "../test.xml" 15 | on.exit(unlink(filename)) 16 | write_xml(x, filename, options = "no_declaration") 17 | expect_identical(readChar(filename, 1000L), "<x/>\n") 18 | }) 19 | 20 | test_that("write_xml works with no options", { 21 | x <- read_xml("<x/>") 22 | 23 | filename <- "../test.xml" 24 | on.exit(unlink(filename)) 25 | write_xml(x, filename, options = NULL) 26 | expect_identical(readChar(filename, 1000L), "<?xml version=\"1.0\" 
encoding=\"UTF-8\"?>\n<x/>\n") 27 | }) 28 | 29 | test_that("write_xml works with an explicit connections", { 30 | x <- read_xml("<x/>") 31 | 32 | filename <- "../test.xml" 33 | file <- file(filename, "wb") 34 | on.exit(unlink(filename)) 35 | write_xml(x, file, options = "no_declaration") 36 | close(file) 37 | expect_identical(readChar(filename, 1000L), "<x/>\n") 38 | }) 39 | 40 | test_that("write_xml works with an implicit connections", { 41 | x <- read_xml("<x/>") 42 | 43 | filename <- "../test.xml.gz" 44 | write_xml(x, filename, options = "no_declaration") 45 | file <- gzfile(filename, "rb") 46 | on.exit({ 47 | unlink(filename) 48 | close(file) 49 | }) 50 | expect_identical(readChar(file, 1000L), "<x/>\n") 51 | }) 52 | 53 | test_that("write_xml works with nodeset input and files", { 54 | x <- read_xml("<x><y/><y><z/></y></x>") 55 | y <- xml_find_all(x, "//y") 56 | 57 | filename <- "../test.xml" 58 | on.exit(unlink(filename)) 59 | expect_error( 60 | write_xml(y, filename, options = "no_declaration"), 61 | "Can only save length 1 node sets" 62 | ) 63 | 64 | write_xml(y[1], filename, options = "no_declaration") 65 | expect_identical(readChar(filename, 1000L), "<y/>") 66 | }) 67 | 68 | test_that("write_xml works with nodeset input and connections", { 69 | x <- read_xml("<x><y/><y/></x>") 70 | y <- xml_find_all(x, "//y") 71 | 72 | filename <- "../test.xml.gz" 73 | expect_error( 74 | write_xml(y, filename, options = "no_declaration"), 75 | "Can only save length 1 node sets" 76 | ) 77 | 78 | expect_snapshot(error = TRUE, { 79 | write_xml(y[1], c(filename, "foo")) 80 | }) 81 | 82 | write_xml(y[1], filename, options = "no_declaration") 83 | file <- gzfile(filename, "rb") 84 | on.exit({ 85 | unlink(filename) 86 | close(file) 87 | }) 88 | expect_identical(readChar(file, 1000L), "<y/>") 89 | }) 90 | 91 | test_that("write_xml works with node input and files", { 92 | x <- read_xml("<x><y/><y/></x>") 93 | y <- xml_find_first(x, "//y") 94 | 95 | filename <- "../test.xml" 96 | 
expect_snapshot(error = TRUE, write_xml(y, c(filename, "foo"))) 97 | 98 | write_xml(y, filename, options = "no_declaration") 99 | on.exit(unlink(filename)) 100 | expect_identical(readChar(filename, 1000L), "<y/>") 101 | }) 102 | 103 | test_that("write_xml works with node input and connections", { 104 | x <- read_xml("<x><y/><y/></x>") 105 | y <- xml_find_first(x, "//y") 106 | 107 | filename <- "../test.xml.gz" 108 | write_xml(y, filename, options = "no_declaration") 109 | file <- gzfile(filename, "rb") 110 | on.exit({ 111 | unlink(filename) 112 | close(file) 113 | }) 114 | expect_identical(readChar(file, 1000L), "<y/>") 115 | }) 116 | 117 | test_that("write_html work with html input", { 118 | x <- read_html("<html><title>Foo") 119 | 120 | filename <- "../test.html.gz" 121 | write_html(x, filename) 122 | file <- gzfile(filename, "rb") 123 | on.exit({ 124 | unlink(filename) 125 | close(file) 126 | }) 127 | expect_identical( 128 | readChar(file, 1000L), 129 | "\n\n\nFoo\n\n" 130 | ) 131 | }) 132 | 133 | test_that("write_xml returns invisibly", { 134 | x <- read_xml("foo") 135 | tf <- tempfile() 136 | on.exit(unlink(tf)) 137 | 138 | res <- withVisible(write_xml(x, tf)) 139 | 140 | expect_null(res$value) 141 | expect_false(res$visible) 142 | }) 143 | -------------------------------------------------------------------------------- /tests/testthat/xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 10 | 11 | 12 | 13 | Wake up to WonderWidgets! 
14 | 15 | 16 | 17 | 18 | Overview 19 | Why WonderWidgets are great 20 | 21 | Who buys WonderWidgets 22 | 23 | 24 | -------------------------------------------------------------------------------- /tools/winlibs.R: -------------------------------------------------------------------------------- 1 | if(!file.exists("../windows/libxml2/include/libxml2/libxml")){ 2 | unlink("../windows", recursive = TRUE) 3 | url <- if(grepl("aarch", R.version$platform)){ 4 | "https://github.com/r-windows/bundles/releases/download/libxml2-2.11.5/libxml2-2.11.5-clang-aarch64.tar.xz" 5 | } else if(grepl("clang", Sys.getenv('R_COMPILED_BY'))){ 6 | "https://github.com/r-windows/bundles/releases/download/libxml2-2.11.5/libxml2-2.11.5-clang-x86_64.tar.xz" 7 | } else if(getRversion() >= "4.2") { 8 | "https://github.com/r-windows/bundles/releases/download/libxml2-2.11.5/libxml2-2.11.5-ucrt-x86_64.tar.xz" 9 | } else { 10 | "https://github.com/rwinlib/libxml2/archive/v2.10.3.tar.gz" 11 | } 12 | download.file(url, basename(url), quiet = TRUE) 13 | dir.create("../windows", showWarnings = FALSE) 14 | untar(basename(url), exdir = "../windows", tar = 'internal') 15 | unlink(basename(url)) 16 | setwd("../windows") 17 | file.rename(list.files(), 'libxml2') 18 | } 19 | -------------------------------------------------------------------------------- /xml2.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | ProjectId: 1ff14708-efe3-4c8d-a8e6-1d8c6504bbb8 3 | 4 | RestoreWorkspace: Default 5 | SaveWorkspace: Default 6 | AlwaysSaveHistory: Default 7 | 8 | EnableCodeIndexing: Yes 9 | UseSpacesForTab: Yes 10 | NumSpacesForTab: 2 11 | Encoding: UTF-8 12 | 13 | RnwWeave: Sweave 14 | LaTeX: pdfLaTeX 15 | 16 | AutoAppendNewline: Yes 17 | StripTrailingWhitespace: Yes 18 | 19 | BuildType: Package 20 | PackageUseDevtools: Yes 21 | PackageInstallArgs: --no-multiarch --with-keep.source 22 | PackageRoxygenize: rd,collate,namespace 23 | 
--------------------------------------------------------------------------------