├── .Rbuildignore
├── .github
    ├── .gitignore
    ├── CODEOWNERS
    ├── CODE_OF_CONDUCT.md
    ├── CONTRIBUTING.md
    ├── ISSUE_TEMPLATE.md
    ├── ISSUE_TEMPLATE
    │   └── issue_template.md
    ├── SUPPORT.md
    └── workflows
    │   ├── R-CMD-check.yaml
    │   ├── pkgdown.yaml
    │   ├── pr-commands.yaml
    │   ├── recheck.yaml
    │   ├── rhub.yaml
    │   └── test-coverage.yaml
├── .gitignore
├── DESCRIPTION
├── LICENSE
├── LICENSE.md
├── NAMESPACE
├── NEWS.md
├── R
    ├── S4.R
    ├── as_list.R
    ├── as_xml_document.R
    ├── classes.R
    ├── format.R
    ├── import-standalone-obj-type.R
    ├── import-standalone-purrr.R
    ├── import-standalone-types-check.R
    ├── init.R
    ├── nodeset_apply.R
    ├── paths.R
    ├── utils.R
    ├── xml2-package.R
    ├── xml_attr.R
    ├── xml_children.R
    ├── xml_document.R
    ├── xml_find.R
    ├── xml_missing.R
    ├── xml_modify.R
    ├── xml_name.R
    ├── xml_namespaces.R
    ├── xml_node.R
    ├── xml_nodeset.R
    ├── xml_parse.R
    ├── xml_path.R
    ├── xml_schema.R
    ├── xml_serialize.R
    ├── xml_structure.R
    ├── xml_text.R
    ├── xml_type.R
    ├── xml_url.R
    ├── xml_write.R
    └── zzz.R
├── README.Rmd
├── README.md
├── _pkgdown.yml
├── cleanup
├── codecov.yml
├── configure
├── configure.win
├── cran-comments.md
├── docker
    └── r-devel-san
    │   └── Dockerfile
├── inst
    ├── extdata
    │   ├── cd_catalog.xml
    │   ├── order-doc.xml
    │   ├── order-schema.xml
    │   └── r-project.html
    └── include
    │   └── xml2_types.h
├── man
    ├── as_list.Rd
    ├── as_xml_document.Rd
    ├── download_xml.Rd
    ├── figures
    │   ├── lifecycle-archived.svg
    │   ├── lifecycle-defunct.svg
    │   ├── lifecycle-deprecated.svg
    │   ├── lifecycle-experimental.svg
    │   ├── lifecycle-maturing.svg
    │   ├── lifecycle-questioning.svg
    │   ├── lifecycle-soft-deprecated.svg
    │   ├── lifecycle-stable.svg
    │   └── lifecycle-superseded.svg
    ├── oldclass.Rd
    ├── read_xml.Rd
    ├── url_absolute.Rd
    ├── url_escape.Rd
    ├── url_parse.Rd
    ├── write_xml.Rd
    ├── xml2-package.Rd
    ├── xml2_example.Rd
    ├── xml_attr.Rd
    ├── xml_cdata.Rd
    ├── xml_children.Rd
    ├── xml_comment.Rd
    ├── xml_dtd.Rd
    ├── xml_find_all.Rd
    ├── xml_missing.Rd
    ├── xml_name.Rd
    ├── xml_new_document.Rd
    ├── xml_ns.Rd
    ├── xml_ns_strip.Rd
    ├── xml_path.Rd
    ├── xml_replace.Rd
    ├── xml_serialize.Rd
    ├── xml_set_namespace.Rd
    ├── xml_structure.Rd
    ├── xml_text.Rd
    ├── xml_type.Rd
    ├── xml_url.Rd
    └── xml_validate.Rd
├── revdep
    ├── .gitignore
    ├── README.md
    ├── cran.md
    ├── email.yml
    ├── failures.md
    └── problems.md
├── src
    ├── .gitignore
    ├── Makevars.in
    ├── Makevars.win
    ├── connection.cpp
    ├── connection.h
    ├── init.c
    ├── xml2_doc.cpp
    ├── xml2_init.c
    ├── xml2_namespace.cpp
    ├── xml2_node.cpp
    ├── xml2_output.cpp
    ├── xml2_schema.cpp
    ├── xml2_url.cpp
    ├── xml2_utils.h
    └── xml2_xpath.cpp
├── tests
    ├── testthat.R
    └── testthat
    │   ├── _snaps
    │       ├── xml_attr.md
    │       ├── xml_children.md
    │       ├── xml_document.md
    │       ├── xml_find.md
    │       ├── xml_name.md
    │       ├── xml_node.md
    │       ├── xml_nodeset.md
    │       ├── xml_parse.md
    │       ├── xml_structure.md
    │       └── xml_write.md
    │   ├── helper.R
    │   ├── lego.html.bz2
    │   ├── ns-multiple-aliases.xml
    │   ├── ns-multiple-default.xml
    │   ├── ns-multiple-prefix.xml
    │   ├── ns-multiple.xml
    │   ├── records.dtd
    │   ├── records.xml
    │   ├── setup.R
    │   ├── test-as_list.R
    │   ├── test-as_xml_document.R
    │   ├── test-classes.R
    │   ├── test-format.R
    │   ├── test-null.R
    │   ├── test-xml_attr.R
    │   ├── test-xml_children.R
    │   ├── test-xml_document.R
    │   ├── test-xml_find.R
    │   ├── test-xml_missing.R
    │   ├── test-xml_modify.R
    │   ├── test-xml_name.R
    │   ├── test-xml_namespaces.R
    │   ├── test-xml_node.R
    │   ├── test-xml_nodeset.R
    │   ├── test-xml_parse.R
    │   ├── test-xml_schema.R
    │   ├── test-xml_serialize.R
    │   ├── test-xml_structure.R
    │   ├── test-xml_text.R
    │   ├── test-xml_type.R
    │   ├── test-xml_url.R
    │   ├── test-xml_write.R
    │   └── xml
├── tools
    └── winlibs.R
├── vignettes
    └── modification.Rmd
└── xml2.Rproj


/.Rbuildignore:
--------------------------------------------------------------------------------
 1 | ^.*\.Rproj$
 2 | ^\.Rproj\.user$
 3 | windows
 4 | .travis.yml
 5 | ^cran-comments\.md$
 6 | ^revdep$
 7 | ^src/Makevars$
 8 | ^codecov\.yml$
 9 | ^docker$
10 | ^src/\.ycm_extra_conf\.py$
11 | ^src/\.ycm_extra_conf\.pyc$
12 | ^appveyor\.yml$
13 | ^script\.R$
14 | ^\.github$
15 | ^TODO\.md$
16 | ^_pkgdown.yml$
17 | ^docs/$
18 | ^docs$
19 | ^CRAN-RELEASE$
20 | ^LICENSE\.md$
21 | ^Makefile$
22 | \.dll$
23 | \.o$
24 | \.so$
25 | ^CRAN-SUBMISSION$
26 | ^README\.Rmd$
27 | ^configure.log$
28 | 


--------------------------------------------------------------------------------
/.github/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | 


--------------------------------------------------------------------------------
/.github/CODEOWNERS:
--------------------------------------------------------------------------------
1 | # CODEOWNERS for xml2
2 | # https://www.tidyverse.org/development/understudies
3 | .github/CODEOWNERS @jimhester @jennybc
4 | 


--------------------------------------------------------------------------------
/.github/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # Contributing to xml2
 2 | 
 3 | This outlines how to propose a change to xml2. 
 4 | For more detailed info about contributing to this, and other tidyverse packages, please see the
 5 | [**development contributing guide**](https://rstd.io/tidy-contrib). 
 6 | 
 7 | ## Fixing typos
 8 | 
 9 | You can fix typos, spelling mistakes, or grammatical errors in the documentation directly using the GitHub web interface, as long as the changes are made in the _source_ file. 
10 | This generally means you'll need to edit [roxygen2 comments](https://roxygen2.r-lib.org/articles/roxygen2.html) in an `.R`, not a `.Rd` file. 
11 | You can find the `.R` file that generates the `.Rd` by reading the comment in the first line.
12 | 
13 | ## Bigger changes
14 | 
15 | If you want to make a bigger change, it's a good idea to first file an issue and make sure someone from the team agrees that it’s needed. 
16 | If you’ve found a bug, please file an issue that illustrates the bug with a minimal 
17 | [reprex](https://www.tidyverse.org/help/#reprex) (this will also help you write a unit test, if needed).
18 | 
19 | ### Pull request process
20 | 
21 | *   Fork the package and clone onto your computer. If you haven't done this before, we recommend using `usethis::create_from_github("r-lib/xml2", fork = TRUE)`.
22 | 
23 | *   Install all development dependencies with `devtools::install_dev_deps()`, and then make sure the package passes R CMD check by running `devtools::check()`. 
24 |     If R CMD check doesn't pass cleanly, it's a good idea to ask for help before continuing. 
25 | *   Create a Git branch for your pull request (PR). We recommend using `usethis::pr_init("brief-description-of-change")`.
26 | 
27 | *   Make your changes, commit to git, and then create a PR by running `usethis::pr_push()`, and following the prompts in your browser.
28 |     The title of your PR should briefly describe the change.
29 |     The body of your PR should contain `Fixes #issue-number`.
30 | 
31 | *  For user-facing changes, add a bullet to the top of `NEWS.md` (i.e. just below the first header). Follow the style described in <https://style.tidyverse.org/news.html>.
32 | 
33 | ### Code style
34 | 
35 | *   New code should follow the tidyverse [style guide](https://style.tidyverse.org). 
36 |     You can use the [styler](https://CRAN.R-project.org/package=styler) package to apply these styles, but please don't restyle code that has nothing to do with your PR.  
37 | 
38 | *  We use [roxygen2](https://cran.r-project.org/package=roxygen2), with [Markdown syntax](https://cran.r-project.org/web/packages/roxygen2/vignettes/markdown.html), for documentation.  
39 | 
40 | *  We use [testthat](https://cran.r-project.org/package=testthat) for unit tests. 
41 |    Contributions with test cases included are easier to accept.  
42 | 
43 | ## Code of Conduct
44 | 
45 | Please note that the xml2 project is released with a
46 | [Contributor Code of Conduct](CODE_OF_CONDUCT.md). By contributing to this
47 | project you agree to abide by its terms.
48 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE.md:
--------------------------------------------------------------------------------
 1 | ### Issue Description and Expected Result
 2 | <!--Example: `read_xml()` fails on the following input.-->
 3 | 
 4 | ### Reproducible Example
 5 | <!--
 6 | If possible include a _small_ example of the error and R code which reproduces the problem.
 7 | 
 8 | See [How to Make a Great Reproducible Example](http://stackoverflow.com/q/5963269/2055486)
 9 | 
10 | Example:
11 | ```r
12 | library(xml2)
13 | x <- read_xml("<x><y/><z/></x>")
14 | b <- xml_find_first(x, "//zz")
15 | b
16 | #> {xml_missing}
17 | #> <NA>
18 | ```
19 | -->
20 | 
21 | <details>
22 | <summary>Session Info</summary>
23 | 
24 | ```r
25 | devtools::session_info() # Paste output below
26 | #> output
27 | ```
28 | </details>
29 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/issue_template.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Bug report or feature request
 3 | about: Describe a bug you've seen or make a case for a new feature
 4 | ---
 5 | 
 6 | Please briefly describe your problem and what output you expect. If you have a question, please don't use this form. Instead, ask on <https://stackoverflow.com/> or <https://community.rstudio.com/>.
 7 | 
 8 | Please include a minimal reproducible example (AKA a reprex). If you've never heard of a [reprex](http://reprex.tidyverse.org/) before, start by reading <https://www.tidyverse.org/help/#reprex>.
 9 | 
10 | Brief description of the problem
11 | 
12 | ```r
13 | # insert reprex here
14 | ```
15 | 


--------------------------------------------------------------------------------
/.github/SUPPORT.md:
--------------------------------------------------------------------------------
 1 | # Getting help with xml2
 2 | 
 3 | Thanks for using xml2!
 4 | Before filing an issue, there are a few places to explore and pieces to put together to make the process as smooth as possible.
 5 | 
 6 | ## Make a reprex
 7 | 
 8 | Start by making a minimal **repr**oducible **ex**ample using the  [reprex](https://reprex.tidyverse.org/) package. 
 9 | If you haven't heard of or used reprex before, you're in for a treat! 
10 | Seriously, reprex will make all of your R-question-asking endeavors easier (which is a pretty insane ROI for the five to ten minutes it'll take you to learn what it's all about). 
11 | For additional reprex pointers, check out the [Get help!](https://www.tidyverse.org/help/) section of the tidyverse site.
12 | 
13 | ## Where to ask?
14 | 
15 | Armed with your reprex, the next step is to figure out [where to ask](https://www.tidyverse.org/help/#where-to-ask). 
16 | 
17 | *   If it's a question: start with [community.rstudio.com](https://community.rstudio.com/), and/or StackOverflow. There are more people there to answer questions.  
18 | 
19 | *   If it's a bug: you're in the right place, [file an issue](https://github.com/r-lib/xml2/issues/new).  
20 |   
21 | *   If you're not sure: let the community help you figure it out! 
22 |     If your problem _is_ a bug or a feature request, you can easily return here and report it. 
23 | 
24 | Before opening a new issue, be sure to [search issues and pull requests](https://github.com/r-lib/xml2/issues) to make sure the bug hasn't been reported and/or already fixed in the development version. 
25 | By default, the search will be pre-populated with `is:issue is:open`. 
26 | You can [edit the qualifiers](https://help.github.com/articles/searching-issues-and-pull-requests/)  (e.g. `is:pr`, `is:closed`) as needed. 
27 | For example, you'd simply remove `is:open` to search _all_ issues in the repo, open or closed.
28 | 
29 | ## What happens next?
30 | 
31 | To be as efficient as possible, development of tidyverse packages tends to be very bursty, so you shouldn't worry if you don't get an immediate response.
32 | Typically we don't look at a repo until a sufficient quantity of issues accumulates, then there’s a burst of intense activity as we focus our efforts. 
33 | That makes development more efficient because it avoids expensive context switching between problems, at the cost of taking longer to get back to you. 
34 | This process makes a good reprex particularly important because it might be multiple months between your initial report and when we start working on it. 
35 | If we can’t reproduce the bug, we can’t fix it!
36 | 


--------------------------------------------------------------------------------
/.github/workflows/R-CMD-check.yaml:
--------------------------------------------------------------------------------
 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 3 | #
 4 | # NOTE: This workflow is overkill for most R packages and
 5 | # check-standard.yaml is likely a better choice.
 6 | # usethis::use_github_action("check-standard") will install it.
 7 | on:
 8 |   push:
 9 |     branches: [main, master]
10 |   pull_request:
11 |     branches: [main, master]
12 | 
13 | name: R-CMD-check.yaml
14 | 
15 | permissions: read-all
16 | 
17 | jobs:
18 |   R-CMD-check:
19 |     runs-on: ${{ matrix.config.os }}
20 | 
21 |     name: ${{ matrix.config.os }} (${{ matrix.config.r }})
22 | 
23 |     strategy:
24 |       fail-fast: false
25 |       matrix:
26 |         config:
27 |           - {os: macos-latest,   r: 'release'}
28 | 
29 |           - {os: windows-latest, r: 'release'}
30 |           - {os: windows-latest, r: 'oldrel-1'}
31 |           - {os: windows-latest, r: 'oldrel-2'}
32 |           - {os: windows-latest, r: 'oldrel-3'}
33 |           - {os: windows-latest, r: 'oldrel-4'}
34 | 
35 |           - {os: ubuntu-latest,  r: 'devel', http-user-agent: 'release'}
36 |           - {os: ubuntu-latest,  r: 'release'}
37 |           - {os: ubuntu-latest,  r: 'oldrel-1'}
38 |           - {os: ubuntu-latest,  r: 'oldrel-2'}
39 |           - {os: ubuntu-latest,  r: 'oldrel-3'}
40 |           - {os: ubuntu-latest,  r: 'oldrel-4'}
41 | 
42 |     env:
43 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
44 |       R_KEEP_PKG_SOURCE: yes
45 | 
46 |     steps:
47 |       - uses: actions/checkout@v4
48 | 
49 |       - uses: r-lib/actions/setup-pandoc@v2
50 | 
51 |       - uses: r-lib/actions/setup-r@v2
52 |         with:
53 |           r-version: ${{ matrix.config.r }}
54 |           http-user-agent: ${{ matrix.config.http-user-agent }}
55 |           use-public-rspm: true
56 | 
57 |       - uses: r-lib/actions/setup-r-dependencies@v2
58 |         with:
59 |           extra-packages: any::rcmdcheck
60 |           needs: check
61 | 
62 |       - uses: r-lib/actions/check-r-package@v2
63 |         with:
64 |           upload-snapshots: true
65 |           build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")'
66 | 


--------------------------------------------------------------------------------
/.github/workflows/pkgdown.yaml:
--------------------------------------------------------------------------------
 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 3 | on:
 4 |   push:
 5 |     branches: [main, master]
 6 |   pull_request:
 7 |     branches: [main, master]
 8 |   release:
 9 |     types: [published]
10 |   workflow_dispatch:
11 | 
12 | name: pkgdown.yaml
13 | 
14 | permissions: read-all
15 | 
16 | jobs:
17 |   pkgdown:
18 |     runs-on: ubuntu-latest
19 |     # Only restrict concurrency for non-PR jobs
20 |     concurrency:
21 |       group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }}
22 |     env:
23 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
24 |     permissions:
25 |       contents: write
26 |     steps:
27 |       - uses: actions/checkout@v4
28 | 
29 |       - uses: r-lib/actions/setup-pandoc@v2
30 | 
31 |       - uses: r-lib/actions/setup-r@v2
32 |         with:
33 |           use-public-rspm: true
34 | 
35 |       - uses: r-lib/actions/setup-r-dependencies@v2
36 |         with:
37 |           extra-packages: any::pkgdown, local::.
38 |           needs: website
39 | 
40 |       - name: Build site
41 |         run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE)
42 |         shell: Rscript {0}
43 | 
44 |       - name: Deploy to GitHub pages 🚀
45 |         if: github.event_name != 'pull_request'
46 |         uses: JamesIves/github-pages-deploy-action@v4.5.0
47 |         with:
48 |           clean: false
49 |           branch: gh-pages
50 |           folder: docs
51 | 


--------------------------------------------------------------------------------
/.github/workflows/pr-commands.yaml:
--------------------------------------------------------------------------------
 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 3 | on:
 4 |   issue_comment:
 5 |     types: [created]
 6 | 
 7 | name: pr-commands.yaml
 8 | 
 9 | permissions: read-all
10 | 
11 | jobs:
12 |   document:
13 |     if: ${{ github.event.issue.pull_request && (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') && startsWith(github.event.comment.body, '/document') }}
14 |     name: document
15 |     runs-on: ubuntu-latest
16 |     env:
17 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
18 |     permissions:
19 |       contents: write
20 |     steps:
21 |       - uses: actions/checkout@v4
22 | 
23 |       - uses: r-lib/actions/pr-fetch@v2
24 |         with:
25 |           repo-token: ${{ secrets.GITHUB_TOKEN }}
26 | 
27 |       - uses: r-lib/actions/setup-r@v2
28 |         with:
29 |           use-public-rspm: true
30 | 
31 |       - uses: r-lib/actions/setup-r-dependencies@v2
32 |         with:
33 |           extra-packages: any::roxygen2
34 |           needs: pr-document
35 | 
36 |       - name: Document
37 |         run: roxygen2::roxygenise()
38 |         shell: Rscript {0}
39 | 
40 |       - name: commit
41 |         run: |
42 |           git config --local user.name "$GITHUB_ACTOR"
43 |           git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com"
44 |           git add man/\* NAMESPACE
45 |           git commit -m 'Document'
46 | 
47 |       - uses: r-lib/actions/pr-push@v2
48 |         with:
49 |           repo-token: ${{ secrets.GITHUB_TOKEN }}
50 | 
51 |   style:
52 |     if: ${{ github.event.issue.pull_request && (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') && startsWith(github.event.comment.body, '/style') }}
53 |     name: style
54 |     runs-on: ubuntu-latest
55 |     env:
56 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
57 |     permissions:
58 |       contents: write
59 |     steps:
60 |       - uses: actions/checkout@v4
61 | 
62 |       - uses: r-lib/actions/pr-fetch@v2
63 |         with:
64 |           repo-token: ${{ secrets.GITHUB_TOKEN }}
65 | 
66 |       - uses: r-lib/actions/setup-r@v2
67 | 
68 |       - name: Install dependencies
69 |         run: install.packages("styler")
70 |         shell: Rscript {0}
71 | 
72 |       - name: Style
73 |         run: styler::style_pkg()
74 |         shell: Rscript {0}
75 | 
76 |       - name: commit
77 |         run: |
78 |           git config --local user.name "$GITHUB_ACTOR"
79 |           git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com"
80 |           git add \*.R
81 |           git commit -m 'Style'
82 | 
83 |       - uses: r-lib/actions/pr-push@v2
84 |         with:
85 |           repo-token: ${{ secrets.GITHUB_TOKEN }}
86 | 


--------------------------------------------------------------------------------
/.github/workflows/recheck.yaml:
--------------------------------------------------------------------------------
 1 | on:
 2 |   workflow_dispatch:
 3 |     inputs:
 4 |       which:
 5 |         type: choice
 6 |         description: Which dependents to check
 7 |         options:
 8 |           - strong
 9 |           - most
10 |   schedule:
11 |     - cron: '0 4 1 * *'
12 | 
13 | name: Reverse dependency check
14 | 
15 | jobs:
16 |   revdep_check:
17 |     name: Reverse check ${{ inputs.which }} dependents
18 |     uses: r-devel/recheck/.github/workflows/recheck.yml@v1
19 |     with:
20 |       which: ${{ inputs.which }}
21 |       subdirectory: ''
22 | 


--------------------------------------------------------------------------------
/.github/workflows/rhub.yaml:
--------------------------------------------------------------------------------
 1 | # R-hub's generic GitHub Actions workflow file. Its canonical location is at
 2 | # https://github.com/r-hub/rhub2/blob/v1/inst/workflow/rhub.yaml
 3 | # You can update this file to a newer version using the rhub2 package:
 4 | #
 5 | # rhub2::rhub_setup()
 6 | #
 7 | # It is unlikely that you need to modify this file manually.
 8 | 
 9 | name: R-hub
10 | run-name: ${{ github.event.inputs.name || format('Manually run by {0}', github.triggering_actor) }} (${{ github.event.inputs.id }})
11 | 
12 | on:
13 |   workflow_dispatch:
14 |     inputs:
15 |       config:
16 |         description: 'A comma separated list of R-hub platforms to use.'
17 |         type: string
18 |         default: 'linux,windows,macos'
19 |       name:
20 |         description: 'Run name. You can leave this empty now.'
21 |         type: string
22 |       id:
23 |         description: 'Unique ID. You can leave this empty now.'
24 |         type: string
25 | 
26 | jobs:
27 | 
28 |   setup:
29 |     runs-on: ubuntu-latest
30 |     outputs:
31 |       containers: ${{ steps.rhub-setup.outputs.containers }}
32 |       platforms: ${{ steps.rhub-setup.outputs.platforms }}
33 | 
34 |     steps:
35 |     # NO NEED TO CHECKOUT HERE
36 |     - uses: r-hub/rhub2/actions/rhub-setup@v1
37 |       with:
38 |         config: ${{ github.event.inputs.config }}
39 |       id: rhub-setup
40 | 
41 |   linux-containers:
42 |     needs: setup
43 |     if: ${{ needs.setup.outputs.containers != '[]' }}
44 |     runs-on: ubuntu-latest
45 |     name: ${{ matrix.config.label }}
46 |     strategy:
47 |       fail-fast: false
48 |       matrix:
49 |         config: ${{ fromJson(needs.setup.outputs.containers) }}
50 |     container:
51 |       image: ${{ matrix.config.container }}
52 | 
53 |     steps:
54 |       - uses: actions/checkout@v3
55 |       - uses: r-hub/rhub2/actions/rhub-check@v1
56 |         with:
57 |           token: ${{ secrets.RHUB_TOKEN }}
58 |           job-config: ${{ matrix.config.job-config }}
59 | 
60 |   other-platforms:
61 |     needs: setup
62 |     if: ${{ needs.setup.outputs.platforms != '[]' }}
63 |     runs-on: ${{ matrix.config.os }}
64 |     name: ${{ matrix.config.label }}
65 |     strategy:
66 |       fail-fast: false
67 |       matrix:
68 |         config: ${{ fromJson(needs.setup.outputs.platforms) }}
69 | 
70 |     steps:
71 |       - uses: actions/checkout@v3
72 |       - uses: r-hub/rhub2/actions/rhub-setup-r@v1
73 |         with:
74 |           job-config: ${{ matrix.config.job-config }}
75 |           token: ${{ secrets.RHUB_TOKEN }}
76 |       - uses: r-hub/rhub2/actions/rhub-check@v1
77 |         with:
78 |           job-config: ${{ matrix.config.job-config }}
79 |           token: ${{ secrets.RHUB_TOKEN }}
80 | 


--------------------------------------------------------------------------------
/.github/workflows/test-coverage.yaml:
--------------------------------------------------------------------------------
 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 3 | on:
 4 |   push:
 5 |     branches: [main, master]
 6 |   pull_request:
 7 |     branches: [main, master]
 8 | 
 9 | name: test-coverage.yaml
10 | 
11 | permissions: read-all
12 | 
13 | jobs:
14 |   test-coverage:
15 |     runs-on: ubuntu-latest
16 |     env:
17 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
18 | 
19 |     steps:
20 |       - uses: actions/checkout@v4
21 | 
22 |       - uses: r-lib/actions/setup-r@v2
23 |         with:
24 |           use-public-rspm: true
25 | 
26 |       - uses: r-lib/actions/setup-r-dependencies@v2
27 |         with:
28 |           extra-packages: any::covr, any::xml2
29 |           needs: coverage
30 | 
31 |       - name: Test coverage
32 |         run: |
33 |           cov <- covr::package_coverage(
34 |             quiet = FALSE,
35 |             clean = FALSE,
36 |             install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package")
37 |           )
38 |           covr::to_cobertura(cov)
39 |         shell: Rscript {0}
40 | 
41 |       - uses: codecov/codecov-action@v4
42 |         with:
43 |           fail_ci_if_error: ${{ github.event_name != 'pull_request' && true || false }}
44 |           file: ./cobertura.xml
45 |           plugin: noop
46 |           disable_search: true
47 |           token: ${{ secrets.CODECOV_TOKEN }}
48 | 
49 |       - name: Show testthat output
50 |         if: always()
51 |         run: |
52 |           ## --------------------------------------------------------------------
53 |           find '${{ runner.temp }}/package' -name 'testthat.Rout*' -exec cat '{}' \; || true
54 |         shell: bash
55 | 
56 |       - name: Upload test results
57 |         if: failure()
58 |         uses: actions/upload-artifact@v4
59 |         with:
60 |           name: coverage-test-failures
61 |           path: ${{ runner.temp }}/package
62 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .Rproj.user
 2 | .Rhistory
 3 | .RData
 4 | src/*.o
 5 | src/*.so
 6 | src/*.dll
 7 | src/*.a
 8 | src/Makevars
 9 | inst/doc
10 | script.R
11 | TODO.md
12 | windows
13 | docs/
14 | Makefile
15 | configure.log
16 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: xml2
 2 | Title: Parse XML
 3 | Version: 1.3.8
 4 | Authors@R: c(
 5 |     person("Hadley", "Wickham", role = "aut"),
 6 |     person("Jim", "Hester", role = "aut"),
 7 |     person("Jeroen", "Ooms", email = "jeroenooms@gmail.com", role = c("aut", "cre")),
 8 |     person("Posit Software, PBC", role = c("cph", "fnd")),
 9 |     person("R Foundation", role = "ctb",
10 |            comment = "Copy of R-project homepage cached as example")
11 |   )
12 | Description: Bindings to 'libxml2' for working with XML data using a simple, 
13 |     consistent interface based on 'XPath' expressions. Also supports XML schema
14 |     validation; for 'XSLT' transformations see the 'xslt' package.
15 | License: MIT + file LICENSE
16 | URL: https://xml2.r-lib.org, https://r-lib.r-universe.dev/xml2
17 | BugReports: https://github.com/r-lib/xml2/issues
18 | Depends:
19 |     R (>= 3.6.0)
20 | Imports:
21 |     cli,
22 |     methods,
23 |     rlang (>= 1.1.0)
24 | Suggests:
25 |     covr,
26 |     curl,
27 |     httr,
28 |     knitr,
29 |     magrittr,
30 |     mockery,
31 |     rmarkdown,
32 |     testthat (>= 3.2.0),
33 |     xslt
34 | VignetteBuilder: 
35 |     knitr
36 | Config/Needs/website: tidyverse/tidytemplate
37 | Encoding: UTF-8
38 | Roxygen: list(markdown = TRUE)
39 | RoxygenNote: 7.2.3
40 | SystemRequirements: libxml2: libxml2-dev (deb), libxml2-devel (rpm)
41 | Collate: 
42 |     'S4.R'
43 |     'as_list.R'
44 |     'xml_parse.R'
45 |     'as_xml_document.R'
46 |     'classes.R'
47 |     'format.R'
48 |     'import-standalone-obj-type.R'
49 |     'import-standalone-purrr.R'
50 |     'import-standalone-types-check.R'
51 |     'init.R'
52 |     'nodeset_apply.R'
53 |     'paths.R'
54 |     'utils.R'
55 |     'xml2-package.R'
56 |     'xml_attr.R'
57 |     'xml_children.R'
58 |     'xml_document.R'
59 |     'xml_find.R'
60 |     'xml_missing.R'
61 |     'xml_modify.R'
62 |     'xml_name.R'
63 |     'xml_namespaces.R'
64 |     'xml_node.R'
65 |     'xml_nodeset.R'
66 |     'xml_path.R'
67 |     'xml_schema.R'
68 |     'xml_serialize.R'
69 |     'xml_structure.R'
70 |     'xml_text.R'
71 |     'xml_type.R'
72 |     'xml_url.R'
73 |     'xml_write.R'
74 |     'zzz.R'
75 | Config/testthat/edition: 3
76 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | YEAR: 2025
2 | COPYRIGHT HOLDER: xml2 authors
3 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | # MIT License
 2 | 
 3 | Copyright (c) 2023 xml2 authors
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/R/S4.R:
--------------------------------------------------------------------------------
 1 | #' Register S4 classes
 2 | #'
 3 | #' @description
 4 | #' The xml2 S3 classes are registered with [methods::setOldClass()] and exported so they can be re-used within S4 classes.
 5 | #'
 6 | #' * `xml_document`: a complete document.
 7 | #' * `xml_nodeset`: a _set_ of nodes within a document.
 8 | #' * `xml_missing`: a missing object, e.g. for an empty result set.
 9 | #' * `xml_node`: a single node in a document.
10 | #'
11 | #' @importFrom methods setOldClass
12 | #' @keywords internal
13 | #' @rdname oldclass
14 | #' @name xml_document-class
15 | #' @exportClass xml_document
16 | setOldClass("xml_document")
17 | 
18 | #' @name xml_missing-class
19 | #' @exportClass xml_missing
20 | #' @rdname oldclass
21 | setOldClass("xml_missing")
22 | 
23 | #' @name xml_node-class
24 | #' @exportClass xml_node
25 | #' @rdname oldclass
26 | setOldClass("xml_node")
27 | 
28 | #' @name xml_nodeset-class
29 | #' @exportClass xml_nodeset
30 | #' @rdname oldclass
31 | setOldClass("xml_nodeset")
32 | 


--------------------------------------------------------------------------------
/R/as_list.R:
--------------------------------------------------------------------------------
  1 | #' Coerce xml nodes to a list.
  2 | #'
  3 | #' This turns an XML document (or node or nodeset) into the equivalent R
  4 | #' list. Note that this is `as_list()`, not `as.list()`:
  5 | #' `lapply()` automatically calls `as.list()` on its inputs, so
  6 | #' we can't override the default.
  7 | #'
  8 | #' `as_list` currently only handles the four most common types of
  9 | #' children that an element might have:
 10 | #'
 11 | #' \itemize{
 12 | #'   \item Other elements, converted to lists.
 13 | #'   \item Attributes, stored as R attributes. Attributes that have special meanings in R
 14 | #'           ([class()], [comment()], [dim()],
 15 | #'           [dimnames()], [names()], [row.names()] and
 16 | #'           [tsp()]) are escaped with '.'
 17 | #'   \item Text, stored as a character vector.
 18 | #' }
 19 | #'
 20 | #' @inheritParams xml_name
 21 | #' @param ... Needed for compatibility with generic. Unused.
 22 | #' @export
 23 | #' @examples
 24 | #' as_list(read_xml("<foo> a <b /><c><![CDATA[<d></d>]]></c></foo>"))
 25 | #' as_list(read_xml("<foo> <bar><baz /></bar> </foo>"))
 26 | #' as_list(read_xml("<foo id = 'a'></foo>"))
 27 | #' as_list(read_xml("<foo><bar id='a'/><bar id='b'/></foo>"))
 28 | as_list <- function(x, ns = character(), ...) {
 29 |   UseMethod("as_list")
 30 | }
 31 | 
 32 | #' @export
 33 | as_list.xml_missing <- function(x, ns = character(), ...) {
 34 |   list()
 35 | }
 36 | 
 37 | #' @export
 38 | as_list.xml_document <- function(x, ns = character(), ...) {
 39 |   if (!inherits(x, "xml_node")) {
 40 |     return(list())
 41 |   }
 42 | 
 43 |   out <- list(NextMethod())
 44 |   names(out) <- xml_name(x)
 45 |   out
 46 | }
 47 | 
 48 | #' @export
 49 | as_list.xml_node <- function(x, ns = character(), ...) {
 50 |   contents <- xml_contents(x)
 51 |   if (length(contents) == 0) {
 52 |     # Base case - contents
 53 |     type <- xml_type(x)
 54 | 
 55 |     if (type %in% c("text", "cdata")) {
 56 |       return(xml_text(x))
 57 |     }
 58 |     if (type != "element" && type != "document") {
 59 |       return(paste("[", type, "]"))
 60 |     }
 61 | 
 62 |     out <- list()
 63 |   } else {
 64 |     out <- lapply(seq_along(contents), function(i) as_list(contents[[i]], ns = ns))
 65 | 
 66 |     nms <- ifelse(xml_type(contents) == "element", xml_name(contents, ns = ns), "")
 67 |     if (any(nms != "")) {
 68 |       names(out) <- nms
 69 |     }
 70 |   }
 71 | 
 72 |   # Add xml attributes as R attributes
 73 |   attributes(out) <- c(list(names = names(out)), xml_to_r_attrs(xml_attrs(x, ns = ns)))
 74 | 
 75 |   out
 76 | }
 77 | 
 78 | #' @export
 79 | as_list.xml_nodeset <- function(x, ns = character(), ...) {
 80 |   lapply(seq_along(x), function(i) as_list(x[[i]], ns = ns))
 81 | }
 82 | 
 83 | special_attributes <- c("class", "comment", "dim", "dimnames", "names", "row.names", "tsp")
 84 | 
 85 | xml_to_r_attrs <- function(x) {
 86 |   if (length(x) == 0) {
 87 |     return(NULL)
 88 |   }
 89 |   # escape special names
 90 |   special <- names(x) %in% special_attributes
 91 |   names(x)[special] <- paste0(".", names(x)[special])
 92 |   as.list(x)
 93 | }
 94 | 
 95 | r_attrs_to_xml <- function(x) {
 96 |   if (length(x) == 0) {
 97 |     return(NULL)
 98 |   }
 99 | 
100 |   # Drop R special attributes
101 |   x <- x[!names(x) %in% special_attributes]
102 | 
103 |   # Rename any xml attributes needed
104 |   special <- names(x) %in% paste0(".", special_attributes)
105 | 
106 |   names(x)[special] <- sub("^\\.", "", names(x)[special])
107 |   x
108 | }
109 | 


--------------------------------------------------------------------------------
/R/as_xml_document.R:
--------------------------------------------------------------------------------
 1 | #' Coerce a R list to xml nodes.
 2 | #'
 3 | #' This turns an R list into the equivalent XML document. Not all R lists will
 4 | #' produce valid XML, in particular there can only be one root node and all
 5 | #' child nodes need to be named (or empty) lists. R attributes become XML
 6 | #' attributes and R names become XML node names.
 7 | #'
 8 | #' @inheritParams as_list
 9 | #' @include as_list.R xml_parse.R
10 | #' @export
11 | #' @examples
12 | # empty lists generate empty nodes
13 | #' as_xml_document(list(x = list()))
14 | #'
15 | #' # Nesting multiple nodes
16 | #' as_xml_document(list(foo = list(bar = list(baz = list()))))
17 | #'
18 | #' # attributes are stored as R attributes
19 | #' as_xml_document(list(foo = structure(list(), id = "a")))
20 | #' as_xml_document(list(foo = list(
21 | #'   bar = structure(list(), id = "a"),
22 | #'   bar = structure(list(), id = "b")
23 | #' )))
24 | as_xml_document <- function(x, ...) {
25 |   UseMethod("as_xml_document")
26 | }
27 | 
# The character, raw, connection and response methods are the corresponding
# `read_xml()` methods under another name.
#' @export
as_xml_document.character <- read_xml.character

#' @export
as_xml_document.raw <- read_xml.raw

#' @export
as_xml_document.connection <- read_xml.connection

#' @export
as_xml_document.response <- read_xml.response
39 | 
#' @export
as_xml_document.list <- function(x, ...) {
  if (length(x) > 1) {
    cli::cli_abort("Root nodes must be of length 1.")
  }

  # Recursively append `item` below `parent`:
  # * atomic values become text nodes,
  # * tagged lists become child elements whose R attributes are set as XML
  #   attributes,
  # * the entries of a list are then appended beneath that element.
  append_item <- function(item, parent, tag = NULL) {
    if (is.atomic(item)) {
      return(.Call(node_new_text, parent$node, as.character(item)))
    }

    if (!is.null(tag)) {
      parent <- xml_add_child(parent, tag)
      node_attrs <- r_attrs_to_xml(attributes(item))
      for (j in seq_along(node_attrs)) {
        xml_set_attr(parent, names(node_attrs)[[j]], node_attrs[[j]])
      }
    }

    for (j in seq_along(item)) {
      append_item(item[[j]], parent, names(item)[[j]])
    }
  }

  new_doc <- xml_new_document()
  append_item(x, new_doc)
  xml_root(new_doc)
}
67 | 
# Create a new root from node `x`, passing `.copy = TRUE` to xml_new_root().
#' @export
as_xml_document.xml_node <- function(x, ...) {
  xml_new_root(.value = x, ..., .copy = TRUE)
}
72 | 
# Create a new root from `root`, then add every node of `x` as a child
# (both with `.copy = TRUE`).
#' @export
as_xml_document.xml_nodeset <- function(x, root, ...) {
  new_doc <- xml_new_root(.value = root, ..., .copy = TRUE)
  for (node_idx in seq_along(x)) {
    xml_add_child(new_doc, x[[node_idx]], .copy = TRUE)
  }
  new_doc
}
81 | 
# Already a document: returned unchanged.
#' @export
as_xml_document.xml_document <- function(x, ...) {
  x
}
86 | 


--------------------------------------------------------------------------------
/R/classes.R:
--------------------------------------------------------------------------------
 1 | #' @useDynLib xml2, .registration = TRUE
 2 | NULL
 3 | 
 4 | #' Construct a cdata node
 5 | #' @param content The CDATA content, does not include `<![CDATA[`
 6 | #' @examples
 7 | #' x <- xml_new_root("root")
 8 | #' xml_add_child(x, xml_cdata("<d/>"))
 9 | #' as.character(x)
10 | #' @export
11 | xml_cdata <- function(content) {
12 |   class(content) <- "xml_cdata"
13 |   content
14 | }
15 | 
#' Construct a comment node
#'
#' Marks `content` with the class `"xml_comment"`.
#' @param content The comment content
#' @examples
#' x <- xml_new_document()
#' r <- xml_add_child(x, "root")
#' xml_add_child(r, xml_comment("Hello!"))
#' as.character(x)
#' @export
xml_comment <- function(content) {
  oldClass(content) <- "xml_comment"
  content
}
28 | 
#' Construct a document type definition
#'
#' This is used to create simple document type definitions. If you need to
#' create a more complicated definition with internal subsets it is recommended
#' to parse a string directly with `read_xml()`.
#' @param name The name of the declaration
#' @param external_id The external ID of the declaration
#' @param system_id The system ID of the declaration
#' @return A list with elements `name`, `external_id` and `system_id`, of
#'   class `"xml_dtd"`.
#' @examples
#' r <- xml_new_root(
#'   xml_dtd(
#'     "html",
#'     "-//W3C//DTD XHTML 1.0 Transitional//EN",
#'     "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"
#'   )
#' )
#'
#' # Use read_xml directly for more complicated DTD
#' d <- read_xml(
#'   '<!DOCTYPE doc [
#' <!ELEMENT doc (#PCDATA)>
#' <!ENTITY foo " test ">
#' ]>
#' <doc>This is a valid document &foo; !</doc>'
#' )
#' @export
xml_dtd <- function(name = "", external_id = "", system_id = "") {
  structure(
    list(name = name, external_id = external_id, system_id = system_id),
    class = "xml_dtd"
  )
}
60 | 


--------------------------------------------------------------------------------
/R/format.R:
--------------------------------------------------------------------------------
 1 | #' @export
 2 | format.xml_node <- function(x, ...) {
 3 |   attrs <- xml_attrs(x)
 4 |   paste0("<",
 5 |     paste(
 6 |       c(
 7 |         xml_name(x),
 8 |         format_attributes(attrs)
 9 |       ),
10 |       collapse = " "
11 |     ),
12 |     ">"
13 |   )
14 | }
15 | 
# Render a named attribute vector as `name="value"` strings; empty input
# gives character(0).
format_attributes <- function(x) {
  if (length(x) == 0) {
    return(character(0))
  }
  paste(names(x), quote_str(x), sep = "=")
}
23 | 


--------------------------------------------------------------------------------
/R/init.R:
--------------------------------------------------------------------------------
# Result of the compiled `libxml2_version_` routine, converted with
# as.numeric_version().
libxml2_version <- function() {
  as.numeric_version(.Call(libxml2_version_))
}

# Result of the compiled `xml_parse_options_` routine.
# NOTE(review): presumably the named flag vector consumed by parse_options() -
# confirm against the C source.
xml_parse_options <- function() {
  .Call(xml_parse_options_)
}

# Result of the compiled `xml_save_options_` routine; passed to
# parse_options() by the as.character() methods.
xml_save_options <- function() {
  .Call(xml_save_options_)
}
12 | 


--------------------------------------------------------------------------------
/R/nodeset_apply.R:
--------------------------------------------------------------------------------
 1 | nodeset_apply <- function(x, fun, ...) UseMethod("nodeset_apply")
 2 | 
# A missing node has nothing to apply `fun` to: the result is an empty nodeset.
#' @export
nodeset_apply.xml_missing <- function(x, fun, ...) {
  xml_nodeset()
}
 7 | 
 8 | #' @export
 9 | nodeset_apply.xml_nodeset <- function(x, fun, ...) {
10 |   if (length(x) == 0) {
11 |     return(xml_nodeset())
12 |   }
13 | 
14 |   is_missing <- is.na(x)
15 |   res <- list(length(x))
16 | 
17 |   res[is_missing] <- list(xml_missing())
18 |   if (any(!is_missing)) {
19 |     res[!is_missing] <- lapply(x[!is_missing], function(x) fun(x$node, ...))
20 |   }
21 | 
22 |   make_nodeset(res, x[[1]]$doc)
23 | }
24 | 
# Apply `fun` to the node pointer of `x` and wrap every returned pointer as an
# xml_node sharing the document of `x`.
#' @export
nodeset_apply.xml_node <- function(x, fun, ...) {
  found <- fun(x$node, ...)
  wrapped <- lapply(found, xml_node, doc = x$doc)
  xml_nodeset(wrapped)
}
30 | 
# A document that is not also an "xml_node" yields an empty nodeset; otherwise
# the work is delegated to the next method.
#' @export
nodeset_apply.xml_document <- function(x, fun, ...) {
  if (!inherits(x, "xml_node")) {
    return(xml_nodeset())
  }
  NextMethod()
}
39 | 


--------------------------------------------------------------------------------
/R/paths.R:
--------------------------------------------------------------------------------
 1 | path_to_connection <- function(path, check = c("file", "dir")) {
 2 |   check <- match.arg(check)
 3 | 
 4 |   if (!is.character(path) || length(path) != 1L) {
 5 |     return(path)
 6 |   }
 7 | 
 8 |   if (is_url(path)) {
 9 |     if (is_installed("curl")) {
10 |       return(curl::curl(path))
11 |     } else {
12 |       return(url(path))
13 |     }
14 |   }
15 | 
16 |   if (check == "file") {
17 |     path <- check_path(path)
18 |   } else {
19 |     path <- file.path(check_path(dirname(path)), basename(path))
20 |   }
21 |   switch(tools::file_ext(path),
22 |     gz = gzfile(path, ""),
23 |     bz2 = bzfile(path, ""),
24 |     xz = xzfile(path, ""),
25 |     zip = zipfile(path, ""),
26 |     path
27 |   )
28 | }
29 | 
# TRUE for strings starting with http://, https://, ftp:// or ftps://.
is_url <- function(path) {
  grepl(pattern = "^(http|ftp)s?://", x = path)
}
33 | 
# Return the normalized form of an existing path, or abort with a message
# that, for relative paths, also names the current working directory.
check_path <- function(path, call = caller_env()) {
  if (!file.exists(path)) {
    location <- if (is_absolute_path(path)) {
      ""
    } else {
      " in current working directory ({.path {getwd()}})"
    }
    msg <- paste0("{.file {path}} does not exist", location, ".")

    cli::cli_abort(msg, call = call)
  }

  normalizePath(path, "/", mustWork = FALSE)
}
48 | 
# TRUE when `path` starts with "/", a drive letter followed by ":", a
# backslash, or "~".
is_absolute_path <- function(path) {
  grepl(pattern = "^(/|[A-Za-z]:|\\\\|~)", x = path)
}
52 | 
# Open a connection to the first file listed in the zip archive at `path`,
# informing the user when the archive holds more than one file.
zipfile <- function(path, open = "r") {
  listing <- utils::unzip(path, list = TRUE)
  file <- listing$Name[[1]]

  has_several <- nrow(listing) > 1
  if (has_several) {
    cli::cli_inform("Multiple files in zip: reading {.file {file}}")
  }

  unz(path, file, open = open)
}
63 | 


--------------------------------------------------------------------------------
/R/utils.R:
--------------------------------------------------------------------------------
 1 | `%||%` <- function(a, b) if (is.null(a)) b else a
 2 | 
 3 | is_named <- function(x) {
 4 |   all(has_names(x))
 5 | }
 6 | 
 7 | has_names <- function(x) {
 8 |   nms <- names(x)
 9 |   if (is.null(nms)) {
10 |     rep(FALSE, length(x))
11 |   } else {
12 |     !(is.na(nms) | nms == "")
13 |   }
14 | }
15 | 
# Wrap each element of `x` in `quote` characters (a version of sQuote that
# never uses smart quotes). Empty input gives character(0).
quote_str <- function(x, quote = "\"") {
  if (length(x) == 0) {
    return(character(0))
  }
  paste0(quote, x, quote)
}
24 | 
# Format the C bitwise flags for display in Rd. The input object is a named
# integer vector with a 'descriptions' character vector attribute that
# corresponds to each flag. The result is a single \describe{} block with one
# \item per flag.
describe_options <- function(x) {
  items <- paste0(
    "  \\item{", names(x), "}{", attr(x, "descriptions"), "}",
    collapse = "\n"
  )
  paste0("\\describe{\n", items, "\n}")
}
35 | 
# Wrap each element of `x` in plain single quotes.
s_quote <- function(x) {
  paste0("'", x, "'")
}
37 | 
# Translate `arg` into a single integer option value using the named flag
# vector `options`.
#
# * Numeric input is returned as-is, coerced to integer.
# * `NULL`, or character input containing only empty strings, gives `0L`.
# * Otherwise every element of `arg` is partially matched against
#   `names(options)`, similar to match.arg, but erroring if any of the inputs
#   are not found (fixing
#   https://bugs.r-project.org/bugzilla3/show_bug.cgi?id=16659). The values of
#   the distinct matched flags are summed.
#
# `error_call` is the call reported in the error for an unknown option.
parse_options <- function(arg, options, error_call = caller_env()) {
  if (is.numeric(arg)) {
    return(as.integer(arg))
  }

  if (is.null(arg) || !any(nzchar(arg))) {
    return(0L)
  }

  # set duplicates.ok = TRUE so any duplicates are counted differently than
  # non-matches, then take only unique results
  i <- pmatch(arg, names(options), duplicates.ok = TRUE)
  if (anyNA(i)) {
    cli::cli_abort(c(
      x = "{.arg options} {.val {arg[is.na(i)][1L]}} is not a valid option.",
      i = "Valid options are one of {.or {.val {names(options)}}}.",
      i = "See {.help [read_html](xml2::read_html)} for all options."
    ),
    call = error_call)
  }
  sum(options[unique(i)])
}
63 | 
#' Get path to a xml2 example
#'
#' xml2 comes bundled with a number of sample files in its \sQuote{inst/extdata}
#' directory. This function makes them easy to access.
#' @param path Name of file. If `NULL`, the example files will be listed.
#' @return With `path = NULL`, the names of the bundled example files;
#'   otherwise the full path to the requested file.
#' @export
xml2_example <- function(path = NULL) {
  if (!is.null(path)) {
    return(system.file("extdata", path, package = "xml2", mustWork = TRUE))
  }
  dir(system.file("extdata", package = "xml2"))
}
77 | 


--------------------------------------------------------------------------------
/R/xml2-package.R:
--------------------------------------------------------------------------------
1 | #' @keywords internal
2 | "_PACKAGE"
3 | 
4 | ## usethis namespace: start
5 | #' @import rlang
6 | ## usethis namespace: end
7 | NULL
8 | 


--------------------------------------------------------------------------------
/R/xml_children.R:
--------------------------------------------------------------------------------
  1 | #' Navigate around the family tree.
  2 | #'
  3 | #' `xml_children` returns only elements, `xml_contents` returns
  4 | #' all nodes. `xml_length` returns the number of children.
  5 | #' `xml_parent` returns the parent node, `xml_parents`
  6 | #' returns all parents up to the root. `xml_siblings` returns all nodes
  7 | #' at the same level. `xml_child` makes it easy to specify a specific
  8 | #' child to return.
  9 | #'
 10 | #' @inheritParams xml_name
 11 | #' @param only_elements For `xml_length`, should it count all children,
 12 | #'   or just children that are elements (the default)?
 13 | #' @param search For `xml_child`, either the child number to return (by
 14 | #'   position), or the name of the child node to return. If there are multiple
 15 | #'   child nodes with the same name, the first will be returned
 16 | #' @return A node or nodeset (possibly empty). Results are always de-duplicated.
 17 | #' @export
 18 | #' @examples
 19 | #' x <- read_xml("<foo> <bar><boo /></bar> <baz/> </foo>")
 20 | #' xml_children(x)
 21 | #' xml_children(xml_children(x))
 22 | #' xml_siblings(xml_children(x)[[1]])
 23 | #'
 24 | #' # Note the each unique node only appears once in the output
 25 | #' xml_parent(xml_children(x))
 26 | #'
 27 | #' # Mixed content
 28 | #' x <- read_xml("<foo> a <b/> c <d>e</d> f</foo>")
 29 | #' # Childen gets the elements, contents gets all node types
 30 | #' xml_children(x)
 31 | #' xml_contents(x)
 32 | #'
 33 | #' xml_length(x)
 34 | #' xml_length(x, only_elements = FALSE)
 35 | #'
 36 | #' # xml_child makes it easier to select specific children
 37 | #' xml_child(x)
 38 | #' xml_child(x, 2)
 39 | #' xml_child(x, "baz")
 40 | xml_children <- function(x) {
 41 |   nodeset_apply(x, function(x) .Call(node_children, x, TRUE))
 42 | }
 43 | 
 44 | #' @export
 45 | #' @rdname xml_children
 46 | xml_child <- function(x, search = 1, ns = xml_ns(x)) {
 47 |   if (length(search) != 1) {
 48 |     cli::cli_abort("{.arg {search}} must be of length 1.")
 49 |   }
 50 | 
 51 |   if (is.numeric(search)) {
 52 |     xml_children(x)[[search]]
 53 |   } else if (is.character(search)) {
 54 |     xml_find_first(x, xpath = paste0("./", search), ns = ns)
 55 |   } else {
 56 |     cli::cli_abort("{.arg search} must be `numeric` or `character`.")
 57 |   }
 58 | }
 59 | 
 60 | #' @export
 61 | #' @rdname xml_children
 62 | xml_contents <- function(x) {
 63 |   nodeset_apply(x, function(x) .Call(node_children, x, FALSE))
 64 | }
 65 | 
 66 | #' @export
 67 | #' @rdname xml_children
 68 | xml_parents <- function(x) {
 69 |   nodeset_apply(x, function(x) .Call(node_parents, x))
 70 | }
 71 | 
 72 | #' @export
 73 | #' @rdname xml_children
 74 | xml_siblings <- function(x) {
 75 |   nodeset_apply(x, function(x) .Call(node_siblings, x, TRUE))
 76 | }
 77 | 
 78 | #' @export
 79 | #' @rdname xml_children
 80 | xml_parent <- function(x) {
 81 |   UseMethod("xml_parent")
 82 | }
 83 | 
 84 | #' @export
 85 | xml_parent.xml_missing <- function(x) {
 86 |   xml_missing()
 87 | }
 88 | 
 89 | #' @export
 90 | xml_parent.xml_node <- function(x) {
 91 |   xml_node(.Call(node_parent, x$node), x$doc)
 92 | }
 93 | 
 94 | #' @export
 95 | xml_parent.xml_nodeset <- function(x) {
 96 |   nodeset_apply(x, function(x) .Call(node_parent, x))
 97 | }
 98 | 
 99 | 
#' @export
#' @rdname xml_children
xml_length <- function(x, only_elements = TRUE) {
  # Counting is done entirely by the compiled `node_length` routine, which
  # receives `x` itself rather than an extracted node pointer.
  .Call(node_length, x, only_elements)
}
105 | 
#' @export
#' @rdname xml_children
xml_root <- function(x) {
  stopifnot(inherits(x, c("xml_node", "xml_document", "xml_nodeset")))

  # For a nodeset the root is that of its first node; an empty nodeset has
  # none and gives NULL.
  if (inherits(x, "xml_nodeset")) {
    if (length(x) == 0) {
      return(NULL)
    }
    return(xml_root(x[[1]]))
  }

  if (.Call(doc_has_root, x$doc)) {
    xml_document(x$doc)
  } else {
    xml_missing()
  }
}
124 | 


--------------------------------------------------------------------------------
/R/xml_document.R:
--------------------------------------------------------------------------------
 1 | xml_document <- function(doc) {
 2 |   if (.Call(doc_has_root, doc)) {
 3 |     x <- xml_node(.Call(doc_root, doc), doc)
 4 |     class(x) <- c("xml_document", class(x))
 5 |     x
 6 |   } else {
 7 |     out <- list(doc = doc)
 8 |     class(out) <- "xml_document"
 9 |     out
10 |   }
11 | }
12 | 
# "html" when `x$doc` is set and the compiled `doc_is_html` check is true for
# it; "xml" otherwise (including when `x$doc` is NULL).
doc_type <- function(x) {
  doc <- x$doc
  if (!is.null(doc) && .Call(doc_is_html, doc)) {
    "html"
  } else {
    "xml"
  }
}
23 | 
# Print a header line, then (if the document has a root node) the formatted
# root followed by its element children.
#' @export
print.xml_document <- function(x, width = getOption("width"), max_n = 20, ...) {
  fresh <- xml_document(x$doc)
  header <- paste0("{", doc_type(x), "_document}\n")
  cat(header)

  if (inherits(fresh, "xml_node")) {
    cat(format(fresh), "\n", sep = "")
    show_nodes(xml_children(fresh), width = width, max_n = max_n)
  }
}
33 | 
# Serialise the whole document to a string. `options` is resolved against the
# save options by parse_options().
#' @export
as.character.xml_document <- function(x, ..., options = "format", encoding = "UTF-8") {
  save_flags <- parse_options(options, xml_save_options())
  .Call(doc_write_character, x$doc, encoding, save_flags)
}
39 | 


--------------------------------------------------------------------------------
/R/xml_missing.R:
--------------------------------------------------------------------------------
 1 | #' Construct an missing xml object
 2 | #' @export
 3 | #' @keywords internal
 4 | xml_missing <- function() {
 5 |   out <- list()
 6 |   class(out) <- "xml_missing"
 7 |   out
 8 | }
 9 | 
# A missing node formats as the string "<NA>". The `@export` tag registers
# this as an S3 method like the other xml_missing methods in this file, so
# that format() dispatches to it for callers outside the package as well.
#' @export
format.xml_missing <- function(x, ...) {
  "<NA>"
}
13 | 
# Print the "{xml_missing}" header followed by the formatted object.
#' @export
print.xml_missing <- function(x, width = getOption("width"), max_n = 20, ...) {
  cat("{xml_missing}\n", format(x), "\n", sep = "")
}
19 | 
# A missing node converts to a missing string.
#' @export
as.character.xml_missing <- function(x, ...) {
  NA_character_
}
24 | 
# These mimic the behavior of NA[[1]], NA[[2]], NA[1], NA[2]:
# `[` always gives back the missing object, `[[` only does so for index 1.

#' @export
`[.xml_missing` <- function(x, i, ...) {
  x
}

#' @export
`[[.xml_missing` <- function(x, i, ...) {
  if (i != 1L) {
    cli::cli_abort("subscript out of bounds")
  }
  x
}
32 | 
# A missing node is always NA.
#' @export
is.na.xml_missing <- function(x) {
  TRUE
}
37 | 


--------------------------------------------------------------------------------
/R/xml_name.R:
--------------------------------------------------------------------------------
#' The (tag) name of an xml element.
#'
#' @param x A document, node, or node set.
#' @param ns Optionally, a named vector giving prefix-url pairs, as produced
#'   by [xml_ns()]. If provided, all names will be explicitly
#'   qualified with the ns prefix, i.e. if the element `bar` is defined
#'   in namespace `foo`, it will be called `foo:bar`. (And
#'   similarly for attributes). Default namespaces must be given an explicit
#'   name. The ns is ignored when using [xml_name<-()] and
#'   [xml_set_name()].
#' @return A character vector.
#' @export
#' @examples
#' x <- read_xml("<bar>123</bar>")
#' xml_name(x)
#'
#' y <- read_xml("<bar><baz>1</baz>abc<foo /></bar>")
#' z <- xml_children(y)
#' xml_name(z)
xml_name <- function(x, ns = character()) {
  # The lookup is done by the compiled `node_name` routine, which receives `x`
  # itself.
  .Call(node_name, x, ns)
}
23 | 
#' Modify the (tag) name of an element
#'
#' @param value a character vector with replacement name.
#' @rdname xml_name
#' @export
`xml_name<-` <- function(x, ns = character(), value) {
  UseMethod("xml_name<-")
}

# Rename a single node through its pointer; `ns` is not used.
#' @export
`xml_name<-.xml_node` <- function(x, ns = character(), value) {
  .Call(node_set_name, x$node, value)
  x
}

# Rename every node of the set, pairing nodes with `ns` and `value`
# element-wise via Map(). An empty nodeset is returned untouched.
#' @export
`xml_name<-.xml_nodeset` <- function(x, ns = character(), value) {
  if (length(x) == 0) {
    return(x)
  }

  ns_list <- if (is.list(ns)) ns else list(ns)
  Map(`xml_name<-`, x, ns_list, value)
  x
}

# Renaming a missing node is a no-op.
#' @export
`xml_name<-.xml_missing` <- function(x, ns = character(), value) {
  x
}
55 | 
# Shared implementation of the xml_set_name() methods: apply the
# `xml_name<-` replacement function and return the result.
set_name <- function(x, value, ns = character()) {
  x <- `xml_name<-`(x = x, ns = ns, value = value)
  x
}

#' @rdname xml_name
#' @export
xml_set_name <- function(x, value, ns = character()) {
  UseMethod("xml_set_name")
}

#' @export
xml_set_name.xml_node <- set_name

#' @export
xml_set_name.xml_nodeset <- set_name

#' @export
xml_set_name.xml_missing <- set_name
75 | 


--------------------------------------------------------------------------------
/R/xml_namespaces.R:
--------------------------------------------------------------------------------
#' XML namespaces.
#'
#' `xml_ns` extracts all namespaces from a document, matching each
#' unique namespace url with the prefix it was first associated with. Default
#' namespaces are named `d1`, `d2` etc. Use `xml_ns_rename`
#' to change the prefixes. Once you have a namespace object, you can pass it to
#' other functions to work with fully qualified names instead of local names.
#'
#' @export
#' @inheritParams xml_name
#' @param old,... An existing xml_namespace object followed by name-value
#'   (old prefix-new prefix) pairs to replace.
#' @return A character vector with class `xml_namespace` so the
#'   default display is a little nicer.
#' @examples
#' x <- read_xml('
#'  <root>
#'    <doc1 xmlns = "http://foo.com"><baz /></doc1>
#'    <doc2 xmlns = "http://bar.com"><baz /></doc2>
#'  </root>
#' ')
#' xml_ns(x)
#'
#' # When there are default namespaces, it's a good idea to rename
#' # them to give informative names:
#' ns <- xml_ns_rename(xml_ns(x), d1 = "foo", d2 = "bar")
#' ns
#'
#' # Now we can pass ns to other xml functions to use fully qualified names
#' baz <- xml_children(xml_children(x))
#' xml_name(baz)
#' xml_name(baz, ns)
#'
#' xml_find_all(x, "//baz")
#' xml_find_all(x, "//foo:baz", ns)
#'
#' str(as_list(x))
#' str(as_list(x, ns))
xml_ns <- function(x) {
  UseMethod("xml_ns")
}
 42 | 
 43 | #' @export
 44 | xml_ns.xml_document <- function(x) {
 45 |   if (length(x) == 0) {
 46 |     return(character())
 47 |   }
 48 | 
 49 |   stopifnot(inherits(x, "xml_document"))
 50 |   doc <- x$doc
 51 |   x <- .Call(doc_namespaces, doc)
 52 | 
 53 |   # Number default namespaces
 54 |   is_default <- names(x) == ""
 55 |   names(x)[is_default] <- paste0("d", seq_len(sum(is_default)))
 56 | 
 57 |   # Make prefixes unique
 58 |   names(x) <- make.unique(names(x), "")
 59 | 
 60 |   class(x) <- "xml_namespace"
 61 | 
 62 |   x
 63 | }
 64 | 
 65 | #' @export
 66 | xml_ns.xml_node <- function(x) {
 67 |   xml_ns(xml_root(x))
 68 | }
 69 | 
 70 | #' @export
 71 | xml_ns.xml_nodeset <- function(x) {
 72 |   if (length(x) == 0) {
 73 |     return(character())
 74 |   }
 75 |   xml_ns(x[[1]])
 76 | }
 77 | 
 78 | #' @export
 79 | xml_ns.xml_missing <- function(x) {
 80 |   character()
 81 | }
 82 | 
 83 | #' @export
 84 | print.xml_namespace <- function(x, ...) {
 85 |   prefix <- format(names(x))
 86 | 
 87 |   cat(paste0(prefix, " <-> ", x, collapse = "\n"), "\n", sep = "")
 88 | }
 89 | 
 90 | #' @export
 91 | #' @rdname xml_ns
 92 | xml_ns_rename <- function(old, ...) {
 93 |   new <- c(...)
 94 | 
 95 |   m <- match(names(new), names(old))
 96 |   if (anyNA(m)) {
 97 |     missing <- paste(names(new)[is.na(m)], collapse = ", ")
 98 |     cli::cli_abort("Some prefixes [{missing}] don't already exist.")
 99 |   }
100 | 
101 |   names(old)[m] <- new
102 |   old
103 | }
104 | 


--------------------------------------------------------------------------------
/R/xml_node.R:
--------------------------------------------------------------------------------
 1 | # node -------------------------------------------------------------------------
 2 | 
 3 | xml_node <- function(node = NULL, doc = NULL) {
 4 |   if (inherits(node, "xml_node")) {
 5 |     node
 6 |   } else {
 7 |     out <- list(node = node, doc = doc)
 8 |     class(out) <- "xml_node"
 9 |     out
10 |   }
11 | }
12 | 
# Serialise a single node to a string. `options` is resolved against the save
# options by parse_options().
#' @export
as.character.xml_node <- function(x, ..., options = "format", encoding = "UTF-8") {
  save_flags <- parse_options(options, xml_save_options())
  .Call(node_write_character, x$node, encoding, save_flags)
}
18 | 
# Print a header line, the formatted node, then its element children.
#' @export
print.xml_node <- function(x, width = getOption("width"), max_n = 20, ...) {
  header <- paste0("{", doc_type(x), "_node}\n")
  cat(header)
  cat(format(x), "\n", sep = "")

  kids <- xml_children(x)
  show_nodes(kids, width = width, max_n = max_n)
}
25 | 
# An actual node is never NA (compare is.na.xml_missing()).
#' @export
is.na.xml_node <- function(x) {
  FALSE
}
30 | 


--------------------------------------------------------------------------------
/R/xml_nodeset.R:
--------------------------------------------------------------------------------
 1 | xml_nodeset <- function(nodes = list(), deduplicate = TRUE) {
 2 |   if (isTRUE(deduplicate)) {
 3 |     nodes <- nodes[!.Call(nodes_duplicated, nodes)]
 4 |   }
 5 |   class(nodes) <- "xml_nodeset"
 6 |   nodes
 7 | }
 8 | 
 9 | #' @param nodes A list (possible nested) of external pointers to nodes
10 | #' @return a nodeset
11 | #' @noRd
12 | make_nodeset <- function(nodes, doc) {
13 |   nodes <- unlist(nodes, recursive = FALSE)
14 | 
15 |   xml_nodeset(lapply(nodes, xml_node, doc = doc))
16 | }
17 | 
# Print a header with the number of nodes, then the nodes themselves (if any).
#' @export
print.xml_nodeset <- function(x, width = getOption("width"), max_n = 20, ...) {
  count <- length(x)
  cat("{", doc_type(x), "_nodeset (", count, ")}\n", sep = "")

  has_nodes <- count > 0
  if (has_nodes) {
    show_nodes(x, width = width, max_n = max_n)
  }
}
27 | 
# Convert every node of the set to a string.
#
# `...` is forwarded to the per-node `as.character()` methods so that
# arguments such as `options` and `encoding` take effect for nodesets too
# instead of being silently dropped.
#' @export
as.character.xml_nodeset <- function(x, ...) {
  vapply(x, as.character, FUN.VALUE = character(1), ...)
}
32 | 
# Subsetting keeps the "xml_nodeset" class: the default `[` result is passed
# back through xml_nodeset(). An empty nodeset is returned as-is.
#' @export
`[.xml_nodeset` <- function(x, i, ...) {
  if (length(x) == 0) {
    return(x)
  }
  picked <- NextMethod()
  xml_nodeset(picked)
}
40 | 
#' Wrapper for encodeString() that takes width into consideration
#'
#' encodeString() is relatively expensive to run (see #366), so
#'   avoid doing so to very wide inputs by first trimming inputs
#'   to approximately the correct width, then encoding. A second
#'   round of truncation occurs after encoding to account for
#'   any newly-inserted characters bumping an input too wide.
#'
#' Truncated entries end in `" ..."` and are at most `width` characters wide.
#' @param x A character vector.
#' @param width Maximum number of characters per element.
#' @return A character vector the same length as `x`.
#' @noRd
encode_with_width <- function(x, width) {
  # Cheap pre-trim so encodeString() never sees very wide inputs.
  truncate_raw <- nchar(x) > width
  x[truncate_raw] <- substr(x[truncate_raw], 1L, width - 3L)

  x <- encodeString(x)

  # The " ..." marker (separating space plus ellipsis) takes four characters,
  # so keep `width - 4` characters of content; keeping `width - 3` would leave
  # every truncated line one character wider than `width`.
  truncate_encoded <- truncate_raw | nchar(x) > width
  x[truncate_encoded] <- paste(substr(x[truncate_encoded], 1L, width - 4L), "...")
  x
}
57 | 
# Print up to `max_n` nodes of a nodeset, one per line, each prefixed with a
# right-justified "[i]" label and limited in width by encode_with_width().
# A final "..." line signals that nodes were left out.
show_nodes <- function(x, width = getOption("width"), max_n = 20) {
  stopifnot(inherits(x, "xml_nodeset"))

  shown <- length(x)
  if (shown == 0) {
    return()
  }

  is_truncated <- shown > max_n
  if (is_truncated) {
    shown <- max_n
    x <- x[seq_len(shown)]
  }

  labels <- format(paste0("[", seq_len(shown), "]"), justify = "right")
  texts <- vapply(x, as.character, FUN.VALUE = character(1L))
  lines <- encode_with_width(paste(labels, texts), width)

  cat(lines, sep = "\n")
  if (is_truncated) {
    cat("...\n")
  }
  invisible()
}
83 | 
# Element-wise missingness: one logical per node of the set.
#' @export
is.na.xml_nodeset <- function(x) {
  vapply(x, FUN = is.na, FUN.VALUE = logical(1))
}
88 | 


--------------------------------------------------------------------------------
/R/xml_path.R:
--------------------------------------------------------------------------------
 1 | #' Retrieve the XPath of a node
 2 | #'
 3 | #' Handy for finding out where the nodes matched by an XPath
 4 | #' expression are located within a document.
 5 | #'
 6 | #' @inheritParams xml_name
 7 | #' @return A character vector.
 8 | #' @export
 9 | #' @examples
10 | #' x <- read_xml("<foo><bar><baz /></bar><baz /></foo>")
11 | #' xml_path(xml_find_all(x, ".//baz"))
12 | xml_path <- function(x) {
13 |   # The native `node_path` routine receives `x` unchanged
14 |   path <- .Call(node_path, x)
15 |   path
16 | }
15 | 


--------------------------------------------------------------------------------
/R/xml_schema.R:
--------------------------------------------------------------------------------
 1 | #' Validate an XML document against a schema
 2 | #'
 3 | #' Checks an XML document against an XML 1.0 schema.
 4 | #'
 5 | #' @inheritParams xml_name
 6 | #' @param schema An XML document containing the schema.
 7 | #' @return `TRUE` or `FALSE`.
 8 | #' @export
 9 | #' @examples
10 | #' # Example from https://msdn.microsoft.com/en-us/library/ms256129(v=vs.110).aspx
11 | #' doc <- read_xml(system.file("extdata/order-doc.xml", package = "xml2"))
12 | #' schema <- read_xml(system.file("extdata/order-schema.xml", package = "xml2"))
13 | #' xml_validate(doc, schema)
14 | xml_validate <- function(x, schema) UseMethod("xml_validate")
16 | 
17 | # Document method. `schema` has to be an `xml_document` too; the `$doc`
18 | # fields of both documents are passed to the native `doc_validate` routine.
19 | #' @export
20 | xml_validate.xml_document <- function(x, schema) {
21 |   stopifnot(inherits(schema, "xml_document"))
22 |   outcome <- .Call(doc_validate, x$doc, schema$doc)
23 |   outcome
24 | }
22 | 


--------------------------------------------------------------------------------
/R/xml_serialize.R:
--------------------------------------------------------------------------------
 1 | #' Serialize XML objects to connections
 2 | #'
 3 | #' `xml_serialize()` stores the character form of an XML object with
 4 | #' [base::serialize()]; `xml_unserialize()` reads it back and re-parses it.
 5 | #' A character `connection` is treated as a file path that is opened and
 6 | #' closed for you.
 7 | #'
 8 | #' @inheritParams base::serialize
 9 | #' @param ... Additional arguments. `xml_serialize()` forwards them to
10 | #'   `as.character()`, `xml_unserialize()` forwards them to [read_xml()].
11 | #' @inherit base::serialize return
12 | #' @examples
13 | #' library(xml2)
14 | #' x <- read_xml("<a>
15 | #'   <b><c>123</c></b>
16 | #'   <b><c>456</c></b>
17 | #' </a>")
18 | #'
19 | #' b <- xml_find_all(x, "//b")
20 | #' out <- xml_serialize(b, NULL)
21 | #' xml_unserialize(out)
22 | #' @export
23 | xml_serialize <- function(object, connection, ...) {
24 |   UseMethod("xml_serialize")
25 | }
18 | 
19 | # Document method. A character `connection` is treated as a file path: the
20 | # file is opened for writing and closed again on exit. The document is
21 | # stored as its character form, tagged with its `doc_type` and the class
22 | # "xml_serialized_document".
23 | #' @export
24 | xml_serialize.xml_document <- function(object, connection, ...) {
25 |   if (is.character(connection)) {
26 |     connection <- file(connection, "w", raw = TRUE)
27 |     on.exit(close(connection), add = TRUE)
28 |   }
29 |   payload <- structure(
30 |     as.character(object, ...),
31 |     doc_type = doc_type(object),
32 |     class = "xml_serialized_document"
33 |   )
34 |   serialize(payload, connection)
35 | }
27 | 
28 | # Node method. The node is first converted with `as_xml_document()`; the
29 | # character form of the result is stored with the class
30 | # "xml_serialized_node". A character `connection` is opened as a file for
31 | # writing and closed on exit.
32 | #' @export
33 | xml_serialize.xml_node <- function(object, connection, ...) {
34 |   if (is.character(connection)) {
35 |     connection <- file(connection, "w", raw = TRUE)
36 |     on.exit(close(connection), add = TRUE)
37 |   }
38 |   doc <- as_xml_document(object)
39 |   payload <- structure(as.character(doc, ...), class = "xml_serialized_node")
40 |   serialize(payload, connection)
41 | }
37 | 
38 | # Nodeset method. The nodes are converted with
39 | # `as_xml_document(object, "root")`; `xml_unserialize()` later returns the
40 | # direct children of that document's root. The character form is stored
41 | # with the class "xml_serialized_nodeset". A character `connection` is
42 | # opened as a file for writing and closed on exit.
43 | #' @export
44 | xml_serialize.xml_nodeset <- function(object, connection, ...) {
45 |   if (is.character(connection)) {
46 |     connection <- file(connection, "w", raw = TRUE)
47 |     on.exit(close(connection), add = TRUE)
48 |   }
49 |   wrapped <- as_xml_document(object, "root")
50 |   payload <- structure(as.character(wrapped, ...), class = "xml_serialized_nodeset")
51 |   serialize(payload, connection)
52 | }
47 | 
48 | #' @rdname xml_serialize
49 | #' @export
50 | xml_unserialize <- function(connection, ...) {
51 |   # A character `connection` is a file path: open it here, close it on exit
52 |   if (is.character(connection)) {
53 |     connection <- file(connection, "r", raw = TRUE)
54 |     on.exit(close(connection), add = TRUE)
55 |   }
56 |   object <- unserialize(connection)
57 | 
58 |   if (inherits(object, "xml_serialized_nodeset")) {
59 |     # Select only the direct children of the root
60 |     return(xml_find_all(read_xml(unclass(object), ...), "/*/node()"))
61 |   }
62 |   if (inherits(object, "xml_serialized_node")) {
63 |     # Select only the root
64 |     return(xml_find_first(read_xml(unclass(object), ...), "/node()"))
65 |   }
66 |   if (!inherits(object, "xml_serialized_document")) {
67 |     cli::cli_abort("Not a serialized xml2 object.")
68 |   }
69 | 
70 |   # `as_html` defaults to whether the stored `doc_type` attribute is "html",
71 |   # unless the caller supplied `as_html` through `...`
72 |   reparse <- function(object, as_html = FALSE, ...) {
73 |     if (missing(as_html)) {
74 |       as_html <- identical(attr(object, "doc_type", exact = TRUE), "html")
75 |     }
76 |     read_xml(unclass(object), as_html = as_html, ...)
77 |   }
78 |   reparse(unclass(object), ...)
79 | }
79 | 


--------------------------------------------------------------------------------
/R/xml_structure.R:
--------------------------------------------------------------------------------
  1 | #' Show the structure of an html/xml document.
  2 | #'
  3 | #' Print the node tree of an html/xml document without any of its
  4 | #' values, which gives a high level view of how the document is
  5 | #' organised. Unlike `xml_structure()`, `html_structure()` prints the
  6 | #' values of the id and class attributes (as `#id` and `.class`).
  7 | #'
  8 | #' @param x HTML/XML document (or part thereof)
  9 | #' @param indent Number of spaces to indent
 10 | #' @inheritParams base::cat
 11 | #' @export
 12 | #' @examples
 13 | #' xml_structure(read_xml("<a><b><c/><c/></b><d/></a>"))
 14 | #'
 15 | #' rproj <- read_html(system.file("extdata", "r-project.html", package = "xml2"))
 16 | #' xml_structure(rproj)
 17 | #' xml_structure(xml_find_all(rproj, ".//p"))
 18 | #'
 19 | #' h <- read_html("<body><p id = 'a'></p><p class = 'c d'></p></body>")
 20 | #' html_structure(h)
 21 | xml_structure <- function(x, indent = 2, file = "") {
 22 |   # cat() with no content and the default append = FALSE; the tree itself
 23 |   # is written afterwards with append = TRUE
 24 |   cat(file = file)
 25 |   tree_structure(x = x, indent = indent, html = FALSE, file = file)
 26 | }
 25 | 
 26 | # Same as `xml_structure()` but with `html = TRUE`, so id and class
 27 | # attributes are printed as "#id" / ".class".
 28 | #' @export
 29 | #' @rdname xml_structure
 30 | html_structure <- function(x, indent = 2, file = "") {
 31 |   cat(file = file)
 32 |   tree_structure(x = x, indent = indent, html = TRUE, file = file)
 33 | }
 32 | 
 33 | # Internal generic behind `xml_structure()` and `html_structure()`;
 34 | # dispatches on the class of `x`.
 35 | tree_structure <- function(x, indent = 2, html = FALSE, file = "")
 36 |   UseMethod("tree_structure")
 37 | 
 38 | # Missing nodes have no structure: nothing is printed and a character NA
 39 | # is returned.
 40 | #' @export
 41 | tree_structure.xml_missing <- function(x, indent = 2, html = FALSE, file = "") {
 42 |   NA_character_
 43 | }
 41 | 
 42 | # Nodeset method: for each node print a "[[i]]" header line, the node's
 43 | # structure and a trailing blank line. Returns NULL invisibly.
 44 | #' @export
 45 | tree_structure.xml_nodeset <- function(x, indent = 2, html = FALSE, file = "") {
 46 |   emit <- function(...) cat(..., sep = "", file = file, append = TRUE)
 47 |   for (i in seq_along(x)) {
 48 |     emit("[[", i, "]]\n")
 49 |     print_xml_structure(x[[i]], indent = indent, html = html, file = file)
 50 |     emit("\n")
 51 |   }
 52 | 
 53 |   invisible()
 54 | }
 52 | 
 53 | # Single-node method: print the node's structure, return NULL invisibly.
 54 | #' @export
 55 | tree_structure.xml_node <- function(x, indent = 2, html = FALSE, file = "") {
 56 |   print_xml_structure(x = x, indent = indent, html = html, file = file)
 57 |   invisible()
 58 | }
 58 | 
 59 | # Recursively print one node and its contents, one line per node.
 60 | #
 61 | # Elements are printed as "<name>", followed in html mode by "#id" and
 62 | # ".class" shorthands (spaces in the class value become dots) and then by
 63 | # the names of the remaining attributes in square brackets. Any other node
 64 | # is printed as "{type}". Each line is left-padded with `prefix` spaces and
 65 | # contents are printed with `prefix + indent`. Output is appended to `file`.
 66 | print_xml_structure <- function(x, prefix = 0, indent = 2, html = FALSE, file = "") {
 67 |   padding <- paste(rep(" ", prefix), collapse = "")
 68 |   type <- xml_type(x)
 69 | 
 70 |   if (type != "element") {
 71 |     return(cat(padding, "{", type, "}\n", sep = "", file = file, append = TRUE))
 72 |   }
 73 | 
 74 |   attr <- xml_attrs(x)
 75 |   attr_str <- ""
 76 |   if (html) {
 77 |     # id and class are shown as shorthands and dropped from the name list
 78 |     shorthand <- list()
 79 |     if ("id" %in% names(attr)) {
 80 |       shorthand$id <- paste0("#", attr[["id"]])
 81 |       attr <- attr[setdiff(names(attr), "id")]
 82 |     }
 83 |     if ("class" %in% names(attr)) {
 84 |       shorthand$class <- paste0(".", gsub(" ", ".", attr[["class"]]))
 85 |       attr <- attr[setdiff(names(attr), "class")]
 86 |     }
 87 |     attr_str <- paste(unlist(shorthand), collapse = " ")
 88 |   }
 89 |   if (length(attr) > 0) {
 90 |     attr_str <- paste0(attr_str, " [", paste0(names(attr), collapse = ", "), "]")
 91 |   }
 92 | 
 93 |   cat(padding, "<", xml_name(x), attr_str, ">\n", sep = "", file = file, append = TRUE)
 94 |   lapply(
 95 |     xml_contents(x),
 96 |     print_xml_structure,
 97 |     prefix = prefix + indent,
 98 |     indent = indent,
 99 |     html = html,
100 |     file = file
101 |   )
102 | }
101 | 


--------------------------------------------------------------------------------
/R/xml_text.R:
--------------------------------------------------------------------------------
 1 | #' Extract or modify the text
 2 | #'
 3 | #' `xml_text()` returns a character vector, `xml_double()` a numeric
 4 | #' vector and `xml_integer()` an integer vector.
 5 | #' @inheritParams xml_name
 6 | #' @param trim If `TRUE`, leading and trailing whitespace (including
 7 | #'   non-breaking spaces) is removed.
 8 | #' @return A character vector, the same length as x.
 9 | #' @examples
10 | #' x <- read_xml("<p>This is some text. This is <b>bold!</b></p>")
11 | #' xml_text(x)
12 | #' xml_text(xml_children(x))
13 | #'
14 | #' x <- read_xml("<x>This is some text. <x>This is some nested text.</x></x>")
15 | #' xml_text(x)
16 | #' xml_text(xml_find_all(x, "//x"))
17 | #'
18 | #' x <- read_xml("<p>   Some text    </p>")
19 | #' xml_text(x, trim = TRUE)
20 | #'
21 | #' # xml_double() and xml_integer() are useful for extracting numeric attributes
22 | #' x <- read_xml("<plot><point x='1' y='2' /><point x='2' y='1' /></plot>")
23 | #' xml_integer(xml_find_all(x, "//@x"))
24 | #' @export
25 | xml_text <- function(x, trim = FALSE) {
26 |   text <- .Call(node_text, x)
27 |   # Only a literal `TRUE` switches trimming on
28 |   if (!isTRUE(trim)) {
29 |     return(text)
30 |   }
31 |   trim_text(text)
32 | }
31 | 
32 | # Strip leading, then trailing, runs of whitespace or non-breaking spaces
33 | # (U+00A0) from every element of `x`.
34 | trim_text <- function(x) {
35 |   no_leading <- sub("^[[:space:]\u00a0]+", "", x)
36 |   no_trailing <- sub("[[:space:]\u00a0]+$", "", no_leading)
37 |   no_trailing
38 | }
36 | 
37 | #' @rdname xml_text
38 | #' @param value character vector with replacement text.
39 | #' @export
40 | `xml_text<-` <- function(x, value) UseMethod("xml_text<-")
43 | 
44 | # Nodeset method: set the text of every node, pairing nodes with `value`
45 | # via `Map()`. An empty nodeset is returned unchanged.
46 | #' @export
47 | `xml_text<-.xml_nodeset` <- function(x, value) {
48 |   if (length(x) > 0) {
49 |     # We need to do the modification in reverse order as the modification
50 |     # can potentially delete nodes
51 |     Map(`xml_text<-`, rev(x), rev(value))
52 |   }
53 | 
54 |   # what to return here, setting the text could invalidate some nodes in
55 |   # the nodeset having pointers to free'd memory.
56 |   x
57 | }
57 | 
58 | # Node method. A text node has its own content replaced. For any other
59 | # node the first text node found by ".//text()[1]" is replaced; when there
60 | # is none, `value` is appended to the node as new content. Returns `x`.
61 | #' @export
62 | `xml_text<-.xml_node` <- function(x, value) {
63 |   if (xml_type(x) == "text") {
64 |     .Call(node_set_content, x$node, value)
65 |     return(x)
66 |   }
67 | 
68 |   text_child <- xml_find_first(x, ".//text()[1]", ns = character())
69 |   if (inherits(text_child, "xml_missing")) {
70 |     .Call(node_append_content, x$node, value)
71 |   } else {
72 |     .Call(node_set_content, text_child$node, value)
73 |   }
74 |   x
75 | }
73 | 
74 | # A missing node has no text to set: `value` is ignored and a character
75 | # NA is returned.
76 | #' @export
77 | `xml_text<-.xml_missing` <- function(x, value) {
78 |   return(NA_character_)
79 | }
78 | 
79 | # Plain-function alias of the `xml_text<-` generic.
80 | #' @export
81 | #' @rdname xml_text
82 | xml_set_text <- `xml_text<-`
82 | 
83 | #' @rdname xml_text
84 | #' @export
85 | xml_double <- function(x) {
86 |   # Untrimmed node text, converted with as.numeric()
87 |   text <- xml_text(x)
88 |   as.numeric(text)
89 | }
90 | 
91 | #' @rdname xml_text
92 | #' @export
93 | xml_integer <- function(x) {
94 |   # Untrimmed node text, converted with as.integer()
95 |   text <- xml_text(x)
96 |   as.integer(text)
97 | }
94 | 


--------------------------------------------------------------------------------
/R/xml_type.R:
--------------------------------------------------------------------------------
 1 | #' Determine the type of a node.
 2 | #'
 3 | #' @inheritParams xml_name
 4 | #' @return A character vector of node type names.
 5 | #' @export
 6 | #' @examples
 7 | #' x <- read_xml("<foo> a <b /> <![CDATA[ blah]]></foo>")
 8 | #' xml_type(x)
 9 | #' xml_type(xml_contents(x))
10 | xml_type <- function(x) {
11 |   # The native routine returns codes that index into `xmlElementType`
12 |   codes <- .Call(node_type, x)
13 |   xmlElementType[codes]
14 | }
13 | 
14 | # Node type names. `xml_type()` indexes this vector with the codes
15 | # returned by the native `node_type` routine, so the order matters.
16 | # NOTE(review): presumably mirrors libxml2's xmlElementType enum - confirm.
17 | xmlElementType <- c(
18 |   "element", "attribute", "text", "cdata", "entity_ref", "entity", "pi",
19 |   "comment", "document", "document_type", "document_frag", "notation",
20 |   "html_document", "dtd", "element_decl", "attribute_decl", "entity_decl",
21 |   "namespace_decl", "xinclude_start", "xinclude_end", "docb_document"
22 | )
37 | 


--------------------------------------------------------------------------------
/R/xml_url.R:
--------------------------------------------------------------------------------
 1 | #' The URL of an XML document
 2 | #'
 3 | #' This is useful for interpreting relative urls with [url_relative()].
 4 | #'
 5 | #' @param x A node or document.
 6 | #' @return A character vector of length 1. Returns `NA` if the URL is
 7 | #'   not set.
 8 | #' @export
 9 | #' @examples
10 | #' catalog <- read_xml(xml2_example("cd_catalog.xml"))
11 | #' xml_url(catalog)
12 | #'
13 | #' x <- read_xml("<foo/>")
14 | #' xml_url(x)
15 | xml_url <- function(x) UseMethod("xml_url")
18 | 
19 | # Missing nodes: the URL is a character NA.
20 | #' @export
21 | xml_url.xml_missing <- function(x) {
22 |   return(NA_character_)
23 | }
24 | 
25 | # Node method: the result of the native `doc_url` routine for the node's
26 | # `$doc` field.
27 | #' @export
28 | xml_url.xml_node <- function(x) {
29 |   url <- .Call(doc_url, x$doc)
30 |   url
31 | }
28 | 
29 | # Nodeset method: one URL per element. Every element is sent through the
30 | # `xml_url()` generic, so a node is handled exactly like a standalone node
31 | # (the native routine gets the node's `$doc`, not the node itself) and a
32 | # missing element yields NA.
33 | #' @export
34 | xml_url.xml_nodeset <- function(x) {
35 |   vapply(x, xml_url, character(1))
36 | }
33 | 
34 | #' Convert between relative and absolute urls.
35 | #'
36 | #' @param x A character vector of urls; `url_absolute()` interprets them
37 | #'   relative to `base`.
38 | #' @param base A string giving a base url.
39 | #' @return A character vector of urls
40 | #' @seealso [xml_url()] to retrieve the URL associated with a document
41 | #' @examples
42 | #' url_absolute(c(".", "..", "/", "/x"), "http://hadley.nz/a/b/c/d")
43 | #'
44 | #' url_relative("http://hadley.nz/a/c", "http://hadley.nz")
45 | #' url_relative("http://hadley.nz/a/c", "http://hadley.nz/")
46 | #' url_relative("http://hadley.nz/a/c", "http://hadley.nz/a/b")
47 | #' url_relative("http://hadley.nz/a/c", "http://hadley.nz/a/b/")
48 | #' @export
49 | url_absolute <- function(x, base) {
50 |   absolute <- .Call(url_absolute_, x, base)
51 |   absolute
52 | }
52 | 
53 | #' @rdname url_absolute
54 | #' @export
55 | url_relative <- function(x, base) {
56 |   relative <- .Call(url_relative_, x, base)
57 |   relative
58 | }
58 | 
59 | #' Escape and unescape urls.
60 | #'
61 | #' @param x A character vector of urls.
62 | #' @param reserved A string containing additional characters to avoid
63 | #'   escaping. Defaults to the empty string.
64 | #' @export
65 | #' @examples
66 | #' url_escape("a b c")
67 | #' url_escape("a b c", "")
68 | #'
69 | #' url_unescape("a%20b%2fc")
70 | #' url_unescape("%C2%B5")
71 | url_escape <- function(x, reserved = "") {
72 |   escaped <- .Call(url_escape_, x, reserved)
73 |   escaped
74 | }
73 | 
74 | #' @rdname url_escape
75 | #' @export
76 | url_unescape <- function(x) {
77 |   unescaped <- .Call(url_unescape_, x)
78 |   unescaped
79 | }
79 | 
80 | #' Parse a url into its component pieces.
81 | #'
82 | #' @param x A character vector of urls.
83 | #' @return A data frame with one row for each element of `x` and
84 | #'   columns: scheme, server, port, user, path, query, fragment.
85 | #' @export
86 | #' @examples
87 | #' url_parse("http://had.co.nz/")
88 | #' url_parse("http://had.co.nz:1234/")
89 | #' url_parse("http://had.co.nz:1234/?a=1&b=2")
90 | #' url_parse("http://had.co.nz:1234/?a=1&b=2#def")
91 | url_parse <- function(x) {
92 |   pieces <- .Call(url_parse_, x)
93 |   pieces
94 | }
94 | 


--------------------------------------------------------------------------------
/R/xml_write.R:
--------------------------------------------------------------------------------
  1 | #' Write XML or HTML to disk.
  2 | #'
  3 | #' This writes out both XML and normalised HTML. By default the output uses
  4 | #' the same format that was read. To force a particular format pass
  5 | #' `options = "as_xml"` or `options = "as_html"` respectively.
  6 | #'
  7 | #' @param x A document or node to write to disk. It's not possible to
  8 | #'   save nodesets containing more than one node.
  9 | #' @param file Path to file or connection to write to.
 10 | #' @param encoding The character encoding to use in the document. The default
 11 | #' encoding is \sQuote{UTF-8}. Available encodings are specified at
 12 | #' <http://xmlsoft.org/html/libxml-encoding.html#xmlCharEncoding>.
 13 | #' @param options default: \sQuote{format}. Zero or more of
 14 | #' \Sexpr[results=rd, stage=build]{xml2:::describe_options(xml2:::xml_save_options())}
 15 | #' @param ... additional arguments passed to methods.
 16 | #' @export
 17 | #' @examples
 18 | #' h <- read_html("<p>Hi!</p>")
 19 | #'
 20 | #' tmp <- tempfile(fileext = ".xml")
 21 | #' write_xml(h, tmp, options = "format")
 22 | #' readLines(tmp)
 23 | #'
 24 | #' # write formatted HTML output
 25 | #' write_html(h, tmp, options = "format")
 26 | #' readLines(tmp)
 27 | write_xml <- function(x, file, ...) UseMethod("write_xml")
 30 | 
 31 | # Missing nodes cannot be written: this method always raises an error.
 32 | #' @export
 33 | write_xml.xml_missing <- function(x, file, ...) {
 34 |   cli::cli_abort(message = "Missing data cannot be written.")
 35 | }
 35 | 
 36 | #' @rdname write_xml
 37 | #' @export
 38 | write_xml.xml_document <- function(x, file, ..., options = "format", encoding = "UTF-8") {
 39 |   options <- parse_options(options, xml_save_options())
 40 |   file <- path_to_connection(file, check = "dir")
 41 | 
 42 |   # Not a connection: validate with check_string() and write to that path
 43 |   if (!inherits(file, "connection")) {
 44 |     check_string(file)
 45 |     .Call(doc_write_file, x$doc, file, encoding, options)
 46 |     return(invisible())
 47 |   }
 48 | 
 49 |   # A closed connection is opened in "wb" mode and closed again on exit;
 50 |   # a connection that is already open is left open
 51 |   if (!isOpen(file)) {
 52 |     open(file, "wb")
 53 |     on.exit(close(file), add = TRUE)
 54 |   }
 55 |   .Call(doc_write_connection, x$doc, file, encoding, options)
 56 |   invisible()
 57 | }
 55 | 
 56 | # Nodeset method: only a nodeset holding exactly one node can be written;
 57 | # that node is written like a standalone node.
 58 | #' @export
 59 | write_xml.xml_nodeset <- function(x, file, ..., options = "format", encoding = "UTF-8") {
 60 |   if (length(x) != 1) {
 61 |     cli::cli_abort("Can only save length 1 node sets.")
 62 |   }
 63 | 
 64 |   options <- parse_options(options, xml_save_options())
 65 |   file <- path_to_connection(file, check = "dir")
 66 | 
 67 |   # Not a connection: validate with check_string() and write to that path
 68 |   if (!inherits(file, "connection")) {
 69 |     check_string(file)
 70 |     .Call(node_write_file, x[[1]]$node, file, encoding, options)
 71 |     return(invisible())
 72 |   }
 73 | 
 74 |   # A closed connection is opened in "wb" mode and closed again on exit
 75 |   if (!isOpen(file)) {
 76 |     open(file, "wb")
 77 |     on.exit(close(file), add = TRUE)
 78 |   }
 79 |   .Call(node_write_connection, x[[1]]$node, file, encoding, options)
 80 |   invisible()
 81 | }
 78 | 
 79 | # Node method: writes the single node `x$node` to a path or a connection.
 80 | #' @export
 81 | write_xml.xml_node <- function(x, file, ..., options = "format", encoding = "UTF-8") {
 82 |   options <- parse_options(options, xml_save_options())
 83 |   file <- path_to_connection(file, check = "dir")
 84 | 
 85 |   # Not a connection: validate with check_string() and write to that path
 86 |   if (!inherits(file, "connection")) {
 87 |     check_string(file)
 88 |     .Call(node_write_file, x$node, file, encoding, options)
 89 |     return(invisible())
 90 |   }
 91 | 
 92 |   # A closed connection is opened in "wb" mode and closed again on exit
 93 |   if (!isOpen(file)) {
 94 |     open(file, "wb")
 95 |     on.exit(close(file), add = TRUE)
 96 |   }
 97 |   .Call(node_write_connection, x$node, file, encoding, options)
 98 |   invisible()
 99 | }
 97 | 
 98 | 
 99 | #' @export
100 | #' @rdname write_xml
101 | write_html <- function(x, file, ...) UseMethod("write_html")
102 | 
103 | # Missing nodes cannot be written: this method always raises an error.
104 | #' @export
105 | write_html.xml_missing <- function(x, file, ...) {
106 |   cli::cli_abort(message = "Missing data cannot be written.")
107 | }
109 | 
110 | #' @rdname write_xml
111 | #' @export
112 | write_html.xml_document <- write_xml.xml_document # same function object as the XML method
113 | 
114 | #' @export
115 | write_html.xml_nodeset <- write_xml.xml_nodeset # same function object as the XML method
116 | 
117 | #' @export
118 | write_html.xml_node <- write_xml.xml_node # same function object as the XML method
119 | 


--------------------------------------------------------------------------------
/R/zzz.R:
--------------------------------------------------------------------------------
1 | .onUnload <- function(libpath) {
2 |   gc() # trigger finalisers while the shared library is still loaded
3 |   library.dynam.unload("xml2", libpath) # then unload xml2's shared library
4 | }
5 | 


--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | output: github_document
 3 | ---
 4 | 
 5 | <!-- README.md is generated from README.Rmd. Please edit that file -->
 6 | 
 7 | ```{r, include = FALSE}
 8 | knitr::opts_chunk$set(
 9 |   collapse = TRUE,
10 |   comment = "#>",
11 |   fig.path = "man/figures/README-",
12 |   out.width = "100%"
13 | )
14 | ```
15 | 
16 | # xml2
17 | 
18 | <!-- badges: start -->
19 | [![CRAN_Status_Badge](https://www.r-pkg.org/badges/version/xml2)](https://cran.r-project.org/package=xml2)
20 | [![Codecov test coverage](https://codecov.io/gh/r-lib/xml2/branch/master/graph/badge.svg)](https://app.codecov.io/gh/r-lib/xml2?branch=main)
21 | [![R build status](https://github.com/r-lib/xml2/workflows/R-CMD-check/badge.svg)](https://github.com/r-lib/xml2/actions)
22 | [![R-CMD-check](https://github.com/r-lib/xml2/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/r-lib/xml2/actions/workflows/R-CMD-check.yaml)
23 | <!-- badges: end -->
24 | 
25 | The xml2 package is a binding to [libxml2](http://xmlsoft.org), making it easy to work with HTML and XML from R. The API is somewhat inspired by [jQuery](https://jquery.com).
26 | 
27 | ## Installation
28 | 
29 | You can install xml2 from CRAN, 
30 | 
31 | ```r
32 | install.packages("xml2")
33 | ```
34 | 
35 | or you can install the development version from github, using `pak`:
36 | 
37 | ```r
38 | # install.packages("pak")
39 | pak::pak("r-lib/xml2")
40 | ```
41 | 
42 | ## Usage
43 | 
44 | ```r
45 | library(xml2)
46 | x <- read_xml("<foo> <bar> text <baz/> </bar> </foo>")
47 | x
48 | 
49 | xml_name(x)
50 | xml_children(x)
51 | xml_text(x)
52 | xml_find_all(x, ".//baz")
53 | 
54 | h <- read_html("<html><p>Hi <b>!")
55 | h
56 | xml_name(h)
57 | xml_text(h)
58 | ```
59 | 
60 | There are three key classes:
61 | 
62 | * `xml_node`: a single node in a document.
63 | 
64 | * `xml_document`: the complete document. Acting on a document is usually the same
65 |   as acting on the root node of the document.
66 | 
67 | * `xml_nodeset`: a __set__ of nodes within the document. Operations on
68 |   `xml_nodeset`s are vectorised: they apply the operation to each node in the set.
69 | 
70 | ## Compared to the XML package
71 | 
72 | xml2 has similar goals to the XML package. The main differences are:
73 | 
74 | * xml2 takes care of memory management for you. It will automatically
75 |   free the memory used by an XML document as soon as the last reference
76 |   to it goes away.
77 | 
78 | * xml2 has a very simple class hierarchy so you don't need to think about exactly 
79 |   what type of object you have, xml2 will just do the right thing.
80 | 
81 | * More convenient handling of namespaces in Xpath expressions - see `xml_ns()` 
82 |   and `xml_ns_strip()` to get started.
83 | 
84 | ## Code of Conduct
85 | 
86 | Please note that the xml2 project is released with a [Contributor Code of Conduct](https://xml2.r-lib.org/CODE_OF_CONDUCT.html). By contributing to this project, you agree to abide by its terms.
87 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | 
 2 | <!-- README.md is generated from README.Rmd. Please edit that file -->
 3 | 
 4 | # xml2
 5 | 
 6 | <!-- badges: start -->
 7 | 
 8 | [![CRAN_Status_Badge](https://www.r-pkg.org/badges/version/xml2)](https://cran.r-project.org/package=xml2)
 9 | [![Codecov test
10 | coverage](https://codecov.io/gh/r-lib/xml2/branch/master/graph/badge.svg)](https://app.codecov.io/gh/r-lib/xml2?branch=main)
11 | [![R build
12 | status](https://github.com/r-lib/xml2/workflows/R-CMD-check/badge.svg)](https://github.com/r-lib/xml2/actions)
13 | [![R-CMD-check](https://github.com/r-lib/xml2/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/r-lib/xml2/actions/workflows/R-CMD-check.yaml)
14 | <!-- badges: end -->
15 | 
16 | The xml2 package is a binding to [libxml2](http://xmlsoft.org), making
17 | it easy to work with HTML and XML from R. The API is somewhat inspired
18 | by [jQuery](https://jquery.com).
19 | 
20 | ## Installation
21 | 
22 | You can install xml2 from CRAN,
23 | 
24 | ``` r
25 | install.packages("xml2")
26 | ```
27 | 
28 | or you can install the development version from github, using
29 | `pak`:
30 | 
31 | ``` r
32 | # install.packages("pak")
33 | pak::pak("r-lib/xml2")
34 | ```
35 | 
36 | ## Usage
37 | 
38 | ``` r
39 | library(xml2)
40 | x <- read_xml("<foo> <bar> text <baz/> </bar> </foo>")
41 | x
42 | 
43 | xml_name(x)
44 | xml_children(x)
45 | xml_text(x)
46 | xml_find_all(x, ".//baz")
47 | 
48 | h <- read_html("<html><p>Hi <b>!")
49 | h
50 | xml_name(h)
51 | xml_text(h)
52 | ```
53 | 
54 | There are three key classes:
55 | 
56 | - `xml_node`: a single node in a document.
57 | 
58 | - `xml_document`: the complete document. Acting on a document is usually the
59 |   same as acting on the root node of the document.
60 | 
61 | - `xml_nodeset`: a **set** of nodes within the document. Operations on
62 |   `xml_nodeset`s are vectorised: they apply the operation to each node in
63 |   the set.
64 | 
65 | ## Compared to the XML package
66 | 
67 | xml2 has similar goals to the XML package. The main differences are:
68 | 
69 | - xml2 takes care of memory management for you. It will automatically
70 |   free the memory used by an XML document as soon as the last reference
71 |   to it goes away.
72 | 
73 | - xml2 has a very simple class hierarchy so you don’t need to think
74 |   about exactly what type of object you have, xml2 will just do the
75 |   right thing.
76 | 
77 | - More convenient handling of namespaces in Xpath expressions - see
78 |   `xml_ns()` and `xml_ns_strip()` to get started.
79 | 
80 | ## Code of Conduct
81 | 
82 | Please note that the xml2 project is released with a [Contributor Code
83 | of Conduct](https://xml2.r-lib.org/CODE_OF_CONDUCT.html). By
84 | contributing to this project, you agree to abide by its terms.
85 | 


--------------------------------------------------------------------------------
/_pkgdown.yml:
--------------------------------------------------------------------------------
 1 | url: http://xml2.r-lib.org
 2 | 
 3 | template:
 4 |   bootstrap: 5
 5 |   package: tidytemplate
 6 | 
 7 |   includes:
 8 |     in_header: |
 9 |       <script defer data-domain="xml2.r-lib.org,all.tidyverse.org" src="https://plausible.io/js/plausible.js"></script>
10 | 
11 | development:
12 |   mode: auto
13 | 
14 | reference:
15 |   - title: Read and write documents
16 |     contents:
17 |     - starts_with("read_")
18 |     - starts_with("write_")
19 |     - starts_with("download_")
20 | 
21 |   - title: Class coercion
22 |     contents:
23 |     - starts_with("as_")
24 | 
25 |   - title: URL manipulation
26 |     contents:
27 |     - starts_with("url_")
28 | 
29 |   - title: Create and modify a document
30 |     contents:
31 |     - starts_with("xml_new")
32 |     - starts_with("xml_add")
33 |     - starts_with("xml_set")
34 |     - xml_cdata
35 |     - xml_comment
36 |     - xml_dtd
37 |     - xml_ns_strip
38 |     - xml_replace
39 |     - xml_remove
40 | 
41 |   - title: Search and navigate a document
42 |     contents:
43 |     - starts_with("xml_find")
44 |     - starts_with("xml_attr")
45 |     - xml_path
46 | 
47 |   - title: Inspect a document
48 |     contents:
49 |     - starts_with("xml_ns")
50 |     - xml_children
51 |     - ends_with("structure")
52 |     - xml_type
53 |     - xml_url
54 |     - xml_validate
55 | 
56 |   - title: Utilities
57 |     contents:
58 |     - ends_with("serialize")
59 |     - xml2_example
60 | 
61 | news:
62 |   releases:
63 |   - text: Version 1.0.0
64 |     href: https://www.rstudio.com/blog/xml2-1-0-0/
65 |   - text: Version 1.1.1
66 |     href: https://www.rstudio.com/blog/xml-1-1-1/
67 | 


--------------------------------------------------------------------------------
/cleanup:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | rm -f src/Makevars configure.log # remove the files generated by ./configure
3 | 


--------------------------------------------------------------------------------
/codecov.yml:
--------------------------------------------------------------------------------
 1 | comment: false
 2 | 
 3 | coverage:
 4 |   status:
 5 |     project:
 6 |       default:
 7 |         target: auto
 8 |         threshold: 1%
 9 |         informational: true
10 |     patch:
11 |       default:
12 |         target: auto
13 |         threshold: 1%
14 |         informational: true
15 | 


--------------------------------------------------------------------------------
/configure:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | # Anticonf (tm) script by Jeroen Ooms (2015)
 3 | # This script queries 'xml2-config' (or, failing that, 'pkg-config') for the required cflags and ldflags.
 4 | # If neither tool is available or the library is not found, try setting
 5 | # INCLUDE_DIR and LIB_DIR manually via e.g:
 6 | # R CMD INSTALL --configure-vars='INCLUDE_DIR=/.../include LIB_DIR=/.../lib'
 7 | 
 8 | # Library settings
 9 | PKG_CONFIG_NAME="libxml-2.0"
10 | PKG_DEB_NAME="libxml2-dev"
11 | PKG_RPM_NAME="libxml2-devel"
12 | PKG_CSW_NAME="libxml2_dev"
13 | PKG_TEST_HEADER="<libxml/tree.h>"
14 | PKG_LIBS="-lxml2"
15 | 
16 | # Manually supplied INCLUDE_DIR / LIB_DIR skip auto-detection. Note that cflags may be empty in case of success
17 | if [ "$INCLUDE_DIR" ] || [ "$LIB_DIR" ]; then
18 |   echo "Found INCLUDE_DIR and/or LIB_DIR!"
19 |   PKG_CFLAGS="-I$INCLUDE_DIR $PKG_CFLAGS"
20 |   PKG_LIBS="-L$LIB_DIR $PKG_LIBS"
21 | else
22 |   # Use xml2-config if it runs successfully, otherwise fall back to pkg-config
23 |   xml2-config --version >/dev/null 2>&1
24 |   if [ $? -eq 0 ]; then
25 |     PKGCONFIG_CFLAGS=`xml2-config --cflags`
26 |     PKGCONFIG_LIBS=`xml2-config --libs`
27 | 
28 |     # On Darwin, cflags ending in "/usr/include" lack the libxml2 subdirectory: append it
29 |     # https://github.com/r-lib/xml2/issues/296
30 |     if [ `uname` = "Darwin" ] && echo "${PKGCONFIG_CFLAGS}" | grep -sq "/usr/include$"; then
31 |       PKGCONFIG_CFLAGS="$PKGCONFIG_CFLAGS/libxml2"
32 |     fi
33 | 
34 |   else
35 |     pkg-config --version >/dev/null 2>&1
36 |     if [ $? -eq 0 ]; then
37 |       PKGCONFIG_CFLAGS=`pkg-config --cflags $PKG_CONFIG_NAME`
38 |       PKGCONFIG_LIBS=`pkg-config --libs $PKG_CONFIG_NAME`
39 |     fi
40 |   fi
41 | 
42 |   if [ "$PKGCONFIG_CFLAGS" ] || [ "$PKGCONFIG_LIBS" ]; then
43 |     echo "Found pkg-config cflags and libs!"
44 |     PKG_CFLAGS=${PKGCONFIG_CFLAGS}
45 |     PKG_LIBS=${PKGCONFIG_LIBS}
46 |   fi
47 | fi
48 | 
49 | # Ask R which C compiler and flags it is configured with
50 | CC=`${R_HOME}/bin/R CMD config CC`
51 | CFLAGS=`${R_HOME}/bin/R CMD config CFLAGS`
52 | CPPFLAGS=`${R_HOME}/bin/R CMD config CPPFLAGS`
53 | 
54 | # For debugging: show the flags that will be used
55 | echo "Using PKG_CFLAGS=$PKG_CFLAGS"
56 | echo "Using PKG_LIBS=$PKG_LIBS"
57 | 
58 | # Test configuration: preprocess (-E) a file including the test header; compiler messages go to configure.log
59 | echo "#include $PKG_TEST_HEADER" | ${CC} ${CPPFLAGS} ${PKG_CFLAGS} ${CFLAGS} -E -xc - >/dev/null 2>configure.log
60 | 
61 | # The test failed: print installation hints plus the contents of configure.log, then abort
62 | if [ $? -ne 0 ]; then
63 |   echo "------------------------- ANTICONF ERROR ---------------------------"
64 |   echo "Configuration failed because $PKG_CONFIG_NAME was not found. Try installing:"
65 |   echo " * deb: $PKG_DEB_NAME (Debian, Ubuntu, etc)"
66 |   echo " * rpm: $PKG_RPM_NAME (Fedora, CentOS, RHEL)"
67 |   echo " * csw: $PKG_CSW_NAME (Solaris)"
68 |   echo "If $PKG_CONFIG_NAME is already installed, check that 'pkg-config' is in your"
69 |   echo "PATH and PKG_CONFIG_PATH contains a $PKG_CONFIG_NAME.pc file. If pkg-config"
70 |   echo "is unavailable you can set INCLUDE_DIR and LIB_DIR manually via:"
71 |   echo "R CMD INSTALL --configure-vars='INCLUDE_DIR=... LIB_DIR=...'"
72 |   echo "-------------------------- [ERROR MESSAGE] ---------------------------"
73 |   cat configure.log
74 |   echo "--------------------------------------------------------------------"
75 |   exit 1
76 | fi
77 | 
78 | # Fill the @cflags@ and @libs@ placeholders of src/Makevars.in to create src/Makevars
79 | sed -e "s|@cflags@|$PKG_CFLAGS|" -e "s|@libs@|$PKG_LIBS|" src/Makevars.in > src/Makevars
80 | 
81 | # Success
82 | exit 0
83 | 


--------------------------------------------------------------------------------
/configure.win:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-lib/xml2/bf5619bbb6452d1f23cd88a9e0960d77e98a0d7b/configure.win


--------------------------------------------------------------------------------
/cran-comments.md:
--------------------------------------------------------------------------------
 1 | ## R CMD check results
 2 | 
 3 | 0 errors | 0 warnings | 0 note
 4 | 
 5 | ## revdepcheck results
 6 | 
 7 | We checked 570 reverse dependencies (526 from CRAN + 44 from Bioconductor), comparing R CMD check results across CRAN and dev versions of this package.
 8 | 
 9 |  * We saw 0 new problems
10 |  * We failed to check 0 packages
11 | 


--------------------------------------------------------------------------------
/docker/r-devel-san/Dockerfile:
--------------------------------------------------------------------------------
 1 | # Builds, tests and checks xml2 on the rocker/r-devel-san image, where R-devel
 2 | # is invoked as RD.
 3 | FROM rocker/r-devel-san
 4 | 
 5 | # System libraries plus the R packages used while building and testing.
 6 | RUN apt-get -qq update \
 7 | 	&& apt-get -qq dist-upgrade -y \
 8 | 	&& apt-get -qq install git pandoc pandoc-citeproc libssl-dev libcurl4-openssl-dev libxml2-dev -y \
 9 | 	&& RD -e 'install.packages(c("Rcpp", "BH", "httr", "testthat", "magrittr", "knitr", "rmarkdown", "covr"), quiet = TRUE)'
10 | 
11 | # Fetch the sources from the r-lib repository, then build and install with tests.
12 | RUN git clone https://github.com/r-lib/xml2 \
13 | 	&& RD CMD build xml2 --no-build-vignettes \
14 | 	&& RD CMD INSTALL xml2_*.tar.gz --install-tests
15 | 
16 | # A failing test run is tolerated here (|| true) so the image build continues.
17 | RUN RD -e 'testthat::test_package("xml2"); q("no");' || true
18 | 
19 | RUN RD CMD check xml2_*.tar.gz
20 | 


--------------------------------------------------------------------------------
/inst/extdata/order-doc.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0"?>
 2 | <purchaseOrder xmlns="http://tempuri.org/po.xsd" orderDate="1999-10-20">
 3 |     <shipTo country="US">
 4 |         <name>Alice Smith</name>
 5 |         <street>123 Maple Street</street>
 6 |         <city>Mill Valley</city>
 7 |         <state>CA</state>
 8 |         <zip>90952</zip>
 9 |     </shipTo>
10 |     <billTo country="US">
11 |         <name>Robert Smith</name>
12 |         <street>8 Oak Avenue</street>
13 |         <city>Old Town</city>
14 |         <state>PA</state>
15 |         <zip>95819</zip>
16 |     </billTo>
17 |     <comment>Hurry, my lawn is going wild!</comment>
18 |     <items>
19 |         <item partNum="872-AA">
20 |             <productName>Lawnmower</productName>
21 |             <quantity>1</quantity>
22 |             <USPrice>148.95</USPrice>
23 |             <comment>Confirm this is electric</comment>
24 |         </item>
25 |         <item partNum="926-AA">
26 |             <productName>Baby Monitor</productName>
27 |             <quantity>1</quantity>
28 |             <USPrice>39.98</USPrice>
29 |             <shipDate>1999-05-21</shipDate>
30 |         </item>
31 |     </items>
32 | </purchaseOrder>
33 | 


--------------------------------------------------------------------------------
/inst/extdata/order-schema.xml:
--------------------------------------------------------------------------------
 1 | <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" targetNamespace="http://tempuri.org/po.xsd" 
 2 | xmlns="http://tempuri.org/po.xsd" elementFormDefault="qualified">
 3 |  <xs:annotation>
 4 |   <xs:documentation xml:lang="en">
 5 |    Purchase order schema for Example.com.
 6 |    Copyright 2000 Example.com. All rights reserved.
 7 |   </xs:documentation>
 8 |  </xs:annotation>
 9 | 
10 |  <xs:element name="purchaseOrder" type="PurchaseOrderType"/>
11 | 
12 |  <xs:element name="comment" type="xs:string"/>
13 | 
14 |  <xs:complexType name="PurchaseOrderType">
15 |   <xs:sequence>
16 |    <xs:element name="shipTo" type="USAddress"/>
17 |    <xs:element name="billTo" type="USAddress"/>
18 |    <xs:element ref="comment" minOccurs="0"/>
19 |    <xs:element name="items"  type="Items"/>
20 |   </xs:sequence>
21 |   <xs:attribute name="orderDate" type="xs:date"/>
22 |  </xs:complexType>
23 | 
24 |  <xs:complexType name="USAddress">
25 |       <xs:annotation>
26 |       <xs:documentation>
27 |        Purchase order schema for Example.Microsoft.com.
28 |        Copyright 2001 Example.Microsoft.com. All rights reserved.
29 |       </xs:documentation>
30 |       <xs:appinfo>
31 |         Application info.
32 |       </xs:appinfo>
33 |      </xs:annotation>
34 | 
35 |   <xs:sequence>
36 |    <xs:element name="name"   type="xs:string"/>
37 |    <xs:element name="street" type="xs:string"/>
38 |    <xs:element name="city"   type="xs:string"/>
39 |    <xs:element name="state"  type="xs:string"/>
40 |    <xs:element name="zip"    type="xs:decimal"/>
41 |   </xs:sequence>
42 |   <xs:attribute name="country" type="xs:NMTOKEN"
43 |      fixed="US"/>
44 |  </xs:complexType>
45 | 
46 |  <xs:complexType name="Items">
47 |   <xs:sequence>
48 |    <xs:element name="item" minOccurs="0" maxOccurs="unbounded">
49 |     <xs:complexType>
50 |      <xs:sequence>
51 |       <xs:element name="productName" type="xs:string"/>
52 |       <xs:element name="quantity">
53 |        <xs:simpleType>
54 |         <xs:restriction base="xs:positiveInteger">
55 |          <xs:maxExclusive value="100"/>
56 |         </xs:restriction>
57 |        </xs:simpleType>
58 |       </xs:element>
59 |       <xs:element name="USPrice"    type="xs:decimal"/>
60 |       <xs:element ref="comment"   minOccurs="0"/>
61 |       <xs:element name="shipDate" type="xs:date" minOccurs="0"/>
62 |      </xs:sequence>
63 |      <xs:attribute name="partNum" type="SKU" use="required"/>
64 |     </xs:complexType>
65 |    </xs:element>
66 |   </xs:sequence>
67 |  </xs:complexType>
68 | 
69 |  <!-- Stock Keeping Unit, a code for identifying products -->
70 |  <xs:simpleType name="SKU">
71 |   <xs:restriction base="xs:string">
72 |    <xs:pattern value="\d{3}-[A-Z]{2}"/>
73 |   </xs:restriction>
74 |  </xs:simpleType>
75 | 
76 | </xs:schema>
77 | 


--------------------------------------------------------------------------------
/inst/extdata/r-project.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <html lang="en">
  3 |   <head>
  4 |     <meta charset="utf-8">
  5 |     <meta http-equiv="X-UA-Compatible" content="IE=edge">
  6 |     <meta name="viewport" content="width=device-width, initial-scale=1">
  7 |     <title>R: The R Project for Statistical Computing</title>
  8 | 
  9 |     <link rel="icon" type="image/png" href="/favicon-32x32.png" sizes="32x32" />
 10 |     <link rel="icon" type="image/png" href="/favicon-16x16.png" sizes="16x16" />
 11 | 
 12 |     <!-- Bootstrap -->
 13 |     <link href="/css/bootstrap.min.css" rel="stylesheet">
 14 |     <link href="/css/R.css" rel="stylesheet">
 15 | 
 16 |     <!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries -->
 17 |     <!-- WARNING: Respond.js doesn't work if you view the page via file:// -->
 18 |     <!--[if lt IE 9]>
 19 |       <script src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script>
 20 |       <script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
 21 |     <![endif]-->
 22 |   </head>
 23 |   <body>
 24 |     <div class="container page">
 25 |       <div class="row">
 26 |         <div class="col-xs-12 col-sm-offset-1 col-sm-2 sidebar" role="navigation">
 27 | <div class="row">
 28 | <div class="col-xs-4 col-sm-12">
 29 | <p><a href="/"><img src="/Rlogo.jpg" alt="R" /></a></p>
 30 | <p><small><a href="/">[Home]</a></small></p>
 31 | <h2>Download</h2>
 32 | <p><a href="http://cran.r-project.org/mirrors.html">CRAN</a></p>
 33 | <h2>R Project</h2>
 34 | <ul>
 35 | <li><a href="/about.html">About R</a></li>
 36 | <li><a href="/contributors.html">Contributors</a></li>
 37 | <li><a href="/news.html">What’s New?</a></li>
 38 | <li><a href="/mail.html">Mailing Lists</a></li>
 39 | <li><a href="http://bugs.R-project.org">Bug Tracking</a></li>
 40 | <li><a href="/conferences.html">Conferences</a></li>
 41 | <li><a href="/search.html">Search</a></li>
 42 | </ul>
 43 | </div>
 44 | <div class="col-xs-4 col-sm-12">
 45 | <h2>R Foundation</h2>
 46 | <ul>
 47 | <li><a href="/foundation/">Foundation</a></li>
 48 | <li><a href="/foundation/board.html">Board</a></li>
 49 | <li><a href="/foundation/members.html">Members</a></li>
 50 | <li><a href="/foundation/donors.html">Donors</a></li>
 51 | <li><a href="/foundation/donations.html">Donate</a></li>
 52 | </ul>
 53 | </div>
 54 | <div class="col-xs-4 col-sm-12">
 55 | <h2>Documentation</h2>
 56 | <ul>
 57 | <li><a href="http://cran.r-project.org/manuals.html">Manuals</a></li>
 58 | <li><a href="http://cran.r-project.org/faqs.html">FAQs</a></li>
 59 | <li><a href="http://journal.r-project.org">The R Journal</a></li>
 60 | <li><a href="/doc/bib/R-books.html">Books</a></li>
 61 | <li><a href="/certification.html">Certification</a></li>
 62 | <li><a href="/other-docs.html">Other</a></li>
 63 | </ul>
 64 | </div>
 65 | <div class="col-xs-4 col-sm-12">
 66 | <h2>Links</h2>
 67 | <ul>
 68 | <li><a href="http://www.bioconductor.org">Bioconductor</a></li>
 69 | <li><a href="/other-projects.html">Related Projects</a></li>
 70 | </ul>
 71 | </div>
 72 | </div>
 73 |         </div>
 74 |         <div class="col-xs-12 col-sm-7">
 75 |         <h1>The R Project for Statistical Computing</h1>
 76 | <h2 id="getting-started">Getting Started</h2>
 77 | <p>R is a free software environment for statistical computing and graphics. It compiles and runs on a wide variety of UNIX platforms, Windows and MacOS. To <strong><a href="http://cran.r-project.org/mirrors.html">download R</a></strong>, please choose your preferred <a href="http://cran.r-project.org/mirrors.html">CRAN mirror</a>.</p>
 78 | <p>If you have questions about R like how to download and install the software, or what the license terms are, please read our <a href="http://cran.R-project.org/faqs.html">answers to frequently asked questions</a> before you send an email.</p>
 79 | <h2 id="news">News</h2>
 80 | <ul>
 81 | <li><p><a href="http://cran.r-project.org/src/base-prerelease"><strong>R 3.2.0 (Full of Ingredients) prerelease versions</strong></a> will appear starting March 19. Final release is scheduled for 2015-04-16.</p></li>
 82 | <li><p><strong>R version 3.1.3</strong> (Smooth Sidewalk) has been released on 2015-03-09.</p></li>
 83 | <li><p><a href="http://journal.r-project.org"><strong>The R Journal Volume 6/2</strong></a> is available.</p></li>
 84 | <li><p><strong>R version 3.1.2</strong> (Pumpkin Helmet) has been released on 2014-10-31.</p></li>
 85 | <li><p><strong><a href="http://www.r-project.org/useR-2015">useR! 2015</a></strong>, will take place at the University of Aalborg, Denmark, June 30 - July 3, 2015.</p></li>
 86 | <li><p><strong><a href="http://www.r-project.org/useR-2014">useR! 2014</a></strong>, took place at the University of California, Los Angeles, USA June 30 - July 3, 2014.</p></li>
 87 | </ul>
 88 | <!--- (Boilerplate for release run-in)
 89 | -   [**R 3.1.3 (Smooth Sidewalk) prerelease versions**](http://cran.r-project.org/src/base-prerelease) will appear starting February 28. Final release is scheduled for 2015-03-09.
 90 | -->
 91 |         </div>
 92 |       </div>
 93 |       <div class="raw footer">
 94 |         &copy; The R Foundation.
 95 |       </div>
 96 |     </div>
 97 |     <!-- jQuery (necessary for Bootstrap's JavaScript plugins) -->
 98 |     <script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.1/jquery.min.js"></script>
 99 |     <!-- Include all compiled plugins (below), or include individual files as needed -->
100 |     <script src="js/bootstrap.min.js"></script>
101 |   </body>
102 | </html>
103 | 


--------------------------------------------------------------------------------
/inst/include/xml2_types.h:
--------------------------------------------------------------------------------
 1 | #ifndef __XML2_XML2_TYPES__
 2 | #define __XML2_XML2_TYPES__
 3 | 
 4 | #include <libxml/tree.h>
 5 | #include <Rinternals.h>
 6 | 
 7 | // Handle around an R external pointer (EXTPTRSXP) whose address is read as a T*.
 8 | // Every constructor preserves data_ once; the destructor releases it once.
 9 | template <typename T> class XPtr {
10 |   protected:
11 |   SEXP data_;
12 | 
13 |   public:
14 |   // Wrap an existing SEXP; raises an R error unless it is an external pointer.
15 |   XPtr(SEXP x) : data_(x) {
16 |     if (TYPEOF(x) != EXTPTRSXP) {
17 |       Rf_error("Expecting an external pointer: [type=%s]", Rf_type2char(TYPEOF(x)));
18 |     }
19 |     R_PreserveObject(data_);
20 |   }
21 | 
22 |   // Allocate a new external pointer around p (nil tag, nil protected value).
23 |   XPtr(T* p) : data_(R_MakeExternalPtr((void *) p, R_NilValue, R_NilValue)) {
24 |     R_PreserveObject(data_);
25 |   }
26 | 
27 |   // A copy shares the same SEXP and takes its own preservation.
28 |   XPtr(const XPtr<T> &old) : data_(old.data_) {
29 |     R_PreserveObject(data_);
30 |   }
31 | 
32 |   // Preserve the incoming SEXP before releasing the current one, so the object
33 |   // stays protected even when both handles refer to the same SEXP.
34 |   XPtr& operator=(const XPtr<T> &other) {
35 |     SEXP incoming = other.data_;
36 |     R_PreserveObject(incoming);
37 |     if (data_ != NULL) {
38 |       R_ReleaseObject(data_);
39 |     }
40 |     data_ = incoming;
41 |     return *this;
42 |   }
43 | 
44 |   operator SEXP() const { return data_; }
45 | 
46 |   // Address stored in the external pointer; NULL is passed through unchecked.
47 |   T* get() const { return static_cast<T*>(R_ExternalPtrAddr(data_)); }
48 | 
49 |   // Same as get(), but raises an R error when the stored address is NULL.
50 |   T* checked_get() const {
51 |     T* addr = get();
52 |     if (addr == NULL) {
53 |       Rf_error("external pointer is not valid");
54 |     }
55 |     return addr;
56 |   }
57 | 
58 |   operator T*() { return checked_get(); }
59 | 
60 |   T* operator->() const { return checked_get(); }
61 | 
62 |   ~XPtr() { R_ReleaseObject(data_); }
63 | };
64 | 
65 | 
66 | // XPtr<xmlDoc> subclass: handles created from a raw xmlDoc* get a finalizer
67 | // that frees the document.
68 | class XPtrDoc : public ::XPtr<xmlDoc> {
69 |   // Finalizer: frees the document and clears the stored address, so a later
70 |   // call on the same pointer finds NULL and does nothing.
71 |   static void finalizeXPtrDoc(SEXP p) {
72 |     if (TYPEOF(p) == EXTPTRSXP) {
73 |       xmlDoc* doc = static_cast<xmlDoc*>(R_ExternalPtrAddr(p));
74 |       if (doc != NULL) {
75 |         R_ClearExternalPtr(p);
76 |         xmlFreeDoc(doc);
77 |       }
78 |     }
79 |   }
80 | 
81 |   public:
82 |   // Wraps a raw document and registers the finalizer (onexit = false).
83 |   XPtrDoc(xmlDoc* p) : ::XPtr<xmlDoc>(p) {
84 |     R_RegisterCFinalizerEx(data_, finalizeXPtrDoc, (Rboolean) false);
85 |   }
86 | 
87 |   // Wraps an existing external pointer; no finalizer is registered here.
88 |   XPtrDoc(SEXP x) : ::XPtr<xmlDoc>(x) {}
89 | };
90 | 
91 | typedef ::XPtr<xmlNode> XPtrNode;
92 | typedef ::XPtr<xmlNs> XPtrNs;
93 | 
94 | #endif
95 | 


--------------------------------------------------------------------------------
/man/as_list.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/as_list.R
 3 | \name{as_list}
 4 | \alias{as_list}
 5 | \title{Coerce xml nodes to a list.}
 6 | \usage{
 7 | as_list(x, ns = character(), ...)
 8 | }
 9 | \arguments{
10 | \item{x}{A document, node, or node set.}
11 | 
12 | \item{ns}{Optionally, a named vector giving prefix-url pairs, as produced
13 | by \code{\link[=xml_ns]{xml_ns()}}. If provided, all names will be explicitly
14 | qualified with the ns prefix, i.e. if the element \code{bar} is defined
15 | in namespace \code{foo}, it will be called \code{foo:bar}. (And
16 | similarly for attributes). Default namespaces must be given an explicit
17 | name. The ns is ignored when using \code{\link[=xml_name<-]{xml_name<-()}} and
18 | \code{\link[=xml_set_name]{xml_set_name()}}.}
19 | 
20 | \item{...}{Needed for compatibility with generic. Unused.}
21 | }
22 | \description{
23 | This turns an XML document (or node or nodeset) into the equivalent R
24 | list. Note that this is \code{as_list()}, not \code{as.list()}:
25 | \code{lapply()} automatically calls \code{as.list()} on its inputs, so
26 | we can't override the default.
27 | }
28 | \details{
29 | \code{as_list} currently only handles the three most common types of
30 | children that an element might have:
31 | 
32 | \itemize{
33 | \item Other elements, converted to lists.
34 | \item Attributes, stored as R attributes. Attributes that have special meanings in R
35 | (\code{\link[=class]{class()}}, \code{\link[=comment]{comment()}}, \code{\link[=dim]{dim()}},
36 | \code{\link[=dimnames]{dimnames()}}, \code{\link[=names]{names()}}, \code{\link[=row.names]{row.names()}} and
37 | \code{\link[=tsp]{tsp()}}) are escaped with '.'
38 | \item Text, stored as a character vector.
39 | }
40 | }
41 | \examples{
42 | as_list(read_xml("<foo> a <b /><c><![CDATA[<d></d>]]></c></foo>"))
43 | as_list(read_xml("<foo> <bar><baz /></bar> </foo>"))
44 | as_list(read_xml("<foo id = 'a'></foo>"))
45 | as_list(read_xml("<foo><bar id='a'/><bar id='b'/></foo>"))
46 | }
47 | 


--------------------------------------------------------------------------------
/man/as_xml_document.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/as_xml_document.R
 3 | \name{as_xml_document}
 4 | \alias{as_xml_document}
 5 | \title{Coerce an R list to xml nodes.}
 6 | \usage{
 7 | as_xml_document(x, ...)
 8 | }
 9 | \arguments{
10 | \item{x}{A document, node, or node set.}
11 | 
12 | \item{...}{Needed for compatibility with generic. Unused.}
13 | }
14 | \description{
15 | This turns an R list into the equivalent XML document. Not all R lists will
16 | produce valid XML, in particular there can only be one root node and all
17 | child nodes need to be named (or empty) lists. R attributes become XML
18 | attributes and R names become XML node names.
19 | }
20 | \examples{
21 | as_xml_document(list(x = list()))
22 | 
23 | # Nesting multiple nodes
24 | as_xml_document(list(foo = list(bar = list(baz = list()))))
25 | 
26 | # attributes are stored as R attributes
27 | as_xml_document(list(foo = structure(list(), id = "a")))
28 | as_xml_document(list(foo = list(
29 |   bar = structure(list(), id = "a"),
30 |   bar = structure(list(), id = "b")
31 | )))
32 | }
33 | 


--------------------------------------------------------------------------------
/man/download_xml.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/xml_parse.R
 3 | \name{download_xml}
 4 | \alias{download_xml}
 5 | \alias{download_html}
 6 | \title{Download a HTML or XML file}
 7 | \usage{
 8 | download_xml(
 9 |   url,
10 |   file = basename(url),
11 |   quiet = TRUE,
12 |   mode = "wb",
13 |   handle = curl::new_handle()
14 | )
15 | 
16 | download_html(
17 |   url,
18 |   file = basename(url),
19 |   quiet = TRUE,
20 |   mode = "wb",
21 |   handle = curl::new_handle()
22 | )
23 | }
24 | \arguments{
25 | \item{url}{A character string naming the URL of a resource to be downloaded.}
26 | 
27 | \item{file}{A character string with the name where the downloaded file is
28 | saved.}
29 | 
30 | \item{quiet}{If \code{TRUE}, suppress status messages (if any), and the
31 | progress bar.}
32 | 
33 | \item{mode}{A character string specifying the mode with which to write the file.
34 | Useful values are \code{"w"}, \code{"wb"} (binary), \code{"a"} (append)
35 | and \code{"ab"}.}
36 | 
37 | \item{handle}{a curl handle object}
38 | }
39 | \value{
40 | Path of downloaded file (invisibly).
41 | }
42 | \description{
43 | Libcurl implementation of \code{C_download} (the "internal" download method)
44 | with added support for https, ftps, gzip, etc. Default behavior is identical
45 | to \code{\link[=download.file]{download.file()}}, but request can be fully configured by passing
46 | a custom \code{\link[curl:handle]{curl::handle()}}.
47 | }
48 | \details{
49 | The main difference between \code{curl_download} and \code{curl_fetch_disk}
50 | is that \code{curl_download} checks the http status code before starting the
51 | download, and raises an error when status is non-successful. The behavior of
52 | \code{curl_fetch_disk} on the other hand is to proceed as normal and write
53 | the error page to disk in case of a non success response.
54 | 
55 | For a more advanced download interface which supports concurrent requests and
56 | resuming large files, have a look at the \link[curl]{multi_download} function.
57 | }
58 | \examples{
59 | \dontrun{
60 | download_html("http://tidyverse.org/index.html")
61 | }
62 | }
63 | \seealso{
64 | \link[curl:curl_download]{curl_download}
65 | }
66 | 


--------------------------------------------------------------------------------
/man/figures/lifecycle-archived.svg:
--------------------------------------------------------------------------------
 1 | <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="112" height="20" role="img" aria-label="lifecycle: archived">
 2 |     <title>lifecycle: archived</title>
 3 |     <linearGradient id="s" x2="0" y2="100%">
 4 |         <stop offset="0" stop-color="#bbb" stop-opacity=".1" />
 5 |         <stop offset="1" stop-opacity=".1" />
 6 |     </linearGradient>
 7 |     <clipPath id="r">
 8 |         <rect width="112" height="20" rx="3" fill="#fff" />
 9 |     </clipPath>
10 |     <g clip-path="url(#r)">
11 |         <rect width="55" height="20" fill="#555" />
12 |         <rect x="55" width="57" height="20" fill="#e05d44" />
13 |         <rect width="112" height="20" fill="url(#s)" />
14 |     </g>
15 |     <g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" text-rendering="geometricPrecision" font-size="110">
16 |         <text aria-hidden="true" x="285" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="450">lifecycle</text>
17 |         <text x="285" y="140" transform="scale(.1)" fill="#fff" textLength="450">lifecycle</text>
18 |         <text aria-hidden="true" x="825" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="470">archived</text>
19 |         <text x="825" y="140" transform="scale(.1)" fill="#fff" textLength="470">archived</text>
20 |     </g>
21 | </svg>
22 | 


--------------------------------------------------------------------------------
/man/figures/lifecycle-defunct.svg:
--------------------------------------------------------------------------------
 1 | <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="106" height="20" role="img" aria-label="lifecycle: defunct">
 2 |     <title>lifecycle: defunct</title>
 3 |     <linearGradient id="s" x2="0" y2="100%">
 4 |         <stop offset="0" stop-color="#bbb" stop-opacity=".1" />
 5 |         <stop offset="1" stop-opacity=".1" />
 6 |     </linearGradient>
 7 |     <clipPath id="r">
 8 |         <rect width="106" height="20" rx="3" fill="#fff" />
 9 |     </clipPath>
10 |     <g clip-path="url(#r)">
11 |         <rect width="55" height="20" fill="#555" />
12 |         <rect x="55" width="51" height="20" fill="#fe7d37" />
13 |         <rect width="106" height="20" fill="url(#s)" />
14 |     </g>
15 |     <g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" text-rendering="geometricPrecision" font-size="110">
16 |         <text aria-hidden="true" x="285" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="450">lifecycle</text>
17 |         <text x="285" y="140" transform="scale(.1)" fill="#fff" textLength="450">lifecycle</text>
18 |         <text aria-hidden="true" x="795" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="410">defunct</text>
19 |         <text x="795" y="140" transform="scale(.1)" fill="#fff" textLength="410">defunct</text>
20 |     </g>
21 | </svg>
22 | 


--------------------------------------------------------------------------------
/man/figures/lifecycle-deprecated.svg:
--------------------------------------------------------------------------------
 1 | <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="126" height="20" role="img" aria-label="lifecycle: deprecated">
 2 |     <title>lifecycle: deprecated</title>
 3 |     <linearGradient id="s" x2="0" y2="100%">
 4 |         <stop offset="0" stop-color="#bbb" stop-opacity=".1" />
 5 |         <stop offset="1" stop-opacity=".1" />
 6 |     </linearGradient>
 7 |     <clipPath id="r">
 8 |         <rect width="126" height="20" rx="3" fill="#fff" />
 9 |     </clipPath>
10 |     <g clip-path="url(#r)">
11 |         <rect width="55" height="20" fill="#555" />
12 |         <rect x="55" width="71" height="20" fill="#fe7d37" />
13 |         <rect width="126" height="20" fill="url(#s)" />
14 |     </g>
15 |     <g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" text-rendering="geometricPrecision" font-size="110">
16 |         <text aria-hidden="true" x="285" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="450">lifecycle</text>
17 |         <text x="285" y="140" transform="scale(.1)" fill="#fff" textLength="450">lifecycle</text>
18 |         <text aria-hidden="true" x="895" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="610">deprecated</text>
19 |         <text x="895" y="140" transform="scale(.1)" fill="#fff" textLength="610">deprecated</text>
20 |     </g>
21 | </svg>
22 | 


--------------------------------------------------------------------------------
/man/figures/lifecycle-experimental.svg:
--------------------------------------------------------------------------------
 1 | <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="138" height="20" role="img" aria-label="lifecycle: experimental">
 2 |     <title>lifecycle: experimental</title>
 3 |     <linearGradient id="s" x2="0" y2="100%">
 4 |         <stop offset="0" stop-color="#bbb" stop-opacity=".1" />
 5 |         <stop offset="1" stop-opacity=".1" />
 6 |     </linearGradient>
 7 |     <clipPath id="r">
 8 |         <rect width="138" height="20" rx="3" fill="#fff" />
 9 |     </clipPath>
10 |     <g clip-path="url(#r)">
11 |         <rect width="55" height="20" fill="#555" />
12 |         <rect x="55" width="83" height="20" fill="#fe7d37" />
13 |         <rect width="138" height="20" fill="url(#s)" />
14 |     </g>
15 |     <g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" text-rendering="geometricPrecision" font-size="110">
16 |         <text aria-hidden="true" x="285" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="450">lifecycle</text>
17 |         <text x="285" y="140" transform="scale(.1)" fill="#fff" textLength="450">lifecycle</text>
18 |         <text aria-hidden="true" x="955" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="730">experimental</text>
19 |         <text x="955" y="140" transform="scale(.1)" fill="#fff" textLength="730">experimental</text>
20 |     </g>
21 | </svg>
22 | 


--------------------------------------------------------------------------------
/man/figures/lifecycle-maturing.svg:
--------------------------------------------------------------------------------
 1 | <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="116" height="20" role="img" aria-label="lifecycle: maturing">
 2 |     <title>lifecycle: maturing</title>
 3 |     <linearGradient id="s" x2="0" y2="100%">
 4 |         <stop offset="0" stop-color="#bbb" stop-opacity=".1" />
 5 |         <stop offset="1" stop-opacity=".1" />
 6 |     </linearGradient>
 7 |     <clipPath id="r">
 8 |         <rect width="116" height="20" rx="3" fill="#fff" />
 9 |     </clipPath>
10 |     <g clip-path="url(#r)">
11 |         <rect width="55" height="20" fill="#555" />
12 |         <rect x="55" width="61" height="20" fill="#007ec6" />
13 |         <rect width="116" height="20" fill="url(#s)" />
14 |     </g>
15 |     <g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" text-rendering="geometricPrecision" font-size="110">
16 |         <text aria-hidden="true" x="285" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="450">lifecycle</text>
17 |         <text x="285" y="140" transform="scale(.1)" fill="#fff" textLength="450">lifecycle</text>
18 |         <text aria-hidden="true" x="845" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="510">maturing</text>
19 |         <text x="845" y="140" transform="scale(.1)" fill="#fff" textLength="510">maturing</text>
20 |     </g>
21 | </svg>
22 | 


--------------------------------------------------------------------------------
/man/figures/lifecycle-questioning.svg:
--------------------------------------------------------------------------------
 1 | <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="128" height="20" role="img" aria-label="lifecycle: questioning">
 2 |     <title>lifecycle: questioning</title>
 3 |     <linearGradient id="s" x2="0" y2="100%">
 4 |         <stop offset="0" stop-color="#bbb" stop-opacity=".1" />
 5 |         <stop offset="1" stop-opacity=".1" />
 6 |     </linearGradient>
 7 |     <clipPath id="r">
 8 |         <rect width="128" height="20" rx="3" fill="#fff" />
 9 |     </clipPath>
10 |     <g clip-path="url(#r)">
11 |         <rect width="55" height="20" fill="#555" />
12 |         <rect x="55" width="73" height="20" fill="#007ec6" />
13 |         <rect width="128" height="20" fill="url(#s)" />
14 |     </g>
15 |     <g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" text-rendering="geometricPrecision" font-size="110">
16 |         <text aria-hidden="true" x="285" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="450">lifecycle</text>
17 |         <text x="285" y="140" transform="scale(.1)" fill="#fff" textLength="450">lifecycle</text>
18 |         <text aria-hidden="true" x="905" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="630">questioning</text>
19 |         <text x="905" y="140" transform="scale(.1)" fill="#fff" textLength="630">questioning</text>
20 |     </g>
21 | </svg>
22 | 


--------------------------------------------------------------------------------
/man/figures/lifecycle-soft-deprecated.svg:
--------------------------------------------------------------------------------
 1 | <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="152" height="20" role="img" aria-label="lifecycle: soft-deprecated">
 2 |     <title>lifecycle: soft-deprecated</title>
 3 |     <linearGradient id="s" x2="0" y2="100%">
 4 |         <stop offset="0" stop-color="#bbb" stop-opacity=".1" />
 5 |         <stop offset="1" stop-opacity=".1" />
 6 |     </linearGradient>
 7 |     <clipPath id="r">
 8 |         <rect width="152" height="20" rx="3" fill="#fff" />
 9 |     </clipPath>
10 |     <g clip-path="url(#r)">
11 |         <rect width="55" height="20" fill="#555" />
12 |         <rect x="55" width="97" height="20" fill="#007ec6" />
13 |         <rect width="152" height="20" fill="url(#s)" />
14 |     </g>
15 |     <g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" text-rendering="geometricPrecision" font-size="110">
16 |         <text aria-hidden="true" x="285" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="450">lifecycle</text>
17 |         <text x="285" y="140" transform="scale(.1)" fill="#fff" textLength="450">lifecycle</text>
18 |         <text aria-hidden="true" x="1025" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="870">soft-deprecated</text>
19 |         <text x="1025" y="140" transform="scale(.1)" fill="#fff" textLength="870">soft-deprecated</text>
20 |     </g>
21 | </svg>
22 | 


--------------------------------------------------------------------------------
/man/figures/lifecycle-stable.svg:
--------------------------------------------------------------------------------
 1 | <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="98" height="20" role="img" aria-label="lifecycle: stable">
 2 |     <title>lifecycle: stable</title>
 3 |     <linearGradient id="s" x2="0" y2="100%">
 4 |         <stop offset="0" stop-color="#bbb" stop-opacity=".1" />
 5 |         <stop offset="1" stop-opacity=".1" />
 6 |     </linearGradient>
 7 |     <clipPath id="r">
 8 |         <rect width="98" height="20" rx="3" fill="#fff" />
 9 |     </clipPath>
10 |     <g clip-path="url(#r)">
11 |         <rect width="55" height="20" fill="#555" />
12 |         <rect x="55" width="43" height="20" fill="#4c1" />
13 |         <rect width="98" height="20" fill="url(#s)" />
14 |     </g>
15 |     <g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" text-rendering="geometricPrecision" font-size="110">
16 |         <text aria-hidden="true" x="285" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="450">
17 |       lifecycle
18 |     </text>
19 |         <text x="285" y="140" transform="scale(.1)" fill="#fff" textLength="450">
20 |       lifecycle
21 |     </text>
22 |         <text aria-hidden="true" x="755" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="330">
23 |       stable
24 |     </text>
25 |         <text x="755" y="140" transform="scale(.1)" fill="#fff" textLength="330">
26 |       stable
27 |     </text>
28 |     </g>
29 | </svg>
30 | 


--------------------------------------------------------------------------------
/man/figures/lifecycle-superseded.svg:
--------------------------------------------------------------------------------
 1 | <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="128" height="20" role="img" aria-label="lifecycle: superseded">
 2 |     <title>lifecycle: superseded</title>
 3 |     <linearGradient id="s" x2="0" y2="100%">
 4 |         <stop offset="0" stop-color="#bbb" stop-opacity=".1" />
 5 |         <stop offset="1" stop-opacity=".1" />
 6 |     </linearGradient>
 7 |     <clipPath id="r">
 8 |         <rect width="128" height="20" rx="3" fill="#fff" />
 9 |     </clipPath>
10 |     <g clip-path="url(#r)">
11 |         <rect width="55" height="20" fill="#555" />
12 |         <rect x="55" width="73" height="20" fill="#007ec6" />
13 |         <rect width="128" height="20" fill="url(#s)" />
14 |     </g>
15 |     <g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" text-rendering="geometricPrecision" font-size="110">
16 |         <text aria-hidden="true" x="285" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="450">lifecycle</text>
17 |         <text x="285" y="140" transform="scale(.1)" fill="#fff" textLength="450">lifecycle</text>
18 |         <text aria-hidden="true" x="905" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="630">superseded</text>
19 |         <text x="905" y="140" transform="scale(.1)" fill="#fff" textLength="630">superseded</text>
20 |     </g>
21 | </svg>
22 | 


--------------------------------------------------------------------------------
/man/oldclass.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/S4.R
 3 | \name{xml_document-class}
 4 | \alias{xml_document-class}
 5 | \alias{xml_missing-class}
 6 | \alias{xml_node-class}
 7 | \alias{xml_nodeset-class}
 8 | \title{Register S4 classes}
 9 | \description{
10 | Classes are exported so they can be re-used within S4 classes, see \code{\link[methods:setOldClass]{methods::setOldClass()}}.
11 | \itemize{
12 | \item \code{xml_document}: a complete document.
13 | \item \code{xml_nodeset}: a \emph{set} of nodes within a document.
14 | \item \code{xml_missing}: a missing object, e.g. for an empty result set.
15 | \item \code{xml_node}: a single node in a document.
16 | }
17 | }
18 | \keyword{internal}
19 | 


--------------------------------------------------------------------------------
/man/read_xml.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/xml_parse.R
  3 | \name{read_xml}
  4 | \alias{read_xml}
  5 | \alias{read_html}
  6 | \alias{read_xml.character}
  7 | \alias{read_xml.raw}
  8 | \alias{read_xml.connection}
  9 | \title{Read HTML or XML.}
 10 | \usage{
 11 | read_xml(x, encoding = "", ..., as_html = FALSE, options = "NOBLANKS")
 12 | 
 13 | read_html(x, encoding = "", ..., options = c("RECOVER", "NOERROR", "NOBLANKS"))
 14 | 
 15 | \method{read_xml}{character}(x, encoding = "", ..., as_html = FALSE, options = "NOBLANKS")
 16 | 
 17 | \method{read_xml}{raw}(
 18 |   x,
 19 |   encoding = "",
 20 |   base_url = "",
 21 |   ...,
 22 |   as_html = FALSE,
 23 |   options = "NOBLANKS"
 24 | )
 25 | 
 26 | \method{read_xml}{connection}(
 27 |   x,
 28 |   encoding = "",
 29 |   n = 64 * 1024,
 30 |   verbose = FALSE,
 31 |   ...,
 32 |   base_url = "",
 33 |   as_html = FALSE,
 34 |   options = "NOBLANKS"
 35 | )
 36 | }
 37 | \arguments{
 38 | \item{x}{A string, a connection, or a raw vector.
 39 | 
 40 | A string can be either a path, a url or literal xml. Urls will
 41 | be converted into connections either using \code{base::url} or, if
 42 | installed, \code{curl::curl}. Local paths ending in \code{.gz},
 43 | \code{.bz2}, \code{.xz}, \code{.zip} will be automatically uncompressed.
 44 | 
 45 | If a connection, the complete connection is read into a raw vector before
 46 | being parsed.}
 47 | 
 48 | \item{encoding}{Specify a default encoding for the document. Unless
 49 | otherwise specified XML documents are assumed to be in UTF-8 or
 50 | UTF-16. If the document is not UTF-8/16, and lacks an explicit
 51 | encoding directive, this allows you to supply a default.}
 52 | 
 53 | \item{...}{Additional arguments passed on to methods.}
 54 | 
 55 | \item{as_html}{Optionally parse an xml file as if it's html.}
 56 | 
 57 | \item{options}{Set parsing options for the libxml2 parser. Zero or more of
 58 | \Sexpr[results=rd, stage=build]{xml2:::describe_options(xml2:::xml_parse_options())}}
 59 | 
 60 | \item{base_url}{When loading from a connection, raw vector or literal
 61 | html/xml, this allows you to specify a base url for the document. Base
 62 | urls are used to turn relative urls into absolute urls.}
 63 | 
 64 | \item{n}{If \code{x} is a connection, the number of bytes to read per
 65 | iteration. Defaults to 64kb.}
 66 | 
 67 | \item{verbose}{When reading from a slow connection, this prints some
 68 | output on every iteration so you know it's working.}
 69 | }
 70 | \value{
 71 | An XML document. HTML is normalised to valid XML - this may not
 72 | be exactly the same transformation performed by the browser, but it's
 73 | a reasonable approximation.
 74 | }
 75 | \description{
 76 | Read HTML or XML.
 77 | }
 78 | \section{Setting the "user agent" header}{
 79 | 
 80 | 
 81 | When performing web scraping tasks it is both good practice --- and often required ---
 82 | to set the \href{https://en.wikipedia.org/wiki/User_agent}{user agent} request header
 83 | to a specific value. Sometimes this value is assigned to emulate a browser in order
 84 | to have content render in a certain way (e.g. \verb{Mozilla/5.0 (Windows NT 5.1; rv:52.0) Gecko/20100101 Firefox/52.0} to emulate more recent Windows browsers). Most often,
 85 | this value should be set to provide the web resource owner information on who you are
 86 | and the intent of your actions like this Google scraping bot user agent identifier:
 87 | \verb{Googlebot/2.1 (+http://www.google.com/bot.html)}.
 88 | 
 89 | You can set the HTTP user agent for URL-based requests using \code{\link[httr:set_config]{httr::set_config()}} and \code{\link[httr:user_agent]{httr::user_agent()}}:
 90 | 
 91 | \code{httr::set_config(httr::user_agent("me@example.com; +https://example.com/info.html"))}
 92 | 
 93 | \code{\link[httr:set_config]{httr::set_config()}} changes the configuration globally,
 94 | \code{\link[httr:with_config]{httr::with_config()}} can be used to change configuration temporarily.
 95 | }
 96 | 
 97 | \examples{
 98 | # Literal xml/html is useful for small examples
 99 | read_xml("<foo><bar /></foo>")
100 | read_html("<html><title>Hi<title></html>")
101 | read_html("<html><title>Hi")
102 | 
103 | # From a local path
104 | read_html(system.file("extdata", "r-project.html", package = "xml2"))
105 | 
106 | \dontrun{
107 | # From a url
108 | cd <- read_xml(xml2_example("cd_catalog.xml"))
109 | me <- read_html("http://had.co.nz")
110 | }
111 | }
112 | 


--------------------------------------------------------------------------------
/man/url_absolute.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/xml_url.R
 3 | \name{url_absolute}
 4 | \alias{url_absolute}
 5 | \alias{url_relative}
 6 | \title{Convert between relative and absolute urls.}
 7 | \usage{
 8 | url_absolute(x, base)
 9 | 
10 | url_relative(x, base)
11 | }
12 | \arguments{
13 | \item{x}{A character vector of urls relative to that base}
14 | 
15 | \item{base}{A string giving a base url.}
16 | }
17 | \value{
18 | A character vector of urls
19 | }
20 | \description{
21 | Convert between relative and absolute urls.
22 | }
23 | \examples{
24 | url_absolute(c(".", "..", "/", "/x"), "http://hadley.nz/a/b/c/d")
25 | 
26 | url_relative("http://hadley.nz/a/c", "http://hadley.nz")
27 | url_relative("http://hadley.nz/a/c", "http://hadley.nz/")
28 | url_relative("http://hadley.nz/a/c", "http://hadley.nz/a/b")
29 | url_relative("http://hadley.nz/a/c", "http://hadley.nz/a/b/")
30 | }
31 | \seealso{
32 | \code{\link{xml_url}} to retrieve the URL associated with a document
33 | }
34 | 


--------------------------------------------------------------------------------
/man/url_escape.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/xml_url.R
 3 | \name{url_escape}
 4 | \alias{url_escape}
 5 | \alias{url_unescape}
 6 | \title{Escape and unescape urls.}
 7 | \usage{
 8 | url_escape(x, reserved = "")
 9 | 
10 | url_unescape(x)
11 | }
12 | \arguments{
13 | \item{x}{A character vector of urls.}
14 | 
15 | \item{reserved}{A string containing additional characters to avoid escaping.}
16 | }
17 | \description{
18 | Escape and unescape urls.
19 | }
20 | \examples{
21 | url_escape("a b c")
22 | url_escape("a b c", "")
23 | 
24 | url_unescape("a\%20b\%2fc")
25 | url_unescape("\%C2\%B5")
26 | }
27 | 


--------------------------------------------------------------------------------
/man/url_parse.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/xml_url.R
 3 | \name{url_parse}
 4 | \alias{url_parse}
 5 | \title{Parse a url into its component pieces.}
 6 | \usage{
 7 | url_parse(x)
 8 | }
 9 | \arguments{
10 | \item{x}{A character vector of urls.}
11 | }
12 | \value{
13 | A dataframe with one row for each element of \code{x} and
14 | columns: scheme, server, port, user, path, query, fragment.
15 | }
16 | \description{
17 | Parse a url into its component pieces.
18 | }
19 | \examples{
20 | url_parse("http://had.co.nz/")
21 | url_parse("http://had.co.nz:1234/")
22 | url_parse("http://had.co.nz:1234/?a=1&b=2")
23 | url_parse("http://had.co.nz:1234/?a=1&b=2#def")
24 | }
25 | 


--------------------------------------------------------------------------------
/man/write_xml.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/xml_write.R
 3 | \name{write_xml}
 4 | \alias{write_xml}
 5 | \alias{write_xml.xml_document}
 6 | \alias{write_html}
 7 | \alias{write_html.xml_document}
 8 | \title{Write XML or HTML to disk.}
 9 | \usage{
10 | write_xml(x, file, ...)
11 | 
12 | \method{write_xml}{xml_document}(x, file, ..., options = "format", encoding = "UTF-8")
13 | 
14 | write_html(x, file, ...)
15 | 
16 | \method{write_html}{xml_document}(x, file, ..., options = "format", encoding = "UTF-8")
17 | }
18 | \arguments{
19 | \item{x}{A document or node to write to disk. It's not possible to
20 | save nodesets containing more than one node.}
21 | 
22 | \item{file}{Path to file or connection to write to.}
23 | 
24 | \item{...}{additional arguments passed to methods.}
25 | 
26 | \item{options}{default: \sQuote{format}. Zero or more of
27 | \Sexpr[results=rd, stage=build]{xml2:::describe_options(xml2:::xml_save_options())}}
28 | 
29 | \item{encoding}{The character encoding to use in the document. The default
30 | encoding is \sQuote{UTF-8}. Available encodings are specified at
31 | \url{http://xmlsoft.org/html/libxml-encoding.html#xmlCharEncoding}.}
32 | }
33 | \description{
34 | This writes out both XML and normalised HTML. The default behavior will
35 | output the same format which was read. If you want to force output pass
36 | \code{options = "as_xml"} or \code{options = "as_html"} respectively.
37 | }
38 | \examples{
39 | h <- read_html("<p>Hi!</p>")
40 | 
41 | tmp <- tempfile(fileext = ".xml")
42 | write_xml(h, tmp, options = "format")
43 | readLines(tmp)
44 | 
45 | # write formatted HTML output
46 | write_html(h, tmp, options = "format")
47 | readLines(tmp)
48 | }
49 | 


--------------------------------------------------------------------------------
/man/xml2-package.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/xml2-package.R
 3 | \docType{package}
 4 | \name{xml2-package}
 5 | \alias{xml2}
 6 | \alias{xml2-package}
 7 | \title{xml2: Parse XML}
 8 | \description{
 9 | Work with XML files using a simple, consistent interface. Built on top of the 'libxml2' C library.
10 | }
11 | \seealso{
12 | Useful links:
13 | \itemize{
14 |   \item \url{https://xml2.r-lib.org}
15 |   \item \url{https://github.com/r-lib/xml2}
16 |   \item Report bugs at \url{https://github.com/r-lib/xml2/issues}
17 | }
18 | 
19 | }
20 | \author{
21 | \strong{Maintainer}: Hadley Wickham \email{hadley@posit.co}
22 | 
23 | Authors:
24 | \itemize{
25 |   \item Jim Hester
26 |   \item Jeroen Ooms
27 | }
28 | 
29 | Other contributors:
30 | \itemize{
31 |   \item Posit Software, PBC [copyright holder, funder]
32 |   \item R Foundation (Copy of R-project homepage cached as example) [contributor]
33 | }
34 | 
35 | }
36 | \keyword{internal}
37 | 


--------------------------------------------------------------------------------
/man/xml2_example.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{xml2_example}
 4 | \alias{xml2_example}
 5 | \title{Get path to an xml2 example}
 6 | \usage{
 7 | xml2_example(path = NULL)
 8 | }
 9 | \arguments{
10 | \item{path}{Name of file. If \code{NULL}, the example files will be listed.}
11 | }
12 | \description{
13 | xml2 comes bundled with a number of sample files in its \sQuote{inst/extdata}
14 | directory. This function makes them easy to access.
15 | }
16 | 


--------------------------------------------------------------------------------
/man/xml_attr.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/xml_attr.R
  3 | \name{xml_attr}
  4 | \alias{xml_attr}
  5 | \alias{xml_has_attr}
  6 | \alias{xml_attrs}
  7 | \alias{xml_attr<-}
  8 | \alias{xml_set_attr}
  9 | \alias{xml_attrs<-}
 10 | \alias{xml_set_attrs}
 11 | \title{Retrieve an attribute.}
 12 | \usage{
 13 | xml_attr(x, attr, ns = character(), default = NA_character_)
 14 | 
 15 | xml_has_attr(x, attr, ns = character())
 16 | 
 17 | xml_attrs(x, ns = character())
 18 | 
 19 | xml_attr(x, attr, ns = character()) <- value
 20 | 
 21 | xml_set_attr(x, attr, value, ns = character())
 22 | 
 23 | xml_attrs(x, ns = character()) <- value
 24 | 
 25 | xml_set_attrs(x, value, ns = character())
 26 | }
 27 | \arguments{
 28 | \item{x}{A document, node, or node set.}
 29 | 
 30 | \item{attr}{Name of attribute to extract.}
 31 | 
 32 | \item{ns}{Optionally, a named vector giving prefix-url pairs, as produced
 33 | by \code{\link[=xml_ns]{xml_ns()}}. If provided, all names will be explicitly
 34 | qualified with the ns prefix, i.e. if the element \code{bar} is defined
 35 | in namespace \code{foo}, it will be called \code{foo:bar}. (And
 36 | similarly for attributes). Default namespaces must be given an explicit
 37 | name. The ns is ignored when using \code{\link[=xml_name<-]{xml_name<-()}} and
 38 | \code{\link[=xml_set_name]{xml_set_name()}}.}
 39 | 
 40 | \item{default}{Default value to use when attribute is not present.}
 41 | 
 42 | \item{value}{character vector of new value.}
 43 | }
 44 | \value{
 45 | \code{xml_attr()} returns a character vector. \code{NA} is used
 46 | to represent attributes that aren't defined.
 47 | 
 48 | \code{xml_has_attr()} returns a logical vector.
 49 | 
 50 | \code{xml_attrs()} returns a named character vector if \code{x} is a single
 51 | node, or a list of character vectors if given a nodeset.
 52 | }
 53 | \description{
 54 | \code{xml_attrs()} retrieves all attribute values as a named character
 55 | vector, \verb{xml_attrs() <-} or \code{xml_set_attrs()} sets all attribute
 56 | values. \code{xml_attr()} retrieves the value of a single attribute and
 57 | \verb{xml_attr() <-} or \code{xml_set_attr()} modifies its value. If the
 58 | attribute doesn't exist, it will return \code{default}, which defaults to
 59 | \code{NA}. \code{xml_has_attr()} tests if an attribute is present.
 60 | }
 61 | \examples{
 62 | x <- read_xml("<root id='1'><child id ='a' /><child id='b' d='b'/></root>")
 63 | xml_attr(x, "id")
 64 | xml_attr(x, "apple")
 65 | xml_attrs(x)
 66 | 
 67 | kids <- xml_children(x)
 68 | kids
 69 | xml_attr(kids, "id")
 70 | xml_has_attr(kids, "id")
 71 | xml_attrs(kids)
 72 | 
 73 | # Missing attributes give missing values
 74 | xml_attr(xml_children(x), "d")
 75 | xml_has_attr(xml_children(x), "d")
 76 | 
 77 | # If the document has a namespace, use the ns argument and
 78 | # qualified attribute names
 79 | x <- read_xml('
 80 |  <root xmlns:b="http://bar.com" xmlns:f="http://foo.com">
 81 |    <doc b:id="b" f:id="f" id="" />
 82 |  </root>
 83 | ')
 84 | doc <- xml_children(x)[[1]]
 85 | ns <- xml_ns(x)
 86 | 
 87 | xml_attrs(doc)
 88 | xml_attrs(doc, ns)
 89 | 
 90 | # If you don't supply a ns spec, you get the first matching attribute
 91 | xml_attr(doc, "id")
 92 | xml_attr(doc, "b:id", ns)
 93 | xml_attr(doc, "id", ns)
 94 | 
 95 | # Can set a single attribute with `xml_attr() <-` or `xml_set_attr()`
 96 | xml_attr(doc, "id") <- "one"
 97 | xml_set_attr(doc, "id", "two")
 98 | 
 99 | # Or set multiple attributes with `xml_attrs() <-` or `xml_set_attrs()`
100 | xml_attrs(doc) <- c("b:id" = "one", "f:id" = "two", "id" = "three")
101 | xml_set_attrs(doc, c("b:id" = "one", "f:id" = "two", "id" = "three"))
102 | }
103 | 


--------------------------------------------------------------------------------
/man/xml_cdata.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/classes.R
 3 | \name{xml_cdata}
 4 | \alias{xml_cdata}
 5 | \title{Construct a cdata node}
 6 | \usage{
 7 | xml_cdata(content)
 8 | }
 9 | \arguments{
10 | \item{content}{The CDATA content, does not include \verb{<![CDATA[}}
11 | }
12 | \description{
13 | Construct a cdata node
14 | }
15 | \examples{
16 | x <- xml_new_root("root")
17 | xml_add_child(x, xml_cdata("<d/>"))
18 | as.character(x)
19 | }
20 | 


--------------------------------------------------------------------------------
/man/xml_children.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/xml_children.R
 3 | \name{xml_children}
 4 | \alias{xml_children}
 5 | \alias{xml_child}
 6 | \alias{xml_contents}
 7 | \alias{xml_parents}
 8 | \alias{xml_siblings}
 9 | \alias{xml_parent}
10 | \alias{xml_length}
11 | \alias{xml_root}
12 | \title{Navigate around the family tree.}
13 | \usage{
14 | xml_children(x)
15 | 
16 | xml_child(x, search = 1, ns = xml_ns(x))
17 | 
18 | xml_contents(x)
19 | 
20 | xml_parents(x)
21 | 
22 | xml_siblings(x)
23 | 
24 | xml_parent(x)
25 | 
26 | xml_length(x, only_elements = TRUE)
27 | 
28 | xml_root(x)
29 | }
30 | \arguments{
31 | \item{x}{A document, node, or node set.}
32 | 
33 | \item{search}{For \code{xml_child}, either the child number to return (by
34 | position), or the name of the child node to return. If there are multiple
35 | child nodes with the same name, the first will be returned}
36 | 
37 | \item{ns}{Optionally, a named vector giving prefix-url pairs, as produced
38 | by \code{\link[=xml_ns]{xml_ns()}}. If provided, all names will be explicitly
39 | qualified with the ns prefix, i.e. if the element \code{bar} is defined
40 | in namespace \code{foo}, it will be called \code{foo:bar}. (And
41 | similarly for attributes). Default namespaces must be given an explicit
42 | name. The ns is ignored when using \code{\link[=xml_name<-]{xml_name<-()}} and
43 | \code{\link[=xml_set_name]{xml_set_name()}}.}
44 | 
45 | \item{only_elements}{For \code{xml_length}, should it count all children,
46 | or just children that are elements (the default)?}
47 | }
48 | \value{
49 | A node or nodeset (possibly empty). Results are always de-duplicated.
50 | }
51 | \description{
52 | \code{xml_children} returns only elements, \code{xml_contents} returns
53 | all nodes. \code{xml_length} returns the number of children.
54 | \code{xml_parent} returns the parent node, \code{xml_parents}
55 | returns all parents up to the root. \code{xml_siblings} returns all nodes
56 | at the same level. \code{xml_child} makes it easy to specify a specific
57 | child to return.
58 | }
59 | \examples{
60 | x <- read_xml("<foo> <bar><boo /></bar> <baz/> </foo>")
61 | xml_children(x)
62 | xml_children(xml_children(x))
63 | xml_siblings(xml_children(x)[[1]])
64 | 
65 | # Note that each unique node only appears once in the output
66 | xml_parent(xml_children(x))
67 | 
68 | # Mixed content
69 | x <- read_xml("<foo> a <b/> c <d>e</d> f</foo>")
70 | # Children gets the elements, contents gets all node types
71 | xml_children(x)
72 | xml_contents(x)
73 | 
74 | xml_length(x)
75 | xml_length(x, only_elements = FALSE)
76 | 
77 | # xml_child makes it easier to select specific children
78 | xml_child(x)
79 | xml_child(x, 2)
80 | xml_child(x, "baz")
81 | }
82 | 


--------------------------------------------------------------------------------
/man/xml_comment.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/classes.R
 3 | \name{xml_comment}
 4 | \alias{xml_comment}
 5 | \title{Construct a comment node}
 6 | \usage{
 7 | xml_comment(content)
 8 | }
 9 | \arguments{
10 | \item{content}{The comment content}
11 | }
12 | \description{
13 | Construct a comment node
14 | }
15 | \examples{
16 | x <- xml_new_document()
17 | r <- xml_add_child(x, "root")
18 | xml_add_child(r, xml_comment("Hello!"))
19 | as.character(x)
20 | }
21 | 


--------------------------------------------------------------------------------
/man/xml_dtd.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/classes.R
 3 | \name{xml_dtd}
 4 | \alias{xml_dtd}
 5 | \title{Construct a document type definition}
 6 | \usage{
 7 | xml_dtd(name = "", external_id = "", system_id = "")
 8 | }
 9 | \arguments{
10 | \item{name}{The name of the declaration}
11 | 
12 | \item{external_id}{The external ID of the declaration}
13 | 
14 | \item{system_id}{The system ID of the declaration}
15 | }
16 | \description{
17 | This is used to create simple document type definitions. If you need to
18 | create a more complicated definition with internal subsets it is recommended
19 | to parse a string directly with \code{read_xml()}.
20 | }
21 | \examples{
22 | r <- xml_new_root(
23 |   xml_dtd(
24 |     "html",
25 |     "-//W3C//DTD XHTML 1.0 Transitional//EN",
26 |     "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"
27 |   )
28 | )
29 | 
30 | # Use read_xml directly for more complicated DTD
31 | d <- read_xml(
32 |   '<!DOCTYPE doc [
33 | <!ELEMENT doc (#PCDATA)>
34 | <!ENTITY foo " test ">
35 | ]>
36 | <doc>This is a valid document &foo; !</doc>'
37 | )
38 | }
39 | 


--------------------------------------------------------------------------------
/man/xml_find_all.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/xml_find.R
  3 | \name{xml_find_all}
  4 | \alias{xml_find_all}
  5 | \alias{xml_find_all.xml_nodeset}
  6 | \alias{xml_find_first}
  7 | \alias{xml_find_num}
  8 | \alias{xml_find_int}
  9 | \alias{xml_find_chr}
 10 | \alias{xml_find_lgl}
 11 | \alias{xml_find_one}
 12 | \title{Find nodes that match an xpath expression.}
 13 | \usage{
 14 | xml_find_all(x, xpath, ns = xml_ns(x), ...)
 15 | 
 16 | \method{xml_find_all}{xml_nodeset}(x, xpath, ns = xml_ns(x), flatten = TRUE, ...)
 17 | 
 18 | xml_find_first(x, xpath, ns = xml_ns(x))
 19 | 
 20 | xml_find_num(x, xpath, ns = xml_ns(x))
 21 | 
 22 | xml_find_int(x, xpath, ns = xml_ns(x))
 23 | 
 24 | xml_find_chr(x, xpath, ns = xml_ns(x))
 25 | 
 26 | xml_find_lgl(x, xpath, ns = xml_ns(x))
 27 | }
 28 | \arguments{
 29 | \item{x}{A document, node, or node set.}
 30 | 
 31 | \item{xpath}{A string containing an xpath (1.0) expression.}
 32 | 
 33 | \item{ns}{Optionally, a named vector giving prefix-url pairs, as produced
 34 | by \code{\link[=xml_ns]{xml_ns()}}. If provided, all names will be explicitly
 35 | qualified with the ns prefix, i.e. if the element \code{bar} is defined
 36 | in namespace \code{foo}, it will be called \code{foo:bar}. (And
 37 | similarly for attributes). Default namespaces must be given an explicit
 38 | name. The ns is ignored when using \code{\link[=xml_name<-]{xml_name<-()}} and
 39 | \code{\link[=xml_set_name]{xml_set_name()}}.}
 40 | 
 41 | \item{...}{Further arguments passed to or from other methods.}
 42 | 
 43 | \item{flatten}{A logical indicating whether to return a single, flattened
 44 | nodeset or a list of nodesets.}
 45 | }
 46 | \value{
 47 | \code{xml_find_all} returns a nodeset if applied to a node, and a nodeset
 48 | or a list of nodesets if applied to a nodeset. If there are no matches,
 49 | the nodeset(s) will be empty. Within each nodeset, the result will always
 50 | be unique; repeated nodes are automatically de-duplicated.
 51 | 
 52 | \code{xml_find_first} returns a node if applied to a node, and a nodeset
 53 | if applied to a nodeset. The output is \emph{always} the same size as
 54 | the input. If there are no matches, \code{xml_find_first} will return a
 55 | missing node; if there are multiple matches, it will return the first
 56 | only.
 57 | 
 58 | \code{xml_find_num}, \code{xml_find_chr}, \code{xml_find_lgl} return
 59 | numeric, character and logical results respectively.
 60 | }
 61 | \description{
 62 | Xpath is like regular expressions for trees - it's worth learning if
 63 | you're trying to extract nodes from arbitrary locations in a document.
 64 | Use \code{xml_find_all} to find all matches - if there's no match you'll
 65 | get an empty result. Use \code{xml_find_first} to find a specific match -
 66 | if there's no match you'll get an \code{xml_missing} node.
 67 | }
 68 | \section{Deprecated functions}{
 69 | 
 70 | \code{xml_find_one()} has been deprecated. Instead use
 71 | \code{xml_find_first()}.
 72 | }
 73 | 
 74 | \examples{
 75 | x <- read_xml("<foo><bar><baz/></bar><baz/></foo>")
 76 | xml_find_all(x, ".//baz")
 77 | xml_path(xml_find_all(x, ".//baz"))
 78 | 
 79 | # Note the difference between .// and //
 80 | # //  finds anywhere in the document (ignoring the current node)
 81 | # .// finds anywhere beneath the current node
 82 | (bar <- xml_find_all(x, ".//bar"))
 83 | xml_find_all(bar, ".//baz")
 84 | xml_find_all(bar, "//baz")
 85 | 
 86 | # Find all vs find one -----------------------------------------------------
 87 | x <- read_xml("<body>
 88 |   <p>Some <b>text</b>.</p>
 89 |   <p>Some <b>other</b> <b>text</b>.</p>
 90 |   <p>No bold here!</p>
 91 | </body>")
 92 | para <- xml_find_all(x, ".//p")
 93 | 
 94 | # By default, if you apply xml_find_all to a nodeset, it finds all matches,
 95 | # de-duplicates them, and returns as a single nodeset. This means you
 96 | # never know how many results you'll get
 97 | xml_find_all(para, ".//b")
 98 | 
 99 | # If you set flatten to FALSE, though, xml_find_all will return a list of
100 | # nodesets, where each nodeset contains the matches for the corresponding
101 | # node in the original nodeset.
102 | xml_find_all(para, ".//b", flatten = FALSE)
103 | 
104 | # xml_find_first only returns the first match per input node. If there are 0
105 | # matches it will return a missing node
106 | xml_find_first(para, ".//b")
107 | xml_text(xml_find_first(para, ".//b"))
108 | 
109 | # Namespaces ---------------------------------------------------------------
110 | # If the document uses namespaces, you'll need to use xml_ns to form
111 | # a unique mapping between full namespace url and a short prefix
112 | x <- read_xml('
113 |  <root xmlns:f = "http://foo.com" xmlns:g = "http://bar.com">
114 |    <f:doc><g:baz /></f:doc>
115 |    <f:doc><g:baz /></f:doc>
116 |  </root>
117 | ')
118 | xml_find_all(x, ".//f:doc")
119 | xml_find_all(x, ".//f:doc", xml_ns(x))
120 | }
121 | \seealso{
122 | \code{\link[=xml_ns_strip]{xml_ns_strip()}} to remove the default namespaces
123 | }
124 | 


--------------------------------------------------------------------------------
/man/xml_missing.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/xml_missing.R
 3 | \name{xml_missing}
 4 | \alias{xml_missing}
 5 | \title{Construct a missing xml object}
 6 | \usage{
 7 | xml_missing()
 8 | }
 9 | \description{
10 | Construct a missing xml object
11 | }
12 | \keyword{internal}
13 | 


--------------------------------------------------------------------------------
/man/xml_name.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/xml_name.R
 3 | \name{xml_name}
 4 | \alias{xml_name}
 5 | \alias{xml_name<-}
 6 | \alias{xml_set_name}
 7 | \title{The (tag) name of an xml element.}
 8 | \usage{
 9 | xml_name(x, ns = character())
10 | 
11 | xml_name(x, ns = character()) <- value
12 | 
13 | xml_set_name(x, value, ns = character())
14 | }
15 | \arguments{
16 | \item{x}{A document, node, or node set.}
17 | 
18 | \item{ns}{Optionally, a named vector giving prefix-url pairs, as produced
19 | by \code{\link[=xml_ns]{xml_ns()}}. If provided, all names will be explicitly
20 | qualified with the ns prefix, i.e. if the element \code{bar} is defined
21 | in namespace \code{foo}, it will be called \code{foo:bar}. (And
22 | similarly for attributes). Default namespaces must be given an explicit
23 | name. The ns is ignored when using \code{\link[=xml_name<-]{xml_name<-()}} and
24 | \code{\link[=xml_set_name]{xml_set_name()}}.}
25 | 
26 | \item{value}{a character vector with replacement name.}
27 | }
28 | \value{
29 | A character vector.
30 | }
31 | \description{
32 | The (tag) name of an xml element.
33 | 
34 | Modify the (tag) name of an element
35 | }
36 | \examples{
37 | x <- read_xml("<bar>123</bar>")
38 | xml_name(x)
39 | 
40 | y <- read_xml("<bar><baz>1</baz>abc<foo /></bar>")
41 | z <- xml_children(y)
42 | xml_name(xml_children(y))
43 | }
44 | 


--------------------------------------------------------------------------------
/man/xml_new_document.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/xml_modify.R
 3 | \name{xml_new_document}
 4 | \alias{xml_new_document}
 5 | \alias{xml_new_root}
 6 | \title{Create a new document, possibly with a root node}
 7 | \usage{
 8 | xml_new_document(version = "1.0", encoding = "UTF-8")
 9 | 
10 | xml_new_root(
11 |   .value,
12 |   ...,
13 |   .copy = inherits(.value, "xml_node"),
14 |   .version = "1.0",
15 |   .encoding = "UTF-8"
16 | )
17 | }
18 | \arguments{
19 | \item{version}{The version number of the document.}
20 | 
21 | \item{encoding}{The character encoding to use in the document. The default
22 | encoding is \sQuote{UTF-8}. Available encodings are specified at
23 | \url{http://xmlsoft.org/html/libxml-encoding.html#xmlCharEncoding}.}
24 | 
25 | \item{.value}{node to insert.}
26 | 
27 | \item{...}{Named arguments are attributes or namespaces to set on the node;
28 | unnamed arguments are text to assign to the node.}
29 | 
30 | \item{.copy}{whether to copy the \code{.value} before replacing. If this is \code{FALSE}
31 | then the node will be moved from its current location.}
32 | 
33 | \item{.version}{The version number of the document, passed to \code{xml_new_document(version)}.}
34 | 
35 | \item{.encoding}{The encoding of the document, passed to \code{xml_new_document(encoding)}.}
36 | }
37 | \value{
38 | A \code{xml_document} object.
39 | }
40 | \description{
41 | \code{xml_new_document} creates only a new document without a root node. In
42 | most cases you should instead use \code{xml_new_root}, which creates a new
43 | document and assigns the root node in one step.
44 | }
45 | 


--------------------------------------------------------------------------------
/man/xml_ns.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/xml_namespaces.R
 3 | \name{xml_ns}
 4 | \alias{xml_ns}
 5 | \alias{xml_ns_rename}
 6 | \title{XML namespaces.}
 7 | \usage{
 8 | xml_ns(x)
 9 | 
10 | xml_ns_rename(old, ...)
11 | }
12 | \arguments{
13 | \item{x}{A document, node, or node set.}
14 | 
15 | \item{old, ...}{An existing xml_namespace object followed by name-value
16 | (old prefix-new prefix) pairs to replace.}
17 | }
18 | \value{
19 | A character vector with class \code{xml_namespace} so the
20 | default display is a little nicer.
21 | }
22 | \description{
23 | \code{xml_ns} extracts all namespaces from a document, matching each
24 | unique namespace url with the prefix it was first associated with. Default
25 | namespaces are named \code{d1}, \code{d2} etc. Use \code{xml_ns_rename}
26 | to change the prefixes. Once you have a namespace object, you can pass it to
27 | other functions to work with fully qualified names instead of local names.
28 | }
29 | \examples{
30 | x <- read_xml('
31 |  <root>
32 |    <doc1 xmlns = "http://foo.com"><baz /></doc1>
33 |    <doc2 xmlns = "http://bar.com"><baz /></doc2>
34 |  </root>
35 | ')
36 | xml_ns(x)
37 | 
38 | # When there are default namespaces, it's a good idea to rename
39 | # them to give informative names:
40 | ns <- xml_ns_rename(xml_ns(x), d1 = "foo", d2 = "bar")
41 | ns
42 | 
43 | # Now we can pass ns to other xml functions to use fully qualified names
44 | baz <- xml_children(xml_children(x))
45 | xml_name(baz)
46 | xml_name(baz, ns)
47 | 
48 | xml_find_all(x, "//baz")
49 | xml_find_all(x, "//foo:baz", ns)
50 | 
51 | str(as_list(x))
52 | str(as_list(x, ns))
53 | }
54 | 


--------------------------------------------------------------------------------
/man/xml_ns_strip.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/xml_modify.R
 3 | \name{xml_ns_strip}
 4 | \alias{xml_ns_strip}
 5 | \title{Strip the default namespaces from a document}
 6 | \usage{
 7 | xml_ns_strip(x)
 8 | }
 9 | \arguments{
10 | \item{x}{A document, node, or node set.}
11 | }
12 | \description{
13 | Strip the default namespaces from a document
14 | }
15 | \examples{
16 | x <- read_xml(
17 |   "<foo xmlns = 'http://foo.com'>
18 |    <baz/>
19 |    <bar xmlns = 'http://bar.com'>
20 |      <baz/>
21 |    </bar>
22 |   </foo>"
23 | )
24 | # Need to specify the default namespaces to find the baz nodes
25 | xml_find_all(x, "//d1:baz")
26 | xml_find_all(x, "//d2:baz")
27 | 
28 | # After stripping the default namespaces you can find both baz nodes directly
29 | xml_ns_strip(x)
30 | xml_find_all(x, "//baz")
31 | }
32 | 


--------------------------------------------------------------------------------
/man/xml_path.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/xml_path.R
 3 | \name{xml_path}
 4 | \alias{xml_path}
 5 | \title{Retrieve the xpath to a node}
 6 | \usage{
 7 | xml_path(x)
 8 | }
 9 | \arguments{
10 | \item{x}{A document, node, or node set.}
11 | }
12 | \value{
13 | A character vector.
14 | }
15 | \description{
16 | This is useful when you want to figure out where nodes matching an
17 | xpath expression live in a document.
18 | }
19 | \examples{
20 | x <- read_xml("<foo><bar><baz /></bar><baz /></foo>")
21 | xml_path(xml_find_all(x, ".//baz"))
22 | }
23 | 


--------------------------------------------------------------------------------
/man/xml_replace.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/xml_modify.R
 3 | \name{xml_replace}
 4 | \alias{xml_replace}
 5 | \alias{xml_add_sibling}
 6 | \alias{xml_add_child}
 7 | \alias{xml_add_parent}
 8 | \alias{xml_remove}
 9 | \title{Modify a tree by inserting, replacing or removing nodes}
10 | \usage{
11 | xml_replace(.x, .value, ..., .copy = TRUE)
12 | 
13 | xml_add_sibling(.x, .value, ..., .where = c("after", "before"), .copy = TRUE)
14 | 
15 | xml_add_child(.x, .value, ..., .where = length(xml_children(.x)), .copy = TRUE)
16 | 
17 | xml_add_parent(.x, .value, ...)
18 | 
19 | xml_remove(.x, free = FALSE)
20 | }
21 | \arguments{
22 | \item{.x}{a document, node or nodeset.}
23 | 
24 | \item{.value}{node to insert.}
25 | 
26 | \item{...}{Named arguments are attributes or namespaces to set on the node;
27 | unnamed arguments are text to assign to the node.}
28 | 
29 | \item{.copy}{whether to copy the \code{.value} before replacing. If this is \code{FALSE}
30 | then the node will be moved from its current location.}
31 | 
32 | \item{.where}{to add the new node, for \code{xml_add_child} the position
33 | after which to add, use \code{0} for the first child. For
34 | \code{xml_add_sibling} either \sQuote{"before"} or \sQuote{"after"}
35 | indicating if the new node should be before or after \code{.x}.}
36 | 
37 | \item{free}{When removing the node also free the memory used for that node.
38 | Note if you use this option you cannot use any existing objects pointing to
39 | the node or its children, it is likely to crash R or return garbage.}
40 | }
41 | \description{
42 | \code{xml_add_sibling()} and \code{xml_add_child()} are used to insert a node
43 | as a sibling or a child. \code{xml_add_parent()} adds a new parent in
44 | between the input node and the current parent. \code{xml_replace()}
45 | replaces an existing node with a new node. \code{xml_remove()} removes a
46 | node from the tree.
47 | }
48 | \details{
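49 | Care needs to be taken when using \code{xml_remove()}, especially with \code{free = TRUE}: existing objects that point to the removed node or its children must not be used afterwards.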
50 | }
51 | 


--------------------------------------------------------------------------------
/man/xml_serialize.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/xml_serialize.R
 3 | \name{xml_serialize}
 4 | \alias{xml_serialize}
 5 | \alias{xml_unserialize}
 6 | \title{Serializing XML objects to connections.}
 7 | \usage{
 8 | xml_serialize(object, connection, ...)
 9 | 
10 | xml_unserialize(connection, ...)
11 | }
12 | \arguments{
13 | \item{object}{\R object to serialize.}
14 | 
15 | \item{connection}{an open \link[base]{connection} or (for \code{serialize})
16 |     \code{NULL} or (for \code{unserialize}) a raw vector
17 |     (see \sQuote{Details}).}
18 | 
19 | \item{...}{Additional arguments passed to \code{\link[=read_xml]{read_xml()}}.}
20 | }
21 | \value{
22 | For \code{serialize}, \code{NULL} unless \code{connection = NULL}, when
23 |   the result is returned in a raw vector.
24 | 
25 |   For \code{unserialize} an \R object.
26 | }
27 | \description{
28 | Serializing XML objects to connections.
29 | }
30 | \examples{
31 | library(xml2)
32 | x <- read_xml("<a>
33 |   <b><c>123</c></b>
34 |   <b><c>456</c></b>
35 | </a>")
36 | 
37 | b <- xml_find_all(x, "//b")
38 | out <- xml_serialize(b, NULL)
39 | xml_unserialize(out)
40 | }
41 | 


--------------------------------------------------------------------------------
/man/xml_set_namespace.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/xml_modify.R
 3 | \name{xml_set_namespace}
 4 | \alias{xml_set_namespace}
 5 | \title{Set the node's namespace}
 6 | \usage{
 7 | xml_set_namespace(.x, prefix = "", uri = "")
 8 | }
 9 | \arguments{
10 | \item{.x}{a node}
11 | 
12 | \item{prefix}{The namespace prefix to use}
13 | 
14 | \item{uri}{The namespace URI to use}
15 | }
16 | \value{
17 | the node (invisibly)
18 | }
19 | \description{
20 | The namespace to be set must be already defined in one of the node's
21 | ancestors.
22 | }
23 | 


--------------------------------------------------------------------------------
/man/xml_structure.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/xml_structure.R
 3 | \name{xml_structure}
 4 | \alias{xml_structure}
 5 | \alias{html_structure}
 6 | \title{Show the structure of an html/xml document.}
 7 | \usage{
 8 | xml_structure(x, indent = 2, file = "")
 9 | 
10 | html_structure(x, indent = 2, file = "")
11 | }
12 | \arguments{
13 | \item{x}{HTML/XML document (or part thereof)}
14 | 
15 | \item{indent}{Number of spaces to indent}
16 | 
17 | \item{file}{A \link[base]{connection}, or a character string naming the file
18 |     to print to.  If \code{""} (the default), \code{cat} prints to the
19 |     standard output connection, the console unless redirected by
20 |     \code{\link[base]{sink}}.
21 |     If it is \code{"|cmd"}, the output is piped to the command given
22 |     by \file{cmd}, by opening a pipe connection.
23 |   }
24 | }
25 | \description{
26 | Show the structure of an html/xml document without displaying any of
27 | the values. This is useful if you want to get a high level view of the
28 | way a document is organised. Compared to \code{xml_structure},
29 | \code{html_structure} prints the id and class attributes.
30 | }
31 | \examples{
32 | xml_structure(read_xml("<a><b><c/><c/></b><d/></a>"))
33 | 
34 | rproj <- read_html(system.file("extdata", "r-project.html", package = "xml2"))
35 | xml_structure(rproj)
36 | xml_structure(xml_find_all(rproj, ".//p"))
37 | 
38 | h <- read_html("<body><p id = 'a'></p><p class = 'c d'></p></body>")
39 | html_structure(h)
40 | }
41 | 


--------------------------------------------------------------------------------
/man/xml_text.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/xml_text.R
 3 | \name{xml_text}
 4 | \alias{xml_text}
 5 | \alias{xml_text<-}
 6 | \alias{xml_set_text}
 7 | \alias{xml_double}
 8 | \alias{xml_integer}
 9 | \title{Extract or modify the text}
10 | \usage{
11 | xml_text(x, trim = FALSE)
12 | 
13 | xml_text(x) <- value
14 | 
15 | xml_set_text(x, value)
16 | 
17 | xml_double(x)
18 | 
19 | xml_integer(x)
20 | }
21 | \arguments{
22 | \item{x}{A document, node, or node set.}
23 | 
24 | \item{trim}{If \code{TRUE} will trim leading and trailing spaces.}
25 | 
26 | \item{value}{character vector with replacement text.}
27 | }
28 | \value{
29 | A character vector, the same length as x.
30 | }
31 | \description{
32 | \code{xml_text} returns a character vector, \code{xml_double} returns a
33 | numeric vector, \code{xml_integer} returns an integer vector.
34 | }
35 | \examples{
36 | x <- read_xml("<p>This is some text. This is <b>bold!</b></p>")
37 | xml_text(x)
38 | xml_text(xml_children(x))
39 | 
40 | x <- read_xml("<x>This is some text. <x>This is some nested text.</x></x>")
41 | xml_text(x)
42 | xml_text(xml_find_all(x, "//x"))
43 | 
44 | x <- read_xml("<p>   Some text    </p>")
45 | xml_text(x, trim = TRUE)
46 | 
47 | # xml_double() and xml_integer() are useful for extracting numeric attributes
48 | x <- read_xml("<plot><point x='1' y='2' /><point x='2' y='1' /></plot>")
49 | xml_integer(xml_find_all(x, "//@x"))
50 | }
51 | 


--------------------------------------------------------------------------------
/man/xml_type.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/xml_type.R
 3 | \name{xml_type}
 4 | \alias{xml_type}
 5 | \title{Determine the type of a node.}
 6 | \usage{
 7 | xml_type(x)
 8 | }
 9 | \arguments{
10 | \item{x}{A document, node, or node set.}
11 | }
12 | \description{
13 | Determine the type of a node.
14 | }
15 | \examples{
16 | x <- read_xml("<foo> a <b /> <![CDATA[ blah]]></foo>")
17 | xml_type(x)
18 | xml_type(xml_contents(x))
19 | }
20 | 


--------------------------------------------------------------------------------
/man/xml_url.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/xml_url.R
 3 | \name{xml_url}
 4 | \alias{xml_url}
 5 | \title{The URL of an XML document}
 6 | \usage{
 7 | xml_url(x)
 8 | }
 9 | \arguments{
10 | \item{x}{A node or document.}
11 | }
12 | \value{
13 | A character vector of length 1. Returns \code{NA} if the name is
14 | not set.
15 | }
16 | \description{
17 | This is useful for interpreting relative urls with \code{\link[=url_relative]{url_relative()}}.
18 | }
19 | \examples{
20 | catalog <- read_xml(xml2_example("cd_catalog.xml"))
21 | xml_url(catalog)
22 | 
23 | x <- read_xml("<foo/>")
24 | xml_url(x)
25 | }
26 | 


--------------------------------------------------------------------------------
/man/xml_validate.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/xml_schema.R
 3 | \name{xml_validate}
 4 | \alias{xml_validate}
 5 | \title{Validate XML schema}
 6 | \usage{
 7 | xml_validate(x, schema)
 8 | }
 9 | \arguments{
10 | \item{x}{A document, node, or node set.}
11 | 
12 | \item{schema}{an XML document containing the schema}
13 | }
14 | \value{
15 | TRUE or FALSE
16 | }
17 | \description{
18 | Validate an XML document against an XML 1.0 schema.
19 | }
20 | \examples{
21 | # Example from https://msdn.microsoft.com/en-us/library/ms256129(v=vs.110).aspx
22 | doc <- read_xml(system.file("extdata/order-doc.xml", package = "xml2"))
23 | schema <- read_xml(system.file("extdata/order-schema.xml", package = "xml2"))
24 | xml_validate(doc, schema)
25 | }
26 | 


--------------------------------------------------------------------------------
/revdep/.gitignore:
--------------------------------------------------------------------------------
1 | **/
2 | checks
3 | library
4 | checks.noindex
5 | library.noindex
6 | data.sqlite
7 | *.html
8 | 


--------------------------------------------------------------------------------
/revdep/README.md:
--------------------------------------------------------------------------------
1 | # Revdeps
2 | 
3 | 


--------------------------------------------------------------------------------
/revdep/cran.md:
--------------------------------------------------------------------------------
1 | ## revdepcheck results
2 | 
3 | We checked 2 reverse dependencies, comparing R CMD check results across CRAN and dev versions of this package.
4 | 
5 |  * We saw 0 new problems
6 |  * We failed to check 0 packages
7 | 
8 | 


--------------------------------------------------------------------------------
/revdep/email.yml:
--------------------------------------------------------------------------------
1 | release_date: ???
2 | rel_release_date: ???
3 | my_news_url: ???
4 | release_version: ???
5 | release_details: ???
6 | 


--------------------------------------------------------------------------------
/revdep/failures.md:
--------------------------------------------------------------------------------
1 | *Wow, no problems at all. :)*


--------------------------------------------------------------------------------
/revdep/problems.md:
--------------------------------------------------------------------------------
1 | *Wow, no problems at all. :)*


--------------------------------------------------------------------------------
/src/.gitignore:
--------------------------------------------------------------------------------
1 | *.o
2 | *.so
3 | *.dll
4 | 


--------------------------------------------------------------------------------
/src/Makevars.in:
--------------------------------------------------------------------------------
1 | PKG_CPPFLAGS=-I../inst/include @cflags@ -DUCHAR_TYPE=wchar_t -DU_SHOW_CPLUSPLUS_API=0 -DSTRICT_R_HEADERS -DR_NO_REMAP
2 | PKG_CFLAGS=$(C_VISIBILITY)
3 | PKG_CXXFLAGS=$(CXX_VISIBILITY)
4 | PKG_LIBS=@libs@
5 | 


--------------------------------------------------------------------------------
/src/Makevars.win:
--------------------------------------------------------------------------------
 1 | PKG_CONFIG_NAME = libxml-2.0
 2 | PKG_CONFIG ?= $(BINPREF)pkg-config
 3 | PKG_LIBS := $(shell $(PKG_CONFIG) --libs $(PKG_CONFIG_NAME))
 4 | STATIC_CFLAGS = -DSTRICT_R_HEADERS -DR_NO_REMAP -DLIBXML_STATIC -I../inst/include
 5 | 
 6 | ifneq ($(PKG_LIBS),)
 7 | $(info using $(PKG_CONFIG_NAME) from Rtools)
 8 | PKG_CPPFLAGS := $(shell $(PKG_CONFIG) --cflags $(PKG_CONFIG_NAME)) $(STATIC_CFLAGS)
 9 | else
10 | RWINLIB = ../windows/libxml2
11 | PKG_CPPFLAGS = -I$(RWINLIB)/include -I$(RWINLIB)/include/libxml2 $(STATIC_CFLAGS)
12 | PKG_LIBS = -L$(RWINLIB)/lib$(subst gcc,,$(COMPILED_BY))$(R_ARCH) -L$(RWINLIB)/lib \
13 | 	-lxml2 -liconv -lz -lws2_32
14 | endif
15 | 
16 | all: $(SHLIB)
17 | 
18 | $(OBJECTS): $(RWINLIB)
19 | 
20 | $(RWINLIB):
21 | 	"${R_HOME}/bin${R_ARCH_BIN}/Rscript.exe" "../tools/winlibs.R"
22 | 
23 | clean:
24 | 	rm -f $(SHLIB) $(OBJECTS)
25 | 


--------------------------------------------------------------------------------
/src/connection.cpp:
--------------------------------------------------------------------------------
 1 | #include <Rinternals.h>
 2 | #include <iterator>
 3 | #include <vector>
 4 | #include "xml2_utils.h"
 5 | 
 6 | // Evaluates `readBin(con, "raw", bytes)` in the global environment and
 7 | // returns the result of that call.
 8 | SEXP read_bin(SEXP con, size_t bytes) {
 9 |   SEXP what = PROTECT(Rf_mkString("raw"));
10 |   SEXP n = PROTECT(Rf_ScalarInteger(bytes));
11 |   SEXP call = PROTECT(Rf_lang4(Rf_install("readBin"), con, what, n));
12 |   SEXP result = Rf_eval(call, R_GlobalEnv);
13 |   UNPROTECT(3);
14 |   return result;
15 | }
16 | 
17 | // Evaluates `writeBin(data, con)` in the global environment and returns
18 | SEXP write_bin(SEXP data, SEXP con) {
19 |   // ...whatever that call evaluates to.
20 |   SEXP call = PROTECT(Rf_lang3(Rf_install("writeBin"), data, con));
21 |   SEXP result = Rf_eval(call, R_GlobalEnv);
22 |   UNPROTECT(1);
23 |   return result;
24 | }
25 | 
26 | // Drains a connection: reads chunks of at most `read_size_sxp` bytes until
27 | // a read comes back empty, then returns everything read as one raw vector.
28 | //
29 | // [[export]]
30 | extern "C" SEXP read_connection_(SEXP con_sxp, SEXP read_size_sxp) {
31 | 
32 |   BEGIN_CPP
33 |   size_t read_size = REAL(read_size_sxp)[0];
34 |   std::vector<char> bytes;
35 | 
36 |   // Accumulate chunks until the connection yields an empty one.
37 |   for (;;) {
38 |     SEXP chunk = read_bin(con_sxp, read_size);
39 |     R_xlen_t chunk_size = Rf_xlength(chunk);
40 |     if (chunk_size <= 0) {
41 |       break;
42 |     }
43 |     bytes.insert(bytes.end(), RAW(chunk), RAW(chunk) + chunk_size);
44 |   }
45 | 
46 |   // Copy the accumulated bytes into a freshly allocated raw vector.
47 |   SEXP out = PROTECT(Rf_allocVector(RAWSXP, bytes.size()));
48 |   std::copy(bytes.begin(), bytes.end(), RAW(out));
49 |   UNPROTECT(1);
50 | 
51 |   return out;
52 | 
53 |   END_CPP
54 | }
55 | 


--------------------------------------------------------------------------------
/src/connection.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <Rinternals.h>
 4 | #include <algorithm>
 5 | #include <cstring>
 6 | 
 7 | SEXP read_bin(SEXP con, size_t bytes = 64 * 1024);
 8 | SEXP write_bin(SEXP data, SEXP con);
 9 | 
10 | inline SEXP R_GetConnection(SEXP con) { return con; }  // identity: hands back `con` unchanged
11 | 
12 | inline size_t R_ReadConnection(SEXP con, void* buf, size_t n) {
13 |   // Ask read_bin() for up to `n` bytes; keep the vector protected while copying.
14 |   SEXP chunk = PROTECT(read_bin(con, n));
15 |   const R_xlen_t n_read = Rf_xlength(chunk);
16 | 
17 |   // Copy whatever was actually read into the caller's buffer.
18 |   memcpy(buf, RAW(chunk), n_read);
19 |   UNPROTECT(1);
20 | 
21 |   return n_read;
22 | }
23 | 
24 | inline size_t R_WriteConnection(SEXP con, void* buf, size_t n) {
25 |   // Stage the caller's bytes in a raw vector so they can be handed to R.
26 |   SEXP raw = PROTECT(Rf_allocVector(RAWSXP, n));
27 |   memcpy(RAW(raw), buf, n);
28 | 
29 |   // write_bin()'s result is ignored; `n` is always reported as written.
30 |   write_bin(raw, con);
31 |   UNPROTECT(1);
32 | 
33 |   return n;
34 | }
35 | 


--------------------------------------------------------------------------------
/src/xml2_init.c:
--------------------------------------------------------------------------------
 1 | #include <Rinternals.h>
 2 | #include <libxml/xmlversion.h>
 3 | #include <libxml/xmlerror.h>
 4 | #include <libxml/parser.h>
 5 | #include <string.h>
 6 | 
 7 | /* * *
 8 |  * Author: Nick Wellnhofer <wellnhofer@aevum.de>
 9 |  * Date:   Tue, 24 Oct 2023 15:02:36 +0200
10 |  * https://github.com/GNOME/libxml2/commit/61034116d0a3c8b295c6137956adc3ae55720711
11 |  *
12 |  * error: Make more xmlError structs constant
13 |  */
14 | /*
15 |  * Structured error handler installed via xmlSetStructuredErrorFunc().
16 |  * Reports the libxml2 message and error code through R: an R warning when
17 |  * error->level <= 2, an R error otherwise. One trailing newline, if
18 |  * present, is left out of the reported text.
19 |  *
20 |  * The signature depends on the libxml2 version because newer releases pass
21 |  * the error as a const struct (see the commit referenced above).
22 |  */
23 | #if defined(LIBXML_VERSION) && (LIBXML_VERSION >= 21200)
24 | void handleStructuredError(void* userData, const xmlError* error) {
25 | #else
26 | void handleStructuredError(void* userData, xmlError* error) {
27 | #endif
28 |   const char* message;
29 |   size_t len;
30 | 
31 |   if (error == NULL) {
32 |     return;
33 |   }
34 | 
35 |   /* The message pointer may be NULL; strlen(NULL) is undefined. */
36 |   message = (error->message != NULL) ? error->message : "";
37 | 
38 |   /* Trim via the printf precision instead of writing a terminator into
39 |    * the error struct, which is const for libxml2 >= 2.12 and is not ours
40 |    * to modify. Only an actual trailing newline is dropped. */
41 |   len = strlen(message);
42 |   if (len > 0 && message[len - 1] == '\n') {
43 |     len--;
44 |   }
45 | 
46 |   if (error->level <= 2) {
47 |     Rf_warning("%.*s [%i]", (int) len, message, (int) error->code);
48 |   } else {
49 |     Rf_error("%.*s [%i]", (int) len, message, (int) error->code);
50 |   }
51 | }
31 | 
32 | /*
33 |  * Generic (printf-style) error handler installed via xmlSetGenericErrorFunc().
34 |  * Formats the message into a fixed-size buffer (truncating if needed) and
35 |  * raises it as an R error; a NULL format is reported as "(null)".
36 |  */
37 | void handleGenericError(void *ctx, const char *fmt, ...){
38 |   char buffer[BUFSIZ];
39 |   va_list arg;
40 | 
41 |   if (fmt == NULL) fmt = "(null)";
42 | 
43 |   va_start(arg, fmt);
44 |   vsnprintf(buffer, sizeof(buffer), fmt, arg);
45 |   /* Rf_error() does not return, so the va_list must be released first. */
46 |   va_end(arg);
47 | 
48 |   Rf_error("%s", buffer);
49 | }
42 | 
43 | void init_libxml2_library(void) {
44 |   // Check that the libxml2 headers and the loaded library are compatible
45 |   LIBXML_TEST_VERSION
46 |   // Initialise the parser, then install this file's two error handlers.
47 |   xmlInitParser();
48 |   xmlSetStructuredErrorFunc(NULL, handleStructuredError);  // no user data
49 |   xmlSetGenericErrorFunc(NULL, handleGenericError);        // no context
50 | }
51 | 
52 | 


--------------------------------------------------------------------------------
/src/xml2_namespace.cpp:
--------------------------------------------------------------------------------
 1 | #include <Rinternals.h>
 2 | #include <libxml/tree.h>
 3 | 
 4 | #include "xml2_types.h"
 5 | #include "xml2_utils.h"
 6 | 
 7 | // [[export]]
 8 | extern "C" SEXP unique_ns(SEXP ns) {
 9 |   BEGIN_CPP
10 |   return NsMap(ns).out();  // build an NsMap from `ns` and return its out() value
11 |   END_CPP
12 | }
13 | 
14 | // Adds every namespace declared on `node` and on the nodes reached through
15 | // its children to `nsMap`, depth first.
16 | void cache_namespace(xmlNode* node, NsMap* nsMap) {
17 |   // Record the namespaces declared directly on this node.
18 |   xmlNs* def = node->nsDef;
19 |   while (def != NULL) {
20 |     nsMap->add(def->prefix, def->href);
21 |     def = def->next;
22 |   }
23 | 
24 |   // Recurse into the children in order; the walk ends at the end of the
25 |   // sibling list or at the first entity declaration, whichever comes first.
26 |   xmlNode* child = node->children;
27 |   while (child != NULL && child->type != XML_ENTITY_DECL) {
28 |     cache_namespace(child, nsMap);
29 |     child = child->next;
30 |   }
31 | }
25 | 
26 | // Collects the namespaces declared anywhere below the document's root
27 | // element into an NsMap and returns its out() value.
28 | //
29 | // [[export]]
30 | extern "C" SEXP doc_namespaces(SEXP doc_sxp) {
31 |   BEGIN_CPP
32 |   XPtrDoc doc(doc_sxp);
33 | 
34 |   NsMap nsMap;
35 | 
36 |   // A document without a root element has nothing to collect, and
37 |   // cache_namespace() dereferences its node unconditionally, so skip it.
38 |   xmlNode* root = xmlDocGetRootElement(doc.checked_get());
39 |   if (root != NULL) {
40 |     cache_namespace(root, &nsMap);
41 |   }
42 | 
43 |   return nsMap.out();
44 |   END_CPP
45 | }
39 | 
40 | // [[export]]
41 | extern "C" SEXP ns_lookup_uri(SEXP doc_sxp, SEXP node_sxp, SEXP uri_sxp) {
42 |   BEGIN_CPP
43 |   XPtrDoc doc(doc_sxp);
44 |   XPtrNode node(node_sxp);
45 |   // Look up, via xmlSearchNsByHref(), a namespace whose URI is `uri_sxp`.
46 |   xmlNsPtr ns = xmlSearchNsByHref(doc.checked_get(), node.checked_get(), asXmlChar(uri_sxp));
47 |   if (ns == NULL) {  // nothing found: raise an R error naming the URI
48 |     Rf_error("No namespace with URI `%s` found", CHAR(STRING_ELT(uri_sxp, 0)));
49 |   }
50 |   XPtrNs out(ns);  // wrap the match in an XPtrNs and return it as a SEXP
51 |   return SEXP(out);
52 |   END_CPP
53 | }
54 | 
55 | // [[export]]
56 | extern "C" SEXP ns_lookup(SEXP doc_sxp, SEXP node_sxp, SEXP prefix_sxp) {
57 |   BEGIN_CPP
58 |   XPtrDoc doc(doc_sxp);
59 |   XPtrNode node(node_sxp);
60 |   // Empty prefix string: search with a NULL prefix; the result is not checked.
61 |   xmlNsPtr ns = NULL;
62 |   if (Rf_xlength(STRING_ELT(prefix_sxp, 0)) == 0) {
63 |     ns = xmlSearchNs(doc.checked_get(), node.checked_get(), NULL);
64 |   } else {  // non-empty prefix: a failed lookup is an R error
65 |     ns = xmlSearchNs(doc.checked_get(), node.checked_get(), asXmlChar(prefix_sxp));
66 |     if (ns == NULL) {
67 |       Rf_error("No namespace with prefix `%s` found", CHAR(STRING_ELT(prefix_sxp, 0)));
68 |     }
69 |   }
70 |   // Wrap whatever was found in an XPtrNs and return it as a SEXP.
71 |   XPtrNs out(ns);
72 |   return SEXP(out);
73 |   END_CPP
74 | }
75 | 
76 | // [[export]]
77 | extern "C" SEXP libxml2_version_(){
78 |   return Rf_mkString(LIBXML_DOTTED_VERSION);  // the LIBXML_DOTTED_VERSION macro as an R string
79 | }


--------------------------------------------------------------------------------
/src/xml2_schema.cpp:
--------------------------------------------------------------------------------
 1 | #include <Rinternals.h>
 2 | #include <libxml/xmlschemas.h>
 3 | #include <vector>
 4 | #include <string>
 5 | 
 6 | #include "xml2_types.h"
 7 | #include "xml2_utils.h"
 8 | 
 9 | /* * *
10 |  * Author: Nick Wellnhofer <wellnhofer@aevum.de>
11 |  * Date:   Tue, 24 Oct 2023 15:02:36 +0200
12 |  * https://github.com/GNOME/libxml2/commit/61034116d0a3c8b295c6137956adc3ae55720711
13 |  *
14 |  * error: Make more xmlError structs constant
15 |  */
16 | #if defined(LIBXML_VERSION) && (LIBXML_VERSION >= 21200)
17 | void handleSchemaError(void* userData, const xmlError* error) {
18 | #else
19 | void handleSchemaError(void* userData, xmlError* error) {
20 | #endif
21 |   std::vector<std::string> * vec = (std::vector<std::string> *) userData;
22 |   std::string message = std::string(error->message);
23 |   message.resize(message.size() - 1);
24 |   vec->push_back(message);
25 | }
26 | 
27 | // [[export]]
28 | extern "C" SEXP doc_validate(SEXP doc_sxp, SEXP schema_sxp) {
29 |   // Validates the document `doc_sxp` against the schema document `schema_sxp`.
30 |   XPtrDoc doc(doc_sxp);
31 |   XPtrDoc schema(schema_sxp);
32 | 
33 |   BEGIN_CPP
34 |   // Messages appended by handleSchemaError during parsing and validation.
35 |   std::vector<std::string> vec;
36 |   // Parse the schema document, routing its errors into `vec`.
37 |   xmlSchemaParserCtxtPtr cptr = xmlSchemaNewDocParserCtxt(schema.checked_get());
38 | 
39 |   xmlSchemaSetParserStructuredErrors(cptr, handleSchemaError, &vec);
40 | 
41 |   xmlSchemaPtr sptr = xmlSchemaParse(cptr);
42 |   // Validation context for the parsed schema, using the same error sink.
43 |   xmlSchemaValidCtxtPtr vptr = xmlSchemaNewValidCtxt(sptr);
44 | 
45 |   xmlSchemaSetValidStructuredErrors(vptr, handleSchemaError, &vec);
46 | 
47 |   SEXP out = PROTECT(Rf_allocVector(LGLSXP, 1));
48 |   // The result is TRUE only when xmlSchemaValidateDoc() returns 0.
49 |   LOGICAL(out)[0] = xmlSchemaValidateDoc(vptr, doc.checked_get()) == 0;
50 |   // Release the libxml2 objects before building the R-side error vector.
51 |   xmlSchemaFreeParserCtxt(cptr);
52 |   xmlSchemaFreeValidCtxt(vptr);
53 |   xmlSchemaFree(sptr);
54 |   // Attach the collected messages, marked UTF-8, as the "errors" attribute.
55 |   SEXP errors = PROTECT(Rf_allocVector(STRSXP, vec.size()));
56 |   for (size_t i = 0; i < vec.size(); ++i) {
57 |     SET_STRING_ELT(errors, i, Rf_mkCharLenCE(vec[i].c_str(), vec[i].size(), CE_UTF8));
58 |   }
59 |   Rf_setAttrib(out, Rf_install("errors"), errors);
60 | 
61 | 
62 |   UNPROTECT(2);
63 |   return out;
64 | 
65 |   END_CPP
66 | }
67 | 


--------------------------------------------------------------------------------
/src/xml2_xpath.cpp:
--------------------------------------------------------------------------------
  1 | #include <Rinternals.h>
  2 | #include <libxml/xpath.h>
  3 | #include <libxml/xpathInternals.h>
  4 | #include <libxml/tree.h>
  5 | #include "xml2_types.h"
  6 | #include <algorithm>
  7 | 
  8 | class XmlSeeker {
  9 |   xmlXPathContext* context_;
 10 |   xmlXPathObject* result_;
 11 |   XPtrDoc doc_;
 12 | 
 13 | public:
 14 | 
 15 |   XmlSeeker(XPtrDoc doc, xmlNode* node) : result_(NULL), doc_(doc) {
 16 |     context_ = xmlXPathNewContext(doc.checked_get());
 17 |     // Set context to current node
 18 |     context_->node = node;
 19 |   }
 20 | 
 21 |   void registerNamespace(SEXP nsMap) {
 22 |     R_xlen_t n = Rf_xlength(nsMap);
 23 |     if (n == 0) {
 24 |       return;
 25 |     }
 26 | 
 27 |     SEXP prefix = Rf_getAttrib(nsMap, R_NamesSymbol);
 28 | 
 29 |     for (int i = 0; i < n; ++i) {
 30 |       xmlChar* prefixI = (xmlChar*) CHAR(STRING_ELT(prefix, i));
 31 |       xmlChar* urlI = (xmlChar*) CHAR(STRING_ELT(nsMap, i));
 32 | 
 33 |       if (xmlXPathRegisterNs(context_, prefixI, urlI) != 0)
 34 |         Rf_error("Failed to register namespace (%s <-> %s)", prefixI, urlI);
 35 |     }
 36 |   }
 37 | 
 38 |   // Evaluate `xpath` in this seeker's context and convert the result to R: a
 39 |   // list of at most `num_results` xml_node objects, a length-one double, logical
 40 |   // or character vector, or an empty xml_missing list when nothing is found.
 41 |   SEXP search(const char* xpath, int num_results) {
 42 |     result_ = xmlXPathEval((const xmlChar*)xpath, context_);
 43 |     // A failed evaluation and an empty node set are both reported as missing.
 44 |     bool missing = (result_ == NULL);
 45 |     if (!missing && result_->type == XPATH_NODESET) {
 46 |       xmlNodeSet* found = result_->nodesetval;
 47 |       missing = (found == NULL || found->nodeNr == 0);
 48 |     }
 49 |     if (missing) {
 50 |       SEXP ret = PROTECT(Rf_allocVector(VECSXP, 0));
 51 |       Rf_setAttrib(ret, R_ClassSymbol, Rf_mkString("xml_missing"));
 52 |       UNPROTECT(1);
 53 |       return ret;
 54 |     }
 55 | 
 56 |     switch (result_->type) {
 57 |       case XPATH_NODESET:
 58 |         {
 59 |           xmlNodeSet* nodes = result_->nodesetval;
 60 |           int n = std::min(nodes->nodeNr, num_results);
 61 |           SEXP out = PROTECT(Rf_allocVector(VECSXP, n));
 62 |           // The names and class vectors are the same for every node, so build
 63 |           // them once and share them instead of allocating inside the loop.
 64 |           SEXP names = PROTECT(Rf_allocVector(STRSXP, 2));
 65 |           SET_STRING_ELT(names, 0, Rf_mkChar("node"));
 66 |           SET_STRING_ELT(names, 1, Rf_mkChar("doc"));
 67 |           SEXP node_class = PROTECT(Rf_mkString("xml_node"));
 68 | 
 69 |           for (int i = 0; i < n; i++) {
 70 |             SEXP ret = PROTECT(Rf_allocVector(VECSXP, 2));
 71 |             SET_VECTOR_ELT(ret, 0, XPtrNode(nodes->nodeTab[i]));
 72 |             SET_VECTOR_ELT(ret, 1, doc_);
 73 |             Rf_setAttrib(ret, R_NamesSymbol, names);
 74 |             Rf_setAttrib(ret, R_ClassSymbol, node_class);
 75 |             SET_VECTOR_ELT(out, i, ret);
 76 |             UNPROTECT(1);
 77 |           }
 78 | 
 79 |           UNPROTECT(3);
 80 |           return out;
 81 |         }
 82 |       case XPATH_NUMBER: { return Rf_ScalarReal(result_->floatval); }
 83 |       case XPATH_BOOLEAN: { return Rf_ScalarLogical(result_->boolval); }
 84 |       case XPATH_STRING: { return Rf_ScalarString(Rf_mkCharCE((char *) result_->stringval, CE_UTF8)); }
 85 |       default:
 86 |         Rf_error("XPath result type: %d not supported", result_->type);
 87 |     }
 88 | 
 89 |     return R_NilValue;
 90 |   }
 91 | 
 92 |   // Free the XPath context and, when present, the stored result object.
 93 |   ~XmlSeeker() {
 94 |     try {  // swallow everything so nothing propagates out of the destructor
 95 |       xmlXPathFreeContext(context_);
 96 |       if (result_ != NULL) { xmlXPathFreeObject(result_); }
 97 |     } catch (...) {}
 98 |   }
 99 | 
100 | };
101 | 
102 | // C entry point that evaluates an XPath expression relative to a node.
103 | //
104 | //   node_sxp, doc_sxp  handles wrapped by XPtrNode and XPtrDoc
105 | //   xpath_sxp          character vector whose first element is the expression
106 | //   nsMap_sxp          namespace map passed to XmlSeeker::registerNamespace()
107 | //   num_results_sxp    double vector whose first element caps the number of
108 | //                      nodes returned; Inf is treated as INT_MAX
109 | //
110 | // Returns the value of XmlSeeker::search(). Raises an R error when the XPath
111 | // is not a non-empty character vector or the cap is NaN or negative.
112 | // [[export]]
113 | extern "C" SEXP xpath_search(SEXP node_sxp, SEXP doc_sxp, SEXP xpath_sxp, SEXP nsMap_sxp, SEXP num_results_sxp) {
114 | 
115 |   XPtrNode node(node_sxp);
116 |   XPtrDoc doc(doc_sxp);
117 |   if (TYPEOF(xpath_sxp) != STRSXP) {
118 |     Rf_error("XPath must be a string, received %s", Rf_type2char(TYPEOF(xpath_sxp)));
119 |   }
120 |   // STRING_ELT() below must not be asked for element 0 of an empty vector.
121 |   if (Rf_length(xpath_sxp) < 1) {
122 |     Rf_error("XPath must be a string, received an empty character vector");
123 |   }
124 |   const char* xpath = CHAR(STRING_ELT(xpath_sxp, 0));
125 | 
126 |   double requested = REAL(num_results_sxp)[0];
127 |   if (ISNAN(requested) || requested < 0) {
128 |     Rf_error("Number of results must be a non-negative number");
129 |   }
130 |   // Converting a double that does not fit into an int is undefined behaviour,
131 |   // so anything at or above INT_MAX (Inf included) is clamped before the cast.
132 |   int num_results = (requested >= INT_MAX) ? INT_MAX : (int) requested;
133 | 
134 |   XmlSeeker seeker(doc, node.checked_get());
135 |   seeker.registerNamespace(nsMap_sxp);
136 |   return seeker.search(xpath, num_results);
137 | }
138 | 


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
 1 | # This file is part of the standard setup for testthat.
 2 | # It is recommended that you do not modify it.
 3 | #
 4 | # Where should you do additional test configuration?
 5 | # Learn more about the roles of various files in:
 6 | # * https://r-pkgs.org/testing-design.html#sec-tests-files-overview
 7 | # * https://testthat.r-lib.org/articles/special-files.html
 8 | 
 9 | library(testthat)
10 | library(xml2)
11 | 
12 | # Sys.info() returns NULL where it is not implemented; isTRUE() turns the
13 | # resulting zero-length comparison into FALSE instead of breaking `if`.
14 | is_solaris <- isTRUE(tolower(Sys.info()[["sysname"]]) == "sunos")
15 | 
16 | # The test suite is not run on Solaris.
17 | if (!is_solaris) {
18 |   test_check("xml2")
19 | }
20 | 


--------------------------------------------------------------------------------
/tests/testthat/_snaps/xml_attr.md:
--------------------------------------------------------------------------------
1 | # xml_attrs<- modifies all attributes
2 | 
3 |     Code
4 |       xml_attrs(docs) <- "test"
5 |     Condition
6 |       Error in `xml_attrs<-`:
7 |       ! `test` must be a list of named character vectors.
8 | 
9 | 


--------------------------------------------------------------------------------
/tests/testthat/_snaps/xml_children.md:
--------------------------------------------------------------------------------
1 | # xml_child() errors if more than one search is given
2 | 
3 |     Code
4 |       xml_child(x, 1:2)
5 |     Condition
6 |       Error in `xml_child()`:
7 |       ! `1` and `2` must be of length 1.
8 | 
9 | 


--------------------------------------------------------------------------------
/tests/testthat/_snaps/xml_document.md:
--------------------------------------------------------------------------------
 1 | # print method is correct
 2 | 
 3 |     Code
 4 |       print(x)
 5 |     Output
 6 |       {html_document}
 7 |       <html xmlns:og="http://ogp.me/ns#" xmlns:fb="http://www.facebook.com/2008/fbml">
 8 |       [1] <head>\n<script type="text/javascript">var ue_t0=window.ue_t0||+new Date( ...
 9 |       [2] <body id="styleguide-v2" class="fixed">\n<script>\n    if (typeof uet ==  ...
10 | 
11 | 


--------------------------------------------------------------------------------
/tests/testthat/_snaps/xml_find.md:
--------------------------------------------------------------------------------
 1 | # xml_find_num errors with non numeric results
 2 | 
 3 |     Code
 4 |       xml_find_num(x, "//z")
 5 |     Condition
 6 |       Error in `xml_find_num()`:
 7 |       ! Element at path `//z` must be a number, not a <xml_missing> object.
 8 |     Code
 9 |       xml_find_num(x, "//y")
10 |     Condition
11 |       Error in `xml_find_num()`:
12 |       ! Element at path `//y` must be a number, not a list.
13 |     Code
14 |       xml_find_num(x, "1=1")
15 |     Condition
16 |       Error in `xml_find_num()`:
17 |       ! Element at path `1=1` must be a number, not `TRUE`.
18 |     Code
19 |       xml_find_num(x, "string(5)")
20 |     Condition
21 |       Error in `xml_find_num()`:
22 |       ! Element at path `string(5)` must be a number, not the string "5".
23 | 
24 | # xml_find_int errors with non integer results
25 | 
26 |     Code
27 |       xml_find_int(x, "//z")
28 |     Condition
29 |       Error in `xml_find_int()`:
30 |       ! Element at path `//z` must be a whole number, not a <xml_missing> object.
31 |     Code
32 |       xml_find_int(x, "//y")
33 |     Condition
34 |       Error in `xml_find_int()`:
35 |       ! Element at path `//y` must be a whole number, not a list.
36 |     Code
37 |       xml_find_int(x, "number(1.1)")
38 |     Condition
39 |       Error in `xml_find_int()`:
40 |       ! Element at path `number(1.1)` must be a whole number, not the number 1.1.
41 | 
42 | # xml_find_chr errors with non character results
43 | 
44 |     Code
45 |       xml_find_chr(x, "//z")
46 |     Condition
47 |       Error in `xml_find_chr()`:
48 |       ! Element at path `//z` must be a single string, not a <xml_missing> object.
49 |     Code
50 |       xml_find_chr(x, "//y")
51 |     Condition
52 |       Error in `xml_find_chr()`:
53 |       ! Element at path `//y` must be a single string, not a list.
54 |     Code
55 |       xml_find_chr(x, "1=1")
56 |     Condition
57 |       Error in `xml_find_chr()`:
58 |       ! Element at path `1=1` must be a single string, not `TRUE`.
59 |     Code
60 |       xml_find_chr(x, "1+1")
61 |     Condition
62 |       Error in `xml_find_chr()`:
63 |       ! Element at path `1+1` must be a single string, not the number 2.
64 | 
65 | # xml_find_lgl errors with non logical results
66 | 
67 |     Code
68 |       xml_find_lgl(x, "//z")
69 |     Condition
70 |       Error in `xml_find_lgl()`:
71 |       ! Element at path `//z` must be `TRUE` or `FALSE`, not a <xml_missing> object.
72 |     Code
73 |       xml_find_lgl(x, "//y")
74 |     Condition
75 |       Error in `xml_find_lgl()`:
76 |       ! Element at path `//y` must be `TRUE` or `FALSE`, not a list.
77 |     Code
78 |       xml_find_lgl(x, "string(5)")
79 |     Condition
80 |       Error in `xml_find_lgl()`:
81 |       ! Element at path `string(5)` must be `TRUE` or `FALSE`, not the string "5".
82 |     Code
83 |       xml_find_lgl(x, "1+1")
84 |     Condition
85 |       Error in `xml_find_lgl()`:
86 |       ! Element at path `1+1` must be `TRUE` or `FALSE`, not the number 2.
87 | 
88 | 


--------------------------------------------------------------------------------
/tests/testthat/_snaps/xml_name.md:
--------------------------------------------------------------------------------
1 | # error if missing ns spec
2 | 
3 |     Code
4 |       xml_name(bars, ns)
5 |     Condition
6 |       Error in `xml_name()`:
7 |       ! Couldn't find prefix for url http://bar.com
8 | 
9 | 


--------------------------------------------------------------------------------
/tests/testthat/_snaps/xml_node.md:
--------------------------------------------------------------------------------
 1 | # print method is correct
 2 | 
 3 |     Code
 4 |       print(body)
 5 |     Output
 6 |       {html_node}
 7 |       <body id="styleguide-v2" class="fixed">
 8 |        [1] <script>\n    if (typeof uet == 'function') {\n      uet("bb");\n    }\n ...
 9 |        [2] <script>\n    if ('csm' in window) {\n      csm.measure('csm_body_delive ...
10 |        [3] <div id="wrapper">\n            <div id="root" class="redesign">\n<scrip ...
11 |        [4] <script type="text/javascript" src="http://ia.media-imdb.com/images/G/01 ...
12 |        [5] <script type="text/imdblogin-js" id="login">\njQuery(document).ready(fun ...
13 |        [6] <script type="text/javascript">\n                jQuery(\n               ...
14 |        [7] <iframe id="sis_pixel_sitewide" width="1" height="1" frameborder="0" mar ...
15 |        [8] <script>\n    setTimeout(function(){\n        try{\n            //sis3.0 ...
16 |        [9] <script type="text/javascript" src="http://ia.media-imdb.com/images/G/01 ...
17 |       [10] <script type="text/javascript">\nif(window.COMSCORE){\nCOMSCORE.beacon({ ...
18 |       [11] <noscript>\n<img src="http://b.scorecardresearch.com/p?c1=2&amp;c2=60349 ...
19 |       [12] <script>\n    doWithAds(function(){\n        (new Image()).src = "http:/ ...
20 |       [13] <script>\n(function(){\n    var readyTimeout = setInterval(function(){\n ...
21 |       [14] <div id="servertime" time="235"></div>
22 |       [15] <script>\n    if (typeof uet == 'function') {\n      uet("be");\n    }\n ...
23 | 
24 | 


--------------------------------------------------------------------------------
/tests/testthat/_snaps/xml_nodeset.md:
--------------------------------------------------------------------------------
 1 | # print method is correct
 2 | 
 3 |     Code
 4 |       print(divs)
 5 |     Output
 6 |       {xml_nodeset (10)}
 7 |        [1] <div id="wrapper">\n            <div id="root" class="redesign">\n<scrip ...
 8 |        [2] <div id="root" class="redesign">\n<script>\n    if (typeof uet == 'funct ...
 9 |        [3] <div id="nb20" class="navbarSprite">\n<div id="supertab">\t\n\t<!-- begi ...
10 |        [4] <div id="supertab">\t\n\t<!-- begin TOP_AD -->\n<div id="top_ad_wrapper" ...
11 |        [5] <div id="top_ad_wrapper" class="dfp_slot">\n<script type="text/javascrip ...
12 |        [6] <div id="top_ad_reflow_helper"></div>
13 |        [7] <div id="navbar" class="navbarSprite">\n<noscript>\n  <link rel="stylesh ...
14 |        [8] <div id="nb_search">\n    <noscript><div id="more_if_no_javascript"><a h ...
15 |        [9] <div id="more_if_no_javascript"><a href="/search/">More</a></div>
16 |       [10] <div class="magnifyingglass navbarSprite"></div>
17 | 
18 | ---
19 | 
20 |     Code
21 |       print(x, width = 13L)
22 |     Output
23 |       {xml_document}
24 |       <doc>
25 |       [1] <a>123 ...
26 |       [2] <b>123 ...
27 |       [3] <c>12\ ...
28 |     Code
29 |       print(x, width = 14L)
30 |     Output
31 |       {xml_document}
32 |       <doc>
33 |       [1] <a>1234 ...
34 |       [2] <b>1234 ...
35 |       [3] <c>12\\ ...
36 | 
37 | 


--------------------------------------------------------------------------------
/tests/testthat/_snaps/xml_parse.md:
--------------------------------------------------------------------------------
 1 | # read_xml errors with an empty document
 2 | 
 3 |     Code
 4 |       read_xml(character())
 5 |     Condition
 6 |       Error in `read_xml()`:
 7 |       ! `x` must be a single string, not an empty character vector.
 8 | 
 9 | # parse_options errors when given an invalid option
10 | 
11 |     Code
12 |       read_html(test_path("lego.html.bz2"), options = "INVALID")
13 |     Condition
14 |       Error in `read_html()`:
15 |       x `options` "INVALID" is not a valid option.
16 |       i Valid options are one of "RECOVER", "NOENT", "DTDLOAD", "DTDATTR", "DTDVALID", "NOERROR", "NOWARNING", "PEDANTIC", "NOBLANKS", "SAX1", "XINCLUDE", "NONET", "NODICT", "NSCLEAN", "NOCDATA", "NOXINCNODE", "COMPACT", "OLD10", ..., "IGNORE_ENC", or "BIG_LINES".
17 |       i See read_html (`?xml2::read_html()`) for all options.
18 | 
19 | # read_xml and read_html fail with > 1 input
20 | 
21 |     Code
22 |       read_xml(c("foo", "bar"))
23 |     Condition
24 |       Error in `read_xml()`:
25 |       ! `x` must be a single string, not a character vector.
26 |     Code
27 |       read_html(c("foo", "bar"))
28 |     Condition
29 |       Error in `read_xml()`:
30 |       ! `x` must be a single string, not a character vector.
31 | 
32 | 


--------------------------------------------------------------------------------
/tests/testthat/_snaps/xml_write.md:
--------------------------------------------------------------------------------
 1 | # write_xml errors for incorrect directory and with invalid inputs
 2 | 
 3 |     Code
 4 |       write_xml(x, c("test.xml", "foo"))
 5 |     Condition
 6 |       Error in `write_xml()`:
 7 |       ! `file` must be a single string, not a character vector.
 8 | 
 9 | # write_xml works with nodeset input and connections
10 | 
11 |     Code
12 |       write_xml(y[1], c(filename, "foo"))
13 |     Condition
14 |       Error in `write_xml()`:
15 |       ! `file` must be a single string, not a character vector.
16 | 
17 | # write_xml works with node input and files
18 | 
19 |     Code
20 |       write_xml(y, c(filename, "foo"))
21 |     Condition
22 |       Error in `write_xml()`:
23 |       ! `file` must be a single string, not a character vector.
24 | 
25 | 


--------------------------------------------------------------------------------
/tests/testthat/helper.R:
--------------------------------------------------------------------------------
1 | # Evaluate `code`; an error it signals is re-raised inside expect_error() and
2 | # checked against `...`, otherwise the value of `code` is returned as is.
3 | maybe_error <- function(code, ...) {
4 |   check_error <- function(cnd) {
5 |     expect_error(stop(cnd), ...)
6 |   }
7 |   tryCatch(code, error = check_error)
8 | }
9 | 


--------------------------------------------------------------------------------
/tests/testthat/lego.html.bz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-lib/xml2/bf5619bbb6452d1f23cd88a9e0960d77e98a0d7b/tests/testthat/lego.html.bz2


--------------------------------------------------------------------------------
/tests/testthat/ns-multiple-aliases.xml:
--------------------------------------------------------------------------------
1 | <root>
2 |   <doc1 xmlns:b="http://bar.com"><b:bar /></doc1>
3 |   <doc2 xmlns:c="http://bar.com"><c:bar /></doc2>
4 | </root>
5 | 


--------------------------------------------------------------------------------
/tests/testthat/ns-multiple-default.xml:
--------------------------------------------------------------------------------
1 | <root>
2 |   <doc1 xmlns = "http://foo.com"><bar /></doc1>
3 |   <doc2 xmlns = "http://bar.com"><bar /></doc2>
4 | </root>
5 | 


--------------------------------------------------------------------------------
/tests/testthat/ns-multiple-prefix.xml:
--------------------------------------------------------------------------------
1 | <root>
2 |   <doc1 xmlns:b="http://baz.com"><b:bar /></doc1>
3 |   <doc2 xmlns:b="http://bar.com"><b:bar /></doc2>
4 | </root>
5 | 


--------------------------------------------------------------------------------
/tests/testthat/ns-multiple.xml:
--------------------------------------------------------------------------------
1 | <root xmlns:f="http://foo.com" xmlns:g = "http://bar.com">
2 |   <doc1><f:bar f:id="a" /></doc1>
3 |   <doc2><g:bar g:id="b" /></doc2>
4 | </root>
5 | 


--------------------------------------------------------------------------------
/tests/testthat/records.dtd:
--------------------------------------------------------------------------------
1 | <!ENTITY author "foo bar">
2 | <!ENTITY hoqc "&author; Quantitative Consultancy">
3 | <!ELEMENT records (record+)>
4 | <!ELEMENT record (field1)>
5 | <!ELEMENT field1 (#PCDATA)>
6 | 


--------------------------------------------------------------------------------
/tests/testthat/records.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <!DOCTYPE records SYSTEM "records.dtd">
3 | <records>
4 | <record>
5 | <field1>&hoqc;</field1>
6 | </record>
7 | </records>
8 | 


--------------------------------------------------------------------------------
/tests/testthat/setup.R:
--------------------------------------------------------------------------------
1 | # Print the version of libxml2 reported by xml2.
2 | cat(
3 |   "This is libxml2 version",
4 |   as.character(xml2:::libxml2_version()),
5 |   "\n"
6 | )
7 | 


--------------------------------------------------------------------------------
/tests/testthat/test-as_list.R:
--------------------------------------------------------------------------------
 1 | # Parse an XML string and convert the resulting document into a nested list.
 2 | list_xml <- function(x) as_list(read_xml(x))
 3 | 
 4 | test_that("empty elements become empty lists", {
 5 |   expect_equal(list_xml("<x></x>"), list(x = list()))
 6 |   expect_equal(list_xml("<x><y/></x>"), list(x = list(y = list())))
 7 |   expect_equal(
 8 |     list_xml("<x><y><z/></y></x>"),
 9 |     list(x = list(y = list(z = list())))
10 |   )
11 | })
12 | 
13 | test_that("text nodes become character vectors", {
14 |   expect_equal(list_xml("<x>a</x>"), list(x = list("a")))
15 |   expect_equal(list_xml("<x><y>a</y></x>"), list(x = list(y = list("a"))))
16 | })
17 | 
18 | test_that("cdata nodes become character vectors", {
19 |   expect_equal(list_xml("<x><![CDATA[<y/>]]></x>"), list(x = list("<y/>")))
20 | })
21 | 
22 | test_that("xml attributes become R attributes", {
23 |   expect_equal(
24 |     list_xml("<x a='1' b='2'></x>"),
25 |     list(x = structure(list(), a = "1", b = "2"))
26 |   )
27 | })
28 | 
29 | test_that("xml names are preserved when attributes exist", {
30 |   expect_equal(
31 |     list_xml("<x a='1' b='2'><y>3</y><z>4</z></x>"),
32 |     list(x = structure(list(y = list("3"), z = list("4")), a = "1", b = "2"))
33 |   )
34 | })
35 | 
36 | test_that("special attributes are escaped", {
37 |   # The XML attribute `names` is expected back as `.names`.
38 |   expect_equal(
39 |     list_xml("<x a='1' b='2' names='esc'><y>3</y><z>4</z></x>"),
40 |     list(
41 |       x = structure(
42 |         list(y = list("3"), z = list("4")),
43 |         a = "1", b = "2", .names = "esc"
44 |       )
45 |     )
46 |   )
47 | })
48 | 
49 | test_that("attributes in child nodes", {
50 |   x_node <- structure(
51 |     list(y = list("3"), z = list("4")),
52 |     a = "1", b = "2", .names = "esc"
53 |   )
54 |   expect_equal(
55 |     list_xml("<w aa = '0'><x a='1' b='2' names='esc'><y>3</y><z>4</z></x></w>"),
56 |     list(w = structure(list(x = x_node), aa = "0"))
57 |   )
58 | })
59 | 


--------------------------------------------------------------------------------
/tests/testthat/test-as_xml_document.R:
--------------------------------------------------------------------------------
 1 | # Parse `x`, convert the document to a list and back again, and expect both
 2 | # documents to serialise to the same text.
 3 | roundtrip_xml <- function(x) {
 4 |   original <- read_xml(x)
 5 |   rebuilt <- as_xml_document(as_list(original))
 6 |   expect_equal(as.character(original), as.character(rebuilt))
 7 | }
 8 | 
 9 | test_that("roundtrips with single children", {
10 |   roundtrip_xml("<a><b/></a>")
11 |   roundtrip_xml("<a><b><c/></b></a>")
12 |   roundtrip_xml("<a><b>foo<c/></b></a>")
13 |   roundtrip_xml("<a><b>foo<c>bar</c></b></a>")
14 |   roundtrip_xml("<a x = '1'><b y = '2'>foo<c z = '3'>bar</c></b></a>")
15 | })
16 | 
17 | test_that("roundtrips with multi children", {
18 |   roundtrip_xml("<a><b1/><b2/></a>")
19 |   roundtrip_xml("<a><b><c1/><c2/></b></a>")
20 |   roundtrip_xml("<a><b1>foo<c/></b1><b2>bar<c/></b2></a>")
21 |   roundtrip_xml("<a><b>foo<c>bar</c><c>baz</c></b></a>")
22 |   roundtrip_xml("<a x = '1'><b y = '2'>foo<c z = '3'>bar</c></b></a>")
23 |   roundtrip_xml("<a x = '1'><b y = '2'>foo<c z = '3'>bar</c></b><c zz = '4'>baz</c></a>")
24 | })
25 | 
26 | test_that("roundtrips with special attributes", {
27 |   roundtrip_xml("<a names = 'test'><b/></a>")
28 | })
29 | 
30 | test_that("more than one root node is an error", {
31 |   expect_error(
32 |     as_xml_document(list(a = list(), b = list())),
33 |     "Root nodes must be of length 1"
34 |   )
35 | })
36 | 
37 | test_that("Can convert nodes with leading and trailing text", {
38 |   roundtrip_xml("<a>foo<b>bar</b>baz</a>")
39 | })
40 | 


--------------------------------------------------------------------------------
/tests/testthat/test-classes.R:
--------------------------------------------------------------------------------
 1 | # XML declaration that starts every serialised document in these tests.
 2 | xml_decl <- '<?xml version="1.0" encoding="UTF-8"?>\n'
 3 | 
 4 | test_that("CDATA creation works", {
 5 |   x <- xml_new_root("root")
 6 |   xml_add_child(x, xml_cdata("<d/>"))
 7 |   expect_identical(
 8 |     as.character(x),
 9 |     paste0(xml_decl, "<root><![CDATA[<d/>]]></root>\n")
10 |   )
11 | })
12 | 
13 | test_that("Comment creation works", {
14 |   x <- xml_new_root("root")
15 |   xml_add_child(x, xml_comment("Hello!"))
16 |   expect_identical(
17 |     as.character(x, options = ""),
18 |     paste0(xml_decl, "<root><!--Hello!--></root>\n")
19 |   )
20 | })
21 | 
22 | test_that("xml_dtd works", {
23 |   external_id <- "-//W3C//DTD XHTML 1.0 Transitional//EN"
24 |   system_id <- "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"
25 | 
26 |   full <- xml_new_root(
27 |     xml_dtd(name = "html", external_id = external_id, system_id = system_id)
28 |   )
29 |   expect_identical(
30 |     as.character(full),
31 |     paste0(xml_decl, '<!DOCTYPE html PUBLIC "', external_id, '" "', system_id, '">\n')
32 |   )
33 | 
34 |   # Without a name the DOCTYPE keeps the separating space, hence two spaces.
35 |   no_name <- xml_new_root(
36 |     xml_dtd(external_id = external_id, system_id = system_id)
37 |   )
38 |   expect_identical(
39 |     as.character(no_name),
40 |     paste0(xml_decl, '<!DOCTYPE  PUBLIC "', external_id, '" "', system_id, '">\n')
41 |   )
42 | 
43 |   system_only <- xml_new_root(xml_dtd(system_id = system_id))
44 |   expect_identical(
45 |     as.character(system_only),
46 |     paste0(xml_decl, '<!DOCTYPE  SYSTEM "', system_id, '">\n')
47 |   )
48 | 
49 |   empty <- xml_new_root(xml_dtd())
50 |   expect_identical(as.character(empty), paste0(xml_decl, "<!DOCTYPE >\n"))
51 | })
52 | 


--------------------------------------------------------------------------------
/tests/testthat/test-format.R:
--------------------------------------------------------------------------------
 1 | test_that("format.xml_node prints attributes for root nodes", {
 2 |   with_attr <- read_xml("<parent foo = 'bar' />")
 3 | 
 4 |   expect_equal(format(with_attr), "<parent foo=\"bar\">")
 5 | })
 6 | 
 7 | test_that("format.xml_node prints namespaces for root nodes", {
 8 |   # No namespace declared
 9 |   bare <- read_xml("<parent/>")
10 |   expect_equal(format(bare), "<parent>")
11 | 
12 |   # Default namespace
13 |   default_ns <- read_xml("<parent xmlns = 'tag:james.f.hester@gmail.com,2016:bar' />")
14 |   expect_equal(
15 |     format(default_ns),
16 |     "<parent xmlns=\"tag:james.f.hester@gmail.com,2016:bar\">"
17 |   )
18 | 
19 |   # Prefixed namespace
20 |   prefixed_ns <- read_xml("<parent xmlns:foo = 'tag:james.f.hester@gmail.com,2016:bar' />")
21 |   expect_equal(
22 |     format(prefixed_ns),
23 |     "<parent xmlns:foo=\"tag:james.f.hester@gmail.com,2016:bar\">"
24 |   )
25 | })
26 | 


--------------------------------------------------------------------------------
/tests/testthat/test-null.R:
--------------------------------------------------------------------------------
 1 | # Round-trip a parsed document through saveRDS()/readRDS(). Every accessor
 2 | # below is expected to reject the restored object `x` with an error.
 3 | doc <- read_xml(test_path("ns-multiple.xml"))
 4 | tf <- tempfile()
 5 | on.exit(unlink(tf))
 6 | saveRDS(doc, file = tf)
 7 | x <- readRDS(tf)
 8 | 
 9 | test_that("accessors all fail rather than crash with NULL Xptrs", {
10 |   invalid <- "external pointer is not valid"
11 | 
12 |   # Conversion and structure
13 |   expect_error(as_list(x), invalid)
14 |   expect_error(html_structure(x), invalid)
15 | 
16 |   # Insertion
17 |   expect_error(xml_add_child(x, x), invalid)
18 |   expect_error(xml_add_sibling(x, x), invalid)
19 | 
20 |   # Attributes
21 |   expect_error(xml_attr(x, "foo"), invalid)
22 |   expect_error(xml_attr(x, "foo") <- "bar", invalid)
23 |   expect_error(xml_attrs(x), invalid)
24 |   expect_error(xml_attrs(x) <- list(), invalid)
25 | 
26 |   # Children and contents
27 |   expect_error(xml_child(x), invalid)
28 |   expect_error(xml_children(x), invalid)
29 |   expect_error(xml_contents(x), invalid)
30 | 
31 |   expect_error(xml_double(x), invalid)
32 | 
33 |   # XPath searches
34 |   expect_error(xml_find_all(x, ""), invalid)
35 |   expect_error(xml_find_chr(x, ""), invalid)
36 |   expect_error(xml_find_first(x, ""), invalid)
37 |   expect_error(xml_find_lgl(x, ""), invalid)
38 |   expect_error(xml_find_num(x, ""), invalid)
39 | 
40 |   expect_error(xml_has_attr(x, ""), invalid)
41 |   expect_error(xml_integer(x), invalid)
42 |   expect_error(xml_length(x), invalid)
43 | 
44 |   # Names and namespaces
45 |   expect_error(xml_name(x), invalid)
46 |   expect_error(xml_name(x) <- "foo", invalid)
47 |   expect_error(xml_ns(x), invalid)
48 |   expect_error(xml_ns_strip(x), invalid)
49 | 
50 |   # Navigation
51 |   expect_error(xml_parent(x), invalid)
52 |   expect_error(xml_parents(x), invalid)
53 |   expect_error(xml_path(x), invalid)
54 | 
55 |   # Modification
56 |   expect_error(xml_remove(x), invalid)
57 |   expect_error(xml_replace(x, x), invalid)
58 |   expect_error(xml_set_namespace(x, "foo"), invalid)
59 | 
60 |   expect_error(xml_siblings(x), invalid)
61 |   expect_error(xml_structure(x), invalid)
62 | 
63 |   # Text, type and URL
64 |   expect_error(xml_text(x), invalid)
65 |   expect_error(xml_text(x) <- "test", invalid)
66 |   expect_error(xml_type(x), invalid)
67 |   expect_error(xml_url(x), invalid)
68 | })
69 | 


--------------------------------------------------------------------------------
/tests/testthat/test-xml_children.R:
--------------------------------------------------------------------------------
 1 | # Root <foo> has two element children: <bar> (which contains <boo>) and <baz>.
 2 | x <- read_xml("<foo> <bar><boo /></bar> <baz/> </foo>")
 3 | 
 4 | test_that("xml_child() returns the proper child", {
 5 |   children <- xml_children(x)
 6 | 
 7 |   expect_identical(xml_child(x), children[[1L]])
 8 |   expect_identical(xml_child(x, 2), children[[2L]])
 9 | })
10 | 
11 | test_that("xml_child() returns child by name", {
12 |   by_name <- xml_child(x, "baz")
13 |   expect_identical(by_name, xml_find_first(x, "./baz"))
14 | })
15 | 
16 | test_that("xml_child() errors if more than one search is given", {
17 |   expect_snapshot(error = TRUE, xml_child(x, 1:2))
18 | })
19 | 
20 | test_that("xml_child() errors if search is not numeric or character", {
21 |   bad_type <- "`search` must be `numeric` or `character`"
22 | 
23 |   expect_error(xml_child(x, TRUE), bad_type)
24 |   expect_error(xml_child(x, as.factor("test")), bad_type)
25 |   expect_error(xml_child(x, raw(1)), bad_type)
26 |   expect_error(xml_child(x, list(1)), bad_type)
27 | })
28 | 
29 | test_that("xml_length", {
30 |   expect_equal(xml_length(x), 2)
31 | 
32 |   every_element <- xml_find_all(x, "//*")
33 |   expect_equal(xml_length(every_element), c(2, 1, 0, 0))
34 | })
35 | 
36 | test_that("xml_parent", {
37 |   first_child <- xml_child(x)
38 |   expect_identical(unclass(xml_parent(first_child)), unclass(x))
39 | })
40 | 
41 | test_that("xml_parents", {
42 |   boo <- xml_find_first(x, "//boo")
43 |   expect_equal(xml_name(xml_parents(boo)), c("bar", "foo"))
44 | })
45 | 
46 | test_that("xml_root", {
47 |   doc <- xml_new_document()
48 |   expect_s3_class(xml_root(doc), "xml_missing")
49 | 
50 |   # The first child added is expected to be the root, whichever node is asked.
51 |   xml_add_child(doc, "a")
52 |   b <- xml_add_child(doc, "b")
53 |   expect_equal(xml_name(xml_root(b)), "a")
54 |   expect_equal(xml_name(xml_root(doc)), "a")
55 | })
56 | 


--------------------------------------------------------------------------------
/tests/testthat/test-xml_document.R:
--------------------------------------------------------------------------------
1 | test_that("print method is correct", {
2 |   # Parse the bzip2-compressed HTML fixture stored next to the tests.
3 |   fixture <- test_path("lego.html.bz2")
4 |   x <- read_html(fixture)
5 | 
6 |   expect_snapshot(print(x))
7 | })
8 | 


--------------------------------------------------------------------------------
/tests/testthat/test-xml_missing.R:
--------------------------------------------------------------------------------
 1 | # Three paragraphs, of which the last has no <b> element: looking up the first
 2 | # <b> of each paragraph is expected to give a missing node in third position.
 3 | x <- read_xml("<body>
 4 |   <p>Some <b>text</b>.</p>
 5 |   <p>Some <b>other</b>.</p>
 6 |   <p>No bold text</p>
 7 |   </body>")
 8 | paragraphs <- xml_find_all(x, ".//p")
 9 | bold <- xml_find_first(paragraphs, ".//b")
10 | mss <- bold[[3]]
11 | 
12 | test_that("xml_find returns nodes of class 'xml_missing' for missing nodes", {
13 |   expect_length(bold, 3L)
14 |   expect_equal(lengths(bold), c(2L, 2L, 0L))
15 |   expect_s3_class(mss, "xml_missing")
16 | })
17 | 
18 | test_that("xml_missing methods return properly for all S3 methods", {
19 |   # Conversion, printing and structure
20 |   expect_equal(as.character(mss), NA_character_)
21 |   expect_equal(as_list(mss), list())
22 |   expect_equal(nodeset_apply(mss), xml_nodeset())
23 |   expect_output(print(mss), "\\{xml_missing\\}\n<NA>")
24 |   expect_equal(tree_structure(mss), NA_character_)
25 | 
26 |   # Writing is refused
27 |   expect_error(write_xml(mss), "Missing data cannot be written")
28 |   expect_error(write_html(mss), "Missing data cannot be written")
29 | 
30 |   # Attributes
31 |   expect_equal(xml_attr(mss, "dummy_attr"), NA_character_)
32 |   expect_equal(xml_attrs(mss), NA_character_)
33 | 
34 |   # Searches
35 |   expect_equal(xml_find_all(mss), xml_nodeset())
36 |   expect_equal(xml_find_chr(mss), character())
37 |   expect_equal(xml_find_lgl(mss), logical())
38 |   expect_equal(xml_find_num(mss), numeric())
39 |   expect_equal(xml_find_first(mss), xml_missing())
40 | 
41 |   # Remaining accessors
42 |   expect_equal(xml_length(mss), 0L)
43 |   expect_equal(xml_name(mss), NA_character_)
44 |   expect_equal(xml_parent(mss), xml_missing())
45 |   expect_equal(xml_path(mss), NA_character_)
46 |   expect_equal(xml_text(mss), NA_character_)
47 |   expect_equal(xml_url(mss), NA_character_)
48 | })
49 | 
50 | test_that("is.na() should return TRUE for xml_missing", {
51 |   expect_true(is.na(xml_missing()))
52 | })
53 | 


--------------------------------------------------------------------------------
/tests/testthat/test-xml_name.R:
--------------------------------------------------------------------------------
 1 | test_that("xml_name() returns the name", {
 2 |   doc <- read_xml("<body>
 3 |     <p>Some <b>text</b>.</p>
 4 |     <p>Some <i>other</i>.</p>
 5 |     <p>No bold text</p>
 6 |     </body>")
 7 | 
 8 |   # First <b> or <i> of each paragraph; the third paragraph has neither.
 9 |   paragraphs <- xml_children(doc)
10 |   found <- xml_find_first(paragraphs, ".//b|.//i")
11 | 
12 |   expect_equal(xml_name(found[[1]]), "b")
13 |   expect_equal(xml_name(found[[2]]), "i")
14 |   expect_equal(xml_name(found[[3]]), NA_character_)
15 | 
16 |   expect_equal(xml_name(found), c("b", "i", NA_character_))
17 | })
18 | 
19 | test_that("qualified names returned when ns given", {
20 |   doc <- read_xml(test_path("ns-multiple-default.xml"))
21 |   ns <- xml_ns(doc)
22 |   bars <- xml_children(xml_children(doc))
23 | 
24 |   expect_equal(xml_name(bars), c("bar", "bar"))
25 |   expect_equal(xml_name(bars, ns), c("d1:bar", "d2:bar"))
26 | })
27 | 
28 | test_that("error if missing ns spec", {
29 |   doc <- read_xml(test_path("ns-multiple-default.xml"))
30 |   # Keep only the first namespace of the document.
31 |   ns <- xml_ns(doc)[1]
32 |   bars <- xml_children(xml_children(doc))
33 | 
34 |   expect_snapshot(error = TRUE, xml_name(bars, ns))
35 | })
36 | 
37 | test_that("xml_name<- modifies the name", {
38 |   doc <- read_xml(test_path("ns-multiple-default.xml"))
39 |   ns <- xml_ns(doc)
40 |   bars <- xml_children(xml_children(doc))
41 |   bar <- bars[[1]]
42 | 
43 |   xml_name(bar) <- "foo"
44 |   expect_equal(xml_name(bar), "foo")
45 |   expect_equal(xml_name(bar, ns), "d1:foo")
46 | 
47 |   # ns is ignored
48 |   xml_name(bar, ns) <- "bar"
49 |   expect_equal(xml_name(bar), "bar")
50 |   expect_equal(xml_name(bar, ns), "d1:bar")
51 | 
52 |   # Assigning to a nodeset renames every node.
53 |   xml_name(bars) <- "foo"
54 |   expect_equal(xml_name(bars), c("foo", "foo"))
55 | 
56 |   # A missing node is left untouched.
57 |   mss <- xml_missing()
58 |   old_mss <- mss
59 |   xml_name(mss) <- "foo"
60 |   expect_identical(old_mss, mss)
61 | })
62 | 
63 | test_that("xml_set_name modifies the name", {
64 |   doc <- read_xml(test_path("ns-multiple-default.xml"))
65 |   ns <- xml_ns(doc)
66 |   bars <- xml_children(xml_children(doc))
67 |   bar <- bars[[1]]
68 | 
69 |   xml_set_name(bar, "foo")
70 |   expect_equal(xml_name(bar), "foo")
71 |   expect_equal(xml_name(bar, ns), "d1:foo")
72 | 
73 |   # ns is ignored
74 |   xml_set_name(bar, "bar", ns)
75 |   expect_equal(xml_name(bar), "bar")
76 |   expect_equal(xml_name(bar, ns), "d1:bar")
77 | 
78 |   # Passing a nodeset renames every node.
79 |   xml_set_name(bars, "foo")
80 |   expect_equal(xml_name(bars), c("foo", "foo"))
81 | 
82 |   # A missing node is left untouched.
83 |   mss <- xml_missing()
84 |   old_mss <- mss
85 |   xml_set_name(mss, "foo")
86 |   expect_identical(old_mss, mss)
87 | })
88 | 

--------------------------------------------------------------------------------
/tests/testthat/test-xml_namespaces.R:
--------------------------------------------------------------------------------
 1 | # XML parsing tests ------------------------------------------------------------
 2 | 
 3 | test_that("multiple default namespaces given unique names", {
 4 |   doc <- read_xml(test_path("ns-multiple-default.xml"))
 5 |   expect_equal(unclass(xml_ns(doc)), c(d1 = "http://foo.com", d2 = "http://bar.com"))
 6 | })
 7 | 
 8 | test_that("repeated prefixes given unique names", {
 9 |   doc <- read_xml(test_path("ns-multiple-prefix.xml"))
10 |   expect_equal(unclass(xml_ns(doc)), c(b = "http://baz.com", b1 = "http://bar.com"))
11 | })
12 | 
13 | test_that("aliased prefixes retained", {
14 |   doc <- read_xml(test_path("ns-multiple-aliases.xml"))
15 |   expect_equal(unclass(xml_ns(doc)), c(b = "http://bar.com", c = "http://bar.com"))
16 | })
17 | 
18 | 
19 | # Low-level character vector tests ---------------------------------------------
20 | 
21 | test_that("unique prefix-url combo unchanged", {
22 |   distinct_ns <- c(blah = "http://blah.com", rah = "http://rah.com")
23 |   expect_equal(.Call(unique_ns, distinct_ns), distinct_ns)
24 | })
25 | 
26 | test_that("all prefixs kept", {
27 |   shared_url <- c(blah = "http://blah.com", rah = "http://blah.com")
28 |   expect_named(.Call(unique_ns, shared_url), c("blah", "rah"))
29 | })
30 | 
31 | test_that("multiple default namespaces can be stripped", {
32 |   doc <- read_xml(test_path("ns-multiple-default.xml"))
33 |   # Before stripping: both defaults are present and "//bar" matches nothing.
34 |   expect_equal(unclass(xml_ns(doc)), c(d1 = "http://foo.com", d2 = "http://bar.com"))
35 |   expect_length(xml_find_all(doc, "//bar"), 0)
36 | 
37 |   xml_ns_strip(doc)
38 | 
39 |   # After stripping: no namespaces remain and "//bar" matches two nodes.
40 |   expect_equal(unname(unclass(xml_ns(doc))), character())
41 |   expect_length(xml_find_all(doc, "//bar"), 2)
42 | })
43 | 


--------------------------------------------------------------------------------
/tests/testthat/test-xml_node.R:
--------------------------------------------------------------------------------
1 | test_that("print method is correct", {
2 |   x <- read_html(test_path("lego.html.bz2"))
3 |   body <- xml_find_first(x, "//body")  # first match for "//body"
4 |   expect_snapshot(print(body))  # printed form is compared with the stored snapshot
5 | })
6 | 


--------------------------------------------------------------------------------
/tests/testthat/test-xml_nodeset.R:
--------------------------------------------------------------------------------
 1 | test_that("methods work on empty nodesets", {
 2 |   x <- read_xml("<a><b/></a>")
 3 |   empty <- xml_find_all(x, "//c")
 4 |   # "//c" matches nothing in <a><b/></a>, so `empty` is a zero-length nodeset.
 5 |   expect_error(empty[[1]], "subscript out of bounds")
 6 |   expect_identical(empty[1], empty)
 7 |   test <- empty
 8 |   # Replacement functions applied to the copy `test` must leave it identical to `empty`.
 9 |   xml_attr(test, "test") <- 1
10 |   expect_identical(test, empty)
11 | 
12 |   xml_attrs(test) <- c("test" = 1)
13 |   expect_identical(test, empty)
14 | 
15 |   xml_name(test) <- "test"
16 |   expect_identical(test, empty)
17 | 
18 |   xml_text(test) <- "test"
19 |   expect_identical(test, empty)
20 |   # Conversions and printing of the empty nodeset.
21 |   expect_identical(as.character(empty), character(0))
22 |   expect_identical(as_list(empty), list())
23 |   expect_identical(nodeset_apply(empty, identical), empty)
24 |   expect_output(print(empty), "\\{xml_nodeset \\(0\\)\\}")
25 |   expect_silent(tree_structure(empty))
26 |   # Functions called for their side effects must also leave `test` unchanged.
27 |   xml_add_child(test, "test")
28 |   expect_identical(test, empty)
29 | 
30 |   xml_add_sibling(test, "test")
31 |   expect_identical(test, empty)
32 |   # Accessors return zero-length results of the matching type.
33 |   expect_identical(xml_attr(empty, "test"), character())
34 |   expect_identical(xml_attrs(empty), list())
35 |   expect_identical(xml_double(empty), numeric())
36 |   expect_identical(xml_find_all(empty), empty)
37 |   expect_identical(xml_find_chr(empty), character())
38 |   expect_identical(xml_find_first(empty), empty)
39 |   expect_identical(xml_find_lgl(empty), logical())
40 |   expect_identical(xml_find_num(empty), numeric())
41 |   expect_identical(xml_integer(empty), integer())
42 |   expect_identical(xml_length(empty), 0L)
43 |   expect_identical(xml_name(empty), character())
44 |   expect_identical(xml_ns(empty), character())
45 |   expect_identical(xml_parent(empty), empty)
46 |   expect_identical(xml_path(empty), character())
47 |   # The remaining modifiers, in their xml_set_*() / xml_remove() / xml_replace() forms.
48 |   xml_remove(test)
49 |   expect_identical(test, empty)
50 | 
51 |   xml_replace(test)
52 |   expect_identical(test, empty)
53 | 
54 |   xml_set_attr(test, "test", 1)
55 |   expect_identical(test, empty)
56 | 
57 |   xml_set_attrs(test, c("test" = 1))
58 |   expect_identical(test, empty)
59 | 
60 |   xml_set_name(test, "test")
61 |   expect_identical(test, empty)
62 | 
63 |   xml_set_text(test, "test")
64 |   expect_identical(test, empty)
65 | 
66 |   expect_identical(xml_siblings(empty), empty)
67 |   expect_silent(xml_structure(empty))
68 | 
69 |   expect_identical(xml_text(empty), character())
70 |   expect_identical(xml_url(empty), character())
71 | })
72 | 
73 | test_that("print method is correct", {
74 |   skip_if(getOption("width") < 20L, "Screen too narrow")
75 |   # Snapshot the printed form of the first ten ".//div" matches under <body>.
76 |   x <- read_html(test_path("lego.html.bz2"))
77 |   body <- xml_find_first(x, "//body")
78 |   divs <- xml_find_all(body, ".//div")[1:10]
79 |   expect_snapshot(print(divs))
80 | 
81 |   # double-substring() logic
82 |   s <- c(
83 |     "123456789\\", # always too wide, '\' never encoded
84 |     "12345",       # always fits
85 |     "12\\45"       # doesn't fit when '\' is encoded
86 |   )
87 |   # embed as text on nodes <a>,<b>,<c>
88 |   s <- sprintf("<%1$s>%2$s</%1$s>", letters[1:3], s)
89 |   x <- read_xml(sprintf("<doc>%s</doc>", paste(s, collapse="")))
90 |   expect_snapshot({  # the same document printed at two adjacent widths
91 |     print(x, width = 13L)
92 |     print(x, width = 14L)
93 |   })
94 | })
95 | 


--------------------------------------------------------------------------------
/tests/testthat/test-xml_parse.R:
--------------------------------------------------------------------------------
  1 | test_that("download_xml fails if curl is not installed", {
  2 |   skip("how to test error with `check_installed()`?")
  3 |   mockery::stub(download_xml, "requireNamespace", function(...) FALSE)
  4 |   # Everything after the skip() above is currently unreachable; the stub makes `requireNamespace` return FALSE inside download_xml.
  5 |   expect_error(
  6 |     download_xml("http://httpbin.org/xml"),
  7 |     "`curl` must be installed to use `download_xml\\(\\)`"
  8 |   )
  9 | })
 10 | 
 11 | test_that("read_xml errors with an empty document", {
 12 |   expect_snapshot(error = TRUE, {
 13 |     read_xml(character())
 14 |   })
 15 | 
 16 |   # A zero-byte file on disk must be rejected as well.
 17 |   empty_path <- tempfile()
 18 |   on.exit(unlink(empty_path))
 19 |   file.create(empty_path)
 20 |   expect_error(read_xml(empty_path), "Document is empty")
 21 | })
 22 | 
 23 | test_that("read_html correctly parses malformed document", {
 24 |   # The lego fixture should yield 39 matches for ".//p" once parsed.
 25 |   expect_length(xml_find_all(read_html(test_path("lego.html.bz2")), ".//p"), 39)
 26 | })
 27 | 
 28 | test_that("parse_options errors when given an invalid option", {
 29 |   known <- xml_parse_options()
 30 |   expect_error(
 31 |     parse_options("INVALID", known),
 32 |     '`options` "INVALID" is not a valid option'
 33 |   )
 34 |   expect_snapshot(error = TRUE,
 35 |     read_html(test_path("lego.html.bz2"), options = "INVALID")
 36 |   )
 37 | 
 38 |   # Empty inputs returned as 0
 39 |   expect_identical(0L, parse_options("", known))
 40 |   expect_identical(0L, parse_options(NULL, known))
 41 | 
 42 |   # Numerics returned as integers
 43 |   expect_identical(12L, parse_options(12L, known))
 44 |   expect_identical(12L, parse_options(12, known))
 45 | 
 46 |   # Multiple inputs summed
 47 |   expect_identical(3L, parse_options(c("RECOVER", "NOENT"), known))
 48 | })
 49 | 
 50 | test_that("read_html properly passes parser arguments", {
 51 |   skip_if_not(libxml2_version() >= "2.9.2")
 52 |   catalog <- xml2_example("cd_catalog.xml")
 53 |   first_cd_child <- function(doc) as_list(doc)$html$body$catalog$cd[[1]]
 54 | 
 55 |   # Without NOBLANKS the first child of <cd> is whitespace-only text.
 56 |   keeps_blanks <- read_html(catalog, options = c("RECOVER", "NOERROR"))
 57 |   expect_equal(
 58 |     first_cd_child(keeps_blanks),
 59 |     "\r\n    "
 60 |   )
 61 | 
 62 |   # With NOBLANKS the first child is an element holding "Empire Burlesque".
 63 |   drops_blanks <- read_html(catalog, options = c("RECOVER", "NOERROR", "NOBLANKS"))
 64 |   expect_equal(first_cd_child(drops_blanks), list("Empire Burlesque"))
 65 | })
 66 | 
 67 | test_that("read_xml works with httr response objects", {
 68 |   skip("httpbin is unreliable")
 69 |   response <- httr::GET("http://httpbin.org/xml")
 70 |   doc <- read_xml(response)
 71 |   expect_s3_class(doc, "xml_document")
 72 |   # The document served at that URL is expected to contain two <slide> nodes.
 73 |   expect_length(xml_find_all(doc, "//slide"), 2)
 74 | })
 75 | 
 76 | test_that("read_xml and read_html fail for bad status codes", {
 77 |   skip("httpbin is unreliable")
 78 | 
 79 |   # Both readers must raise an error of class "http_404" for a 404 response.
 80 |   not_found <- "http://httpbin.org/status/404"
 81 | 
 82 |   for (reader in list(read_xml, read_html)) {
 83 |     expect_error(
 84 |       reader(httr::GET(not_found)),
 85 |       class = "http_404"
 86 |     )
 87 |   }
 88 | })
 89 | 
 90 | test_that("read_xml works with raw inputs", {
 91 |   # A document parsed from a literal string reports NA as its URL.
 92 |   expect_equal(xml_url(read_xml("<foo/>")), NA_character_)
 93 | })
 94 | 
 95 | test_that("read_html works with non-ASCII encodings", {
 96 |   html_path <- tempfile()
 97 |   on.exit(unlink(html_path))
 98 | 
 99 |   writeLines("<html><body>\U2019</body></html>", html_path, useBytes = TRUE)
100 |   parsed <- read_html(html_path, encoding = "UTF-8")
101 | 
102 |   expect_equal(
103 |     as.character(parsed, options = ""),
104 |     "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">\n<html><body>\U2019</body></html>\n"
105 |   )
106 | })
107 | 
108 | test_that("read_xml and read_html fail with > 1 input", {
109 |   expect_snapshot(error = TRUE, {  # error output is compared with the stored snapshot
110 |     read_xml(c("foo", "bar"))
111 |     read_html(c("foo", "bar"))  # NOTE(review): confirm the snapshot records this call too; evaluation may stop at the first error
112 |   })
113 | })
114 | 


--------------------------------------------------------------------------------
/tests/testthat/test-xml_schema.R:
--------------------------------------------------------------------------------
 1 | test_that("xml schema validates", {
 2 |   example <- function(name) read_xml(system.file("extdata", name, package = "xml2"))
 3 |   # The packaged order document is expected to validate against the packaged schema.
 4 |   expect_true(xml_validate(example("order-doc.xml"), example("order-schema.xml")))
 5 | })
 6 | 
 7 | test_that("xml schema errors", {
 8 |   # Corrupt the packaged example: empty <quantity>, "ABC" put before 95819, partNum attribute removed.
 9 |   broken <- readLines(system.file("extdata/order-doc.xml", package = "xml2"))
10 |   broken <- sub("<quantity>1", "<quantity>", broken)
11 |   broken <- sub("95819", "ABC95819", broken)
12 |   broken <- sub('partNum="926-AA"', "", broken)
13 |   schema <- read_xml(system.file("extdata/order-schema.xml", package = "xml2"))
14 |   result <- xml_validate(read_xml(paste(broken, collapse = "\n")), schema)
15 |   expect_false(result)
16 |   problems <- attr(result, "errors")
17 |   expect_type(problems, "character")
18 |   expect_length(problems, 4)
19 | })
20 | 


--------------------------------------------------------------------------------
/tests/testthat/test-xml_serialize.R:
--------------------------------------------------------------------------------
 1 | x <- read_xml("<a>
 2 |   <b><c>123</c></b>
 3 |   <b><c>456</c></b>
 4 |   </a>")  # file-level fixture: read by every test in this file that does not assign its own `x`
 5 | 
 6 | test_that("xml_serialize and xml_unserialize work with xml_document input", {
 7 |   # Round trip through the value returned when the connection is NULL.
 8 |   expect_identical(as.character(x), as.character(xml_unserialize(xml_serialize(x, NULL))))
 9 | 
10 |   # Round trip through a temporary file.
11 |   path <- tempfile()
12 |   on.exit(unlink(path))
13 |   xml_serialize(x, path)
14 |   expect_identical(as.character(xml_unserialize(path)), as.character(x))
15 | })
16 | 
17 | test_that("xml_serialize and xml_unserialize work with xml_node input", {
18 |   node <- xml_find_first(x, "//b")
19 |   # Round trip through the value returned when the connection is NULL.
20 |   expect_identical(as.character(node), as.character(xml_unserialize(xml_serialize(node, NULL))))
21 | 
22 |   # Round trip through a temporary file.
23 |   path <- tempfile()
24 |   on.exit(unlink(path))
25 |   xml_serialize(node, path)
26 |   expect_identical(as.character(xml_unserialize(path)), as.character(node))
27 | })
28 | 
29 | test_that("xml_serialize and xml_unserialize work with xml_nodeset input", {
30 |   nodes <- xml_find_all(x, "//b")
31 |   # Round trip through the value returned when the connection is NULL.
32 |   expect_identical(as.character(nodes), as.character(xml_unserialize(xml_serialize(nodes, NULL))))
33 | 
34 |   # Round trip through a temporary file.
35 |   path <- tempfile()
36 |   on.exit(unlink(path))
37 |   xml_serialize(nodes, path)
38 |   expect_identical(as.character(xml_unserialize(path)), as.character(nodes))
39 | })
40 | 
41 | test_that("xml_serialize and xml_unserialize work with HTML-based xml_document input", {
42 |   # The packaged HTML example, parsed with read_html().
43 |   html <- read_html(system.file("extdata", "r-project.html", package = "xml2"))
44 | 
45 |   # Round trip through the value returned when the connection is NULL.
46 |   expect_identical(as.character(html), as.character(xml_unserialize(xml_serialize(html, NULL))))
47 | 
48 |   # Round trip through a temporary file.
49 |   path <- tempfile()
50 |   on.exit(unlink(path))
51 |   xml_serialize(html, path)
52 |   expect_identical(as.character(xml_unserialize(path)), as.character(html))
53 | })
54 | 
55 | test_that("xml_unserialize throws an error if given a invalid object", {
56 |   expect_error(xml_unserialize(serialize(1, NULL)), "Not a serialized xml2 object")  # input comes from base serialize(), not xml_serialize()
57 | })
58 | 


--------------------------------------------------------------------------------
/tests/testthat/test-xml_structure.R:
--------------------------------------------------------------------------------
 1 | test_that("xml_structure", {
 2 |   expect_output(  # default indent: the expected output nests tags two spaces per level
 3 |     xml_structure(read_xml("<a><b><c/><c/></b><d/></a>")),
 4 |     "<a>
 5 |   <b>
 6 |     <c>
 7 |     <c>
 8 |   <d>"
 9 |   )
10 |   # indent = 0L: every tag is expected at the start of its line
11 |   expect_output(
12 |     xml_structure(read_xml("<a><b><c/><c/></b><d/></a>"), indent = 0L),
13 |     "<a>
14 | <b>
15 | <c>
16 | <c>
17 | <d>"
18 |   )
19 | })
20 | 
21 | test_that("xml_structure can write to a file (#244)", {
22 |   tmp <- tempfile()
23 |   on.exit(unlink(tmp))  # do not leave the temporary file behind when the test exits
24 |   xml_structure(read_xml("<a><b><c/><c/></b><d/></a>"), file = tmp)
25 |   expect_equal(readLines(tmp), c("<a>", "  <b>", "    <c>", "    <c>", "  <d>"))
26 |   # repeated calls erase existing content
27 |   xml_structure(read_xml("<a><b><c/><c/></b><d/></a>"), file = tmp)
28 |   expect_equal(readLines(tmp), c("<a>", "  <b>", "    <c>", "    <c>", "  <d>"))
29 | })
30 | 
31 | test_that("xml_structure is correct", {
32 |   x <- read_html(test_path("lego.html.bz2"))
33 |   # NOTE(review): the predicate tests an attribute named `div`; `@class` or `@id` may have been intended — check the stored snapshot before changing it.
34 |   quicklinks <- xml_find_first(x, "//div[contains(@div, 'quicklinks')]")
35 |   expect_snapshot(html_structure(quicklinks))
36 | })
37 | 


--------------------------------------------------------------------------------
/tests/testthat/test-xml_text.R:
--------------------------------------------------------------------------------
 1 | test_that("xml_text returns only text without markup", {
 2 |   para <- read_xml("<p>This is some text. This is <b>bold!</b></p>")
 3 |   # Whole paragraph: the text of the nested <b> is included, its tags are not.
 4 |   expect_identical(xml_text(para), "This is some text. This is bold!")
 5 |   # Only the <b> child.
 6 |   expect_identical(xml_text(xml_children(para)), "bold!")
 7 | })
 8 | 
 9 | test_that("xml_text works properly with xml_nodeset objects", {
10 |   body <- read_xml("<body>
11 |     <p>Some <b>text</b>.</p>
12 |     <p>Some <i>other</i>.</p>
13 |     <p>No bold text</p>
14 |     </body>")
15 | 
16 |   # One result per <p>; the third <p> has no <b> or <i>, so its text is NA.
17 |   emphasis <- xml_find_first(xml_children(body), ".//b|.//i")
18 | 
19 |   expect_identical(
20 |     xml_text(emphasis),
21 |     c("text", "other", NA)
22 |   )
23 | })
24 | 
25 | test_that("xml_text<- and xml_set_text work properly with xml_nodeset objects", {
26 |   root <- read_xml("<x>This is some text. <x>This is some nested text.</x></x>")
27 |   expect_identical(xml_text(root), "This is some text. This is some nested text.")
28 | 
29 |   # Only the node's own leading text is replaced; the nested element's text stays.
30 |   xml_text(root) <- "test"
31 |   expect_identical(xml_text(root), "testThis is some nested text.")
32 |   xml_set_text(root, "test2")
33 |   expect_identical(xml_text(root), "test2This is some nested text.")
34 | })
35 | 
36 | test_that("xml_text trims whitespace if requested, including non-breaking spaces", {
37 |   single <- read_html("<p>   Some text &euro;  &nbsp;</p>")
38 |   # Untrimmed text keeps the surrounding spaces and the non-breaking space.
39 |   expect_identical(
40 |     xml_text(single),
41 |     "   Some text \u20ac  \u00a0"
42 |   )
43 |   # trim = TRUE removes both kinds of whitespace from the ends.
44 |   expect_identical(xml_text(single, trim = TRUE), "Some text \u20ac")
45 | 
46 |   # Trimming is applied to each node of a nodeset.
47 |   several <- read_html("<body><p>   Some text &euro;  &nbsp;</p><p>  and more &euro; text   &nbsp;</body>")
48 |   paragraphs <- xml_find_all(several, ".//p")
49 |   expect_identical(
50 |     xml_text(paragraphs, trim = TRUE),
51 |     c("Some text \u20ac", "and more \u20ac text")
52 |   )
53 | })
54 | 
55 | test_that("xml_integer() returns an integer vector", {
56 |   doc <- read_xml("<plot><point x='1' y='2' /><point x='2' y='1' /></plot>")
57 | 
58 |   # "//@x" selects the `x` attribute of each <point>.
59 |   x_attrs <- xml_find_all(doc, "//@x")
60 | 
61 |   expect_identical(xml_integer(x_attrs), c(1L, 2L))
62 | })
63 | 
64 | 
65 | test_that("xml_double() returns a numeric vector", {
66 |   earth <- read_xml("<earth><point latitude = '42.3466456' longitude = '-71.0390351' /><point latitude = '-36.8523378' longitude = '174.7691073' /></earth>")
67 |   latitudes <- xml_find_all(earth, "//@latitude")
68 |   expect_identical(xml_double(latitudes), c(42.3466456, -36.8523378))
69 | })
70 | 


--------------------------------------------------------------------------------
/tests/testthat/test-xml_type.R:
--------------------------------------------------------------------------------
 1 | test_that("xml_type() works", {
 2 |   body <- read_xml("<body>
 3 |     <p>Some <b>text</b>.</p>
 4 |     <p>Some <i>other</i>.</p>
 5 |     <p>No bold text</p>
 6 |     </body>")
 7 | 
 8 |   # One entry per <p>; the third <p> has no <b> or <i>, so its type is NA.
 9 |   found <- xml_find_first(xml_children(body), ".//b|.//i")
10 | 
11 |   expect_equal(xml_type(found[[1]]), "element")
12 |   expect_equal(xml_type(found[[3]]), NA_character_)
13 |   expect_equal(xml_type(found), c("element", "element", NA))
14 | 
15 |   # The children of the found nodes form an empty nodeset here.
16 |   childless <- xml_children(found)
17 |   expect_identical(xml_type(childless), character())
18 | })
19 | 


--------------------------------------------------------------------------------
/tests/testthat/test-xml_url.R:
--------------------------------------------------------------------------------
  1 | test_that("url_absolute", {
  2 |   relative <- c(".", "..", "/", "/x")
  3 |   two_bases <- c("http://hadley.nz/a/b/c/d", "http://foo.bar")
  4 | 
  5 |   expect_equal(
  6 |     url_absolute(relative, "http://hadley.nz/a/b/c/d"),
  7 |     c("http://hadley.nz/a/b/c/", "http://hadley.nz/a/b/", "http://hadley.nz/", "http://hadley.nz/x")
  8 |   )
  9 |   # More than one base URL is rejected.
 10 |   expect_error(url_absolute(relative, two_bases), "Base URL must be length 1")
 11 | })
 12 | 
 13 | test_that("url_relative", {
 14 |   # The behavior of libxml2 with relative paths is fragile so we skip this test
 15 |   skip("libxml2-dependent")
 16 |   # Everything below is unreachable while the skip() above is in place.
 17 |   expect_equal(
 18 |     url_relative("http://hadley.nz/a/c", "http://hadley.nz"),
 19 |     "/a/c"
 20 |   )
 21 | 
 22 |   expect_equal(
 23 |     url_relative("http://hadley.nz/a/c", "http://hadley.nz/"),
 24 |     "a/c"
 25 |   )
 26 | 
 27 |   expect_equal(
 28 |     url_relative("http://hadley.nz/a/c", "http://hadley.nz/a/b"),
 29 |     "c"
 30 |   )
 31 | 
 32 |   expect_equal(
 33 |     url_relative("http://hadley.nz/a/c", "http://hadley.nz/a/b/"),
 34 |     "../c"
 35 |   )
 36 |   # A second argument of length two is rejected.
 37 |   expect_error(
 38 |     url_relative("http://hadley.nz/a/c", c("http://hadley.nz/a/b/c/d", "http://foo.bar")),
 39 |     "Base URL must be length 1"
 40 |   )
 41 | })
 42 | 
 43 | test_that("url_parse", {
 44 |   # Expected one-row result; only the fields that vary between cases are arguments.
 45 |   parsed <- function(port = NA_integer_, query = "", fragment = "") {
 46 |     data.frame(
 47 |       scheme = "http", server = "had.co.nz", port = port,
 48 |       user = "", path = "/", query = query, fragment = fragment,
 49 |       stringsAsFactors = FALSE
 50 |     )
 51 |   }
 52 | 
 53 |   expect_equal(
 54 |     url_parse("http://had.co.nz/"),
 55 |     parsed()
 56 |   )
 57 | 
 58 |   # With an explicit port.
 59 |   expect_equal(
 60 |     url_parse("http://had.co.nz:1234/"),
 61 |     parsed(port = 1234L)
 62 |   )
 63 | 
 64 |   # With a query string.
 65 |   expect_equal(
 66 |     url_parse("http://had.co.nz:1234/?a=1&b=2"),
 67 |     parsed(port = 1234L, query = "a=1&b=2")
 68 |   )
 69 | 
 70 |   # With a fragment as well.
 71 |   expect_equal(
 72 |     url_parse("http://had.co.nz:1234/?a=1&b=2#def"),
 73 |     parsed(port = 1234L, query = "a=1&b=2", fragment = "def")
 74 |   )
 75 | })
 76 | 
 77 | test_that("url_escape", {
 78 |   # `reserved` with more than one element is rejected.
 79 |   expect_error(
 80 |     url_escape("a b c", reserved = c("a", "b")),
 81 |     "`reserved` must be character vector of length 1"
 82 |   )
 83 | 
 84 |   # By default a space is percent-encoded.
 85 |   expect_equal(
 86 |     url_escape("a b c"),
 87 |     "a%20b%20c"
 88 |   )
 89 | 
 90 |   # A space passed as `reserved` is left as it is.
 91 |   expect_equal(url_escape("a b c", " "), "a b c")
 92 | 
 93 |   # url_unescape() decodes percent-escapes, upper or lower case hex.
 94 |   expect_equal(
 95 |     url_unescape("a%20b%2fc"),
 96 |     "a b/c"
 97 |   )
 98 | 
 99 |   # Two escaped bytes decode to the single character U+00B5.
100 |   micro <- "\u00B5"
101 |   expect_equal(url_unescape("%C2%B5"), micro)
102 | })
103 | 


--------------------------------------------------------------------------------
/tests/testthat/test-xml_write.R:
--------------------------------------------------------------------------------
  1 | test_that("write_xml errors for incorrect directory and with invalid inputs", {
  2 |   x <- read_xml("<x/>")
  3 |   filename <- "does_not_exist/test.xml"  # the parent directory is expected to be absent
  4 |   expect_error(write_xml(x, filename), "'does_not_exist' does not exist in current working directory")
  5 |   # A `file` argument with two elements is an error; its message is compared with the stored snapshot.
  6 |   expect_snapshot(error = TRUE, {
  7 |     write_xml(x, c("test.xml", "foo"))
  8 |   })
  9 | })
 10 | 
 11 | test_that("write_xml works with relative file paths", {
 12 |   out_path <- "../test.xml"
 13 |   on.exit(unlink(out_path))
 14 | 
 15 |   # With "no_declaration" the file holds just the element and a newline.
 16 |   write_xml(read_xml("<x/>"), out_path, options = "no_declaration")
 17 |   expect_identical(readChar(out_path, 1000L), "<x/>\n")
 18 | })
 19 | 
 20 | test_that("write_xml works with no options", {
 21 |   out_path <- "../test.xml"
 22 |   on.exit(unlink(out_path))
 23 | 
 24 |   # With `options = NULL` the XML declaration is written first.
 25 |   write_xml(read_xml("<x/>"), out_path, options = NULL)
 26 |   expect_identical(readChar(out_path, 1000L), "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<x/>\n")
 27 | })
 28 | 
 29 | test_that("write_xml works with an explicit connections", {
 30 |   out_path <- "../test.xml"
 31 |   on.exit(unlink(out_path))
 32 |   con <- file(out_path, "wb")
 33 | 
 34 |   write_xml(read_xml("<x/>"), con, options = "no_declaration")
 35 |   # The connection is closed before the file is read back by path.
 36 |   close(con)
 37 |   expect_identical(readChar(out_path, 1000L), "<x/>\n")
 38 | })
 39 | 
 40 | test_that("write_xml works with an implicit connections", {
 41 |   x <- read_xml("<x/>")
 42 |   # The document is written by path and read back through a gzfile() connection.
 43 |   filename <- "../test.xml.gz"
 44 |   write_xml(x, filename, options = "no_declaration")
 45 |   file <- gzfile(filename, "rb")
 46 |   on.exit({
 47 |     close(file) # close first: a file that is still open cannot be removed on Windows
 48 |     unlink(filename)
 49 |   })
 50 |   expect_identical(readChar(file, 1000L), "<x/>\n")
 51 | })
 52 | 
 53 | test_that("write_xml works with nodeset input and files", {
 54 |   doc <- read_xml("<x><y/><y><z/></y></x>")
 55 |   nodes <- xml_find_all(doc, "//y")
 56 |   out_path <- "../test.xml"
 57 |   on.exit(unlink(out_path))
 58 |   # The full "//y" nodeset cannot be written.
 59 |   expect_error(
 60 |     write_xml(nodes, out_path, options = "no_declaration"),
 61 |     "Can only save length 1 node sets"
 62 |   )
 63 |   # A length-one subset is written as that node alone, with no trailing newline.
 64 |   write_xml(nodes[1], out_path, options = "no_declaration")
 65 |   expect_identical(readChar(out_path, 1000L), "<y/>")
 66 | })
 67 | 
 68 | test_that("write_xml works with nodeset input and connections", {
 69 |   x <- read_xml("<x><y/><y/></x>")
 70 |   y <- xml_find_all(x, "//y")
 71 | 
 72 |   filename <- "../test.xml.gz"
 73 |   expect_error(
 74 |     write_xml(y, filename, options = "no_declaration"),
 75 |     "Can only save length 1 node sets"
 76 |   )
 77 |   # A `file` argument with two elements is an error; its message is compared with the stored snapshot.
 78 |   expect_snapshot(error = TRUE, {
 79 |     write_xml(y[1], c(filename, "foo"))
 80 |   })
 81 |   # A length-one nodeset is written by path and read back through gzfile().
 82 |   write_xml(y[1], filename, options = "no_declaration")
 83 |   file <- gzfile(filename, "rb")
 84 |   on.exit({
 85 |     close(file) # close first: a file that is still open cannot be removed on Windows
 86 |     unlink(filename)
 87 |   })
 88 |   expect_identical(readChar(file, 1000L), "<y/>")
 89 | })
 90 | 
 91 | test_that("write_xml works with node input and files", {
 92 |   x <- read_xml("<x><y/><y/></x>")
 93 |   y <- xml_find_first(x, "//y")
 94 | 
 95 |   filename <- "../test.xml"
 96 |   expect_snapshot(error = TRUE, write_xml(y, c(filename, "foo")))
 97 |   # Register the cleanup before writing so the file is removed even if the write fails.
 98 |   on.exit(unlink(filename))
 99 |   write_xml(y, filename, options = "no_declaration")
100 |   expect_identical(readChar(filename, 1000L), "<y/>")
101 | })
102 | 
103 | test_that("write_xml works with node input and connections", {
104 |   x <- read_xml("<x><y/><y/></x>")
105 |   y <- xml_find_first(x, "//y")
106 |   # The node is written by path and read back through a gzfile() connection.
107 |   filename <- "../test.xml.gz"
108 |   write_xml(y, filename, options = "no_declaration")
109 |   file <- gzfile(filename, "rb")
110 |   on.exit({
111 |     close(file) # close first: a file that is still open cannot be removed on Windows
112 |     unlink(filename)
113 |   })
114 |   expect_identical(readChar(file, 1000L), "<y/>")
115 | })
116 | 
117 | test_that("write_html work with html input", {
118 |   x <- read_html("<html><title>Foo</title></html>")
119 |   # The document is written by path and read back through a gzfile() connection.
120 |   filename <- "../test.html.gz"
121 |   write_html(x, filename)
122 |   file <- gzfile(filename, "rb")
123 |   on.exit({
124 |     close(file) # close first: a file that is still open cannot be removed on Windows
125 |     unlink(filename)
126 |   })
127 |   expect_identical(
128 |     readChar(file, 1000L),
129 |     "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">\n<html><head>\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\">\n<title>Foo</title>\n</head></html>\n"
130 |   )
131 | })
132 | 
133 | test_that("write_xml returns invisibly", {
134 |   out_path <- tempfile()
135 |   on.exit(unlink(out_path))
136 | 
137 |   # withVisible() exposes both the returned value and its visibility flag.
138 |   outcome <- withVisible(write_xml(read_xml("<x>foo</x>"), out_path))
139 | 
140 |   expect_null(outcome$value)
141 |   expect_false(outcome$visible)
142 | })
143 | 


--------------------------------------------------------------------------------
/tests/testthat/xml:
--------------------------------------------------------------------------------
 1 | <?xml version='1.0' encoding='us-ascii'?>
 2 | 
 3 | <!--  A SAMPLE set of slides  -->
 4 | 
 5 | <slideshow 
 6 |     title="Sample Slide Show"
 7 |     date="Date of publication"
 8 |     author="Yours Truly"
 9 |     >
10 | 
11 |     <!-- TITLE SLIDE -->
12 |     <slide type="all">
13 |       <title>Wake up to WonderWidgets!</title>
14 |     </slide>
15 | 
16 |     <!-- OVERVIEW -->
17 |     <slide type="all">
18 |         <title>Overview</title>
19 |         <item>Why <em>WonderWidgets</em> are great</item>
20 |         <item/>
21 |         <item>Who <em>buys</em> WonderWidgets</item>
22 |     </slide>
23 | 
24 | </slideshow>


--------------------------------------------------------------------------------
/tools/winlibs.R:
--------------------------------------------------------------------------------
 1 | if(!file.exists("../windows/libxml2/include/libxml2/libxml")){  # do nothing when the libxml2 headers are already unpacked
 2 |   unlink("../windows", recursive = TRUE)  # discard any partial earlier download
 3 |   url <- if(grepl("aarch", R.version$platform)){  # choose the archive by platform, compiler and R version
 4 |     "https://github.com/r-windows/bundles/releases/download/libxml2-2.11.5/libxml2-2.11.5-clang-aarch64.tar.xz"
 5 |   } else if(grepl("clang", Sys.getenv('R_COMPILED_BY'))){
 6 |     "https://github.com/r-windows/bundles/releases/download/libxml2-2.11.5/libxml2-2.11.5-clang-x86_64.tar.xz"
 7 |   } else if(getRversion() >= "4.2") {
 8 |     "https://github.com/r-windows/bundles/releases/download/libxml2-2.11.5/libxml2-2.11.5-ucrt-x86_64.tar.xz"
 9 |   } else {
10 |     "https://github.com/rwinlib/libxml2/archive/v2.10.3.tar.gz"
11 |   }
12 |   download.file(url, basename(url), quiet = TRUE)  # saved in the current directory under the archive's own file name
13 |   dir.create("../windows", showWarnings = FALSE)
14 |   untar(basename(url), exdir = "../windows", tar = 'internal')
15 |   unlink(basename(url))  # the downloaded archive is not needed once unpacked
16 |   setwd("../windows")  # NOTE: the working directory is changed here and not restored
17 |   file.rename(list.files(), 'libxml2')  # presumably the archive unpacks to a single top-level entry — confirm
18 | }
19 | 


--------------------------------------------------------------------------------
/xml2.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | ProjectId: 1ff14708-efe3-4c8d-a8e6-1d8c6504bbb8
 3 | 
 4 | RestoreWorkspace: Default
 5 | SaveWorkspace: Default
 6 | AlwaysSaveHistory: Default
 7 | 
 8 | EnableCodeIndexing: Yes
 9 | UseSpacesForTab: Yes
10 | NumSpacesForTab: 2
11 | Encoding: UTF-8
12 | 
13 | RnwWeave: Sweave
14 | LaTeX: pdfLaTeX
15 | 
16 | AutoAppendNewline: Yes
17 | StripTrailingWhitespace: Yes
18 | 
19 | BuildType: Package
20 | PackageUseDevtools: Yes
21 | PackageInstallArgs: --no-multiarch --with-keep.source
22 | PackageRoxygenize: rd,collate,namespace
23 | 


--------------------------------------------------------------------------------