├── .appveyor.yml ├── .codecov.yml ├── .gitattributes ├── .github └── workflows │ └── ci.yml ├── CMakeLists.txt ├── LICENSE ├── README.md ├── build.jam ├── doc ├── char_delimiters_separator.htm ├── char_separator.htm ├── escaped_list_separator.htm ├── index.html ├── introduc.htm ├── offset_separator.htm ├── token_iterator.htm ├── tokenizer.htm └── tokenizerfunction.htm ├── example ├── Jamfile.v2 ├── char_sep_example_1.cpp ├── char_sep_example_2.cpp └── char_sep_example_3.cpp ├── include └── boost │ ├── token_functions.hpp │ ├── token_iterator.hpp │ └── tokenizer.hpp ├── index.html ├── meta └── libraries.json └── test ├── Jamfile.v2 ├── cmake_test ├── CMakeLists.txt └── main.cpp ├── examples.cpp ├── simple_example_1.cpp ├── simple_example_2.cpp ├── simple_example_3.cpp ├── simple_example_4.cpp └── simple_example_5.cpp /.appveyor.yml: -------------------------------------------------------------------------------- 1 | # Copyright 2016, 2017 Peter Dimov 2 | # Copyright 2017 - 2019 James E. King III 3 | # Copyright 2019 - 2021 Alexander Grund 4 | # Distributed under the Boost Software License, Version 1.0. 5 | # (See accompanying file LICENSE_1_0.txt or copy at http://boost.org/LICENSE_1_0.txt) 6 | 7 | # 8 | # Generic Appveyor build script for boostorg repositories 9 | # See: https://github.com/boostorg/boost-ci/ 10 | # 11 | # Instructions for customizing this script for your library: 12 | # 13 | # 1. Customize the compilers and language levels you want. 14 | # 2. If you have more than include/, src/, test/, example/, examples/, 15 | # benchmark/ or tools/ directories, set the environment variable DEPINST. 16 | # For example if your build uses code in "bench/" and "fog/" directories: 17 | # - DEPINST: --include bench --include fog 18 | # 3. Enable pull request builds in your boostorg/ account. 19 | # 20 | # That's it - the script will do everything else for you. 
21 | # 22 | 23 | version: 1.0.{build}-{branch} 24 | 25 | shallow_clone: true 26 | 27 | branches: 28 | only: 29 | - master 30 | - develop 31 | - /bugfix\/.*/ 32 | - /feature\/.*/ 33 | - /fix\/.*/ 34 | - /pr\/.*/ 35 | 36 | skip_commits: 37 | files: 38 | - LICENSE 39 | - meta/* 40 | - README.md 41 | 42 | matrix: 43 | fast_finish: false 44 | # Adding MAYFAIL to any matrix job allows it to fail but the build stays green: 45 | allow_failures: 46 | - MAYFAIL: true 47 | 48 | environment: 49 | global: 50 | B2_CI_VERSION: 1 51 | GIT_FETCH_JOBS: 4 52 | # see: http://www.boost.org/build/doc/html/bbv2/overview/invocation.html#bbv2.overview.invocation.properties 53 | # to use the default for a given environment, comment it out; recommend you build debug and release however: 54 | # on Windows it is important to exercise all the possibilities, especially shared vs static, however most 55 | # libraries that care about this exercise it in their Jamfiles... 56 | B2_ADDRESS_MODEL: 32,64 57 | B2_LINK: shared,static 58 | # B2_THREADING: threading=multi,single 59 | B2_VARIANT: release 60 | 61 | matrix: 62 | - FLAVOR: Visual Studio 2017 C++2a Strict 63 | APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017 64 | B2_CXXFLAGS: -permissive- 65 | B2_CXXSTD: 2a 66 | B2_TOOLSET: msvc-14.1 67 | 68 | - FLAVOR: Visual Studio 2017 C++14/17 69 | APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017 70 | B2_CXXSTD: 14,17 71 | B2_TOOLSET: msvc-14.1 72 | 73 | - FLAVOR: cygwin (32-bit) 74 | APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017 75 | ADDPATH: C:\cygwin\bin; 76 | B2_ADDRESS_MODEL: 32 77 | B2_CXXSTD: 03,11,14,1z 78 | B2_TOOLSET: gcc 79 | 80 | - FLAVOR: cygwin (64-bit) 81 | APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017 82 | ADDPATH: C:\cygwin64\bin; 83 | B2_ADDRESS_MODEL: 64 84 | B2_CXXSTD: 03,11,14,1z 85 | B2_TOOLSET: gcc 86 | 87 | install: 88 | - git clone --depth 1 https://github.com/boostorg/boost-ci.git C:\boost-ci-cloned 89 | # Copy ci folder if not testing Boost.CI 90 | - if NOT 
"%APPVEYOR_PROJECT_NAME%" == "boost-ci" xcopy /s /e /q /i /y C:\boost-ci-cloned\ci .\ci 91 | - rmdir /s /q C:\boost-ci-cloned 92 | - ci\appveyor\install.bat 93 | 94 | build: off 95 | 96 | test_script: ci\build.bat 97 | 98 | -------------------------------------------------------------------------------- /.codecov.yml: -------------------------------------------------------------------------------- 1 | # Copyright 2019 - 2021 Alexander Grund 2 | # Distributed under the Boost Software License, Version 1.0. 3 | # (See accompanying file LICENSE_1_0.txt or copy at http://boost.org/LICENSE_1_0.txt) 4 | # 5 | # Sample codecov configuration file. Edit as required 6 | 7 | codecov: 8 | max_report_age: off 9 | require_ci_to_pass: yes 10 | notify: 11 | # Increase this if you have multiple coverage collection jobs 12 | after_n_builds: 2 13 | wait_for_ci: yes 14 | 15 | parsers: 16 | gcov: 17 | branch_detection: 18 | conditional: yes 19 | loop: yes 20 | method: no 21 | macro: no 22 | 23 | # Change how pull request comments look 24 | comment: 25 | layout: "reach,diff,flags,files,footer" 26 | 27 | # Ignore specific files or folders. Glob patterns are supported. 
28 | # See https://docs.codecov.com/docs/ignoring-paths 29 | ignore: 30 | - libs/tokenizer/test/ 31 | - test/ 32 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | * text=auto !eol svneol=native#text/plain 2 | *.gitattributes text svneol=native#text/plain 3 | 4 | # Scriptish formats 5 | *.bat text svneol=native#text/plain 6 | *.bsh text svneol=native#text/x-beanshell 7 | *.cgi text svneol=native#text/plain 8 | *.cmd text svneol=native#text/plain 9 | *.js text svneol=native#text/javascript 10 | *.php text svneol=native#text/x-php 11 | *.pl text svneol=native#text/x-perl 12 | *.pm text svneol=native#text/x-perl 13 | *.py text svneol=native#text/x-python 14 | *.sh eol=lf svneol=LF#text/x-sh 15 | configure eol=lf svneol=LF#text/x-sh 16 | 17 | # Image formats 18 | *.bmp binary svneol=unset#image/bmp 19 | *.gif binary svneol=unset#image/gif 20 | *.ico binary svneol=unset#image/ico 21 | *.jpeg binary svneol=unset#image/jpeg 22 | *.jpg binary svneol=unset#image/jpeg 23 | *.png binary svneol=unset#image/png 24 | *.tif binary svneol=unset#image/tiff 25 | *.tiff binary svneol=unset#image/tiff 26 | *.svg text svneol=native#image/svg%2Bxml 27 | 28 | # Data formats 29 | *.pdf binary svneol=unset#application/pdf 30 | *.avi binary svneol=unset#video/avi 31 | *.doc binary svneol=unset#application/msword 32 | *.dsp text svneol=crlf#text/plain 33 | *.dsw text svneol=crlf#text/plain 34 | *.eps binary svneol=unset#application/postscript 35 | *.gz binary svneol=unset#application/gzip 36 | *.mov binary svneol=unset#video/quicktime 37 | *.mp3 binary svneol=unset#audio/mpeg 38 | *.ppt binary svneol=unset#application/vnd.ms-powerpoint 39 | *.ps binary svneol=unset#application/postscript 40 | *.psd binary svneol=unset#application/photoshop 41 | *.rdf binary svneol=unset#text/rdf 42 | *.rss text svneol=unset#text/xml 43 | *.rtf binary svneol=unset#text/rtf 44 | 
*.sln text svneol=native#text/plain 45 | *.swf binary svneol=unset#application/x-shockwave-flash 46 | *.tgz binary svneol=unset#application/gzip 47 | *.vcproj text svneol=native#text/xml 48 | *.vcxproj text svneol=native#text/xml 49 | *.vsprops text svneol=native#text/xml 50 | *.wav binary svneol=unset#audio/wav 51 | *.xls binary svneol=unset#application/vnd.ms-excel 52 | *.zip binary svneol=unset#application/zip 53 | 54 | # Text formats 55 | .htaccess text svneol=native#text/plain 56 | *.bbk text svneol=native#text/xml 57 | *.cmake text svneol=native#text/plain 58 | *.css text svneol=native#text/css 59 | *.dtd text svneol=native#text/xml 60 | *.htm text svneol=native#text/html 61 | *.html text svneol=native#text/html 62 | *.ini text svneol=native#text/plain 63 | *.log text svneol=native#text/plain 64 | *.mak text svneol=native#text/plain 65 | *.qbk text svneol=native#text/plain 66 | *.rst text svneol=native#text/plain 67 | *.sql text svneol=native#text/x-sql 68 | *.txt text svneol=native#text/plain 69 | *.xhtml text svneol=native#text/xhtml%2Bxml 70 | *.xml text svneol=native#text/xml 71 | *.xsd text svneol=native#text/xml 72 | *.xsl text svneol=native#text/xml 73 | *.xslt text svneol=native#text/xml 74 | *.xul text svneol=native#text/xul 75 | *.yml text svneol=native#text/plain 76 | boost-no-inspect text svneol=native#text/plain 77 | CHANGES text svneol=native#text/plain 78 | COPYING text svneol=native#text/plain 79 | INSTALL text svneol=native#text/plain 80 | Jamfile text svneol=native#text/plain 81 | Jamroot text svneol=native#text/plain 82 | Jamfile.v2 text svneol=native#text/plain 83 | Jamrules text svneol=native#text/plain 84 | Makefile* text svneol=native#text/plain 85 | README text svneol=native#text/plain 86 | TODO text svneol=native#text/plain 87 | 88 | # Code formats 89 | *.c text svneol=native#text/plain 90 | *.cpp text svneol=native#text/plain 91 | *.h text svneol=native#text/plain 92 | *.hpp text svneol=native#text/plain 93 | *.ipp text 
svneol=native#text/plain 94 | *.tpp text svneol=native#text/plain 95 | *.jam text svneol=native#text/plain 96 | *.java text svneol=native#text/plain 97 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | # Copyright 2020-2021 Peter Dimov 2 | # Copyright 2021 Andrey Semashev 3 | # Copyright 2021 Alexander Grund 4 | # Copyright 2022-2025 James E. King III 5 | # 6 | # Distributed under the Boost Software License, Version 1.0. 7 | # (See accompanying file LICENSE_1_0.txt or copy at http://boost.org/LICENSE_1_0.txt) 8 | --- 9 | name: CI 10 | 11 | on: 12 | pull_request: 13 | push: 14 | branches: 15 | - master 16 | - develop 17 | - bugfix/** 18 | - feature/** 19 | - fix/** 20 | - pr/** 21 | paths-ignore: 22 | - LICENSE 23 | - meta/** 24 | - README.md 25 | 26 | concurrency: 27 | group: ${{format('{0}:{1}', github.repository, github.ref)}} 28 | cancel-in-progress: true 29 | 30 | env: 31 | GIT_FETCH_JOBS: 8 32 | NET_RETRY_COUNT: 5 33 | B2_CI_VERSION: 1 34 | B2_VARIANT: debug,release 35 | B2_LINK: shared,static 36 | LCOV_BRANCH_COVERAGE: 1 37 | 38 | jobs: 39 | posix: 40 | defaults: 41 | run: 42 | shell: bash 43 | 44 | strategy: 45 | fail-fast: false 46 | matrix: 47 | include: 48 | # Linux, gcc 49 | - { compiler: gcc-4.8, cxxstd: '03,11', os: ubuntu-latest, container: 'ubuntu:16.04' } 50 | - { compiler: gcc-4.9, cxxstd: '03,11', os: ubuntu-latest, container: 'ubuntu:16.04' } 51 | - { compiler: gcc-5, cxxstd: '03,11,14,1z', os: ubuntu-latest, container: 'ubuntu:18.04' } 52 | - { compiler: gcc-6, cxxstd: '03,11,14,17', os: ubuntu-latest, container: 'ubuntu:18.04' } 53 | - { compiler: gcc-7, cxxstd: '03,11,14,17', os: ubuntu-20.04 } 54 | - { compiler: gcc-8, cxxstd: '03,11,14,17,2a', os: ubuntu-20.04 } 55 | - { compiler: gcc-9, cxxstd: '03,11,14,17,2a', os: ubuntu-20.04 } 56 | - { compiler: gcc-10, cxxstd: '03,11,14,17,20', os: 
ubuntu-22.04 } 57 | - { compiler: gcc-11, cxxstd: '03,11,14,17,20', os: ubuntu-22.04 } 58 | - { compiler: gcc-12, cxxstd: '03,11,14,17,20', os: ubuntu-22.04 } 59 | - { compiler: gcc-13, cxxstd: '11,14,17,20,2b', os: ubuntu-24.04 } 60 | - { compiler: gcc-14, cxxstd: '11,14,17,20,2b', os: ubuntu-24.04 } 61 | - { name: GCC w/ sanitizers, sanitize: yes, 62 | compiler: gcc-13, cxxstd: '03,11,14,17,20', os: ubuntu-24.04 } 63 | - { name: Collect coverage, coverage: yes, 64 | compiler: gcc-13, cxxstd: '03,2b', os: ubuntu-24.04, install: 'g++-13-multilib', address-model: '32,64' } 65 | 66 | # Linux, clang 67 | - { compiler: clang-3.5, cxxstd: '03,11', os: ubuntu-latest, container: 'ubuntu:16.04' } 68 | - { compiler: clang-3.6, cxxstd: '03,11,14', os: ubuntu-latest, container: 'ubuntu:16.04' } 69 | - { compiler: clang-3.7, cxxstd: '03,11,14', os: ubuntu-latest, container: 'ubuntu:16.04' } 70 | - { compiler: clang-3.8, cxxstd: '03,11,14', os: ubuntu-latest, container: 'ubuntu:16.04' } 71 | - { compiler: clang-3.9, cxxstd: '03,11,14', os: ubuntu-latest, container: 'ubuntu:18.04' } 72 | - { compiler: clang-4.0, cxxstd: '03,11,14', os: ubuntu-latest, container: 'ubuntu:18.04' } 73 | - { compiler: clang-5.0, cxxstd: '03,11,14,1z', os: ubuntu-latest, container: 'ubuntu:18.04' } 74 | - { compiler: clang-6.0, cxxstd: '03,11,14,17', os: ubuntu-20.04 } 75 | - { compiler: clang-7, cxxstd: '03,11,14,17', os: ubuntu-20.04 } 76 | # Note: clang-8 does not fully support C++20, so it is not compatible with some libstdc++ versions in this mode 77 | - { compiler: clang-8, cxxstd: '03,11,14,17,2a', os: ubuntu-20.04 , install: 'clang-8 g++-7', gcc_toolchain: 7 } 78 | - { compiler: clang-9, cxxstd: '03,11,14,17,2a', os: ubuntu-20.04 } 79 | - { compiler: clang-10, cxxstd: '03,11,14,17,20', os: ubuntu-20.04 } 80 | - { compiler: clang-11, cxxstd: '03,11,14,17,20', os: ubuntu-20.04 } 81 | - { compiler: clang-12, cxxstd: '03,11,14,17,20', os: ubuntu-20.04 } 82 | # Clang isn't compatible with 
libstdc++-13, so use the slightly older one 83 | - { compiler: clang-13, cxxstd: '03,11,14,17,20', os: ubuntu-22.04, install: 'clang-13 g++-12', gcc_toolchain: 12 } 84 | - { compiler: clang-14, cxxstd: '03,11,14,17,20', os: ubuntu-22.04, install: 'clang-14 g++-12', gcc_toolchain: 12 } 85 | - { compiler: clang-15, cxxstd: '03,11,14,17,20', os: ubuntu-22.04, install: 'clang-15 g++-12', gcc_toolchain: 12 } 86 | - { compiler: clang-16, cxxstd: '11,14,17,20,2b', os: ubuntu-24.04 } 87 | # https://github.com/llvm/llvm-project/issues/59827: disabled 2b/23 for clang-17 with libstdc++13 in 24.04 88 | - { compiler: clang-17, cxxstd: '11,14,17,20', os: ubuntu-24.04 } 89 | - { compiler: clang-18, cxxstd: '11,14,17,20,23,2c', os: ubuntu-24.04 } 90 | 91 | # libc++ 92 | - { compiler: clang-6.0, cxxstd: '03,11,14', os: ubuntu-latest, container: 'ubuntu:18.04', stdlib: libc++, install: 'clang-6.0 libc++-dev libc++abi-dev' } 93 | - { compiler: clang-7, cxxstd: '03,11,14,17', os: ubuntu-20.04, stdlib: libc++, install: 'clang-7 libc++-7-dev libc++abi-7-dev' } 94 | - { name: Clang w/ sanitizers, sanitize: yes, 95 | compiler: clang-12, cxxstd: '03,11,14,17,20', os: ubuntu-20.04, stdlib: libc++, install: 'clang-12 libc++-12-dev libc++abi-12-dev' } 96 | 97 | - { name: MacOS w/ clang and sanitizers, 98 | compiler: clang, cxxstd: '03,11,14,17,20,2b', os: macos-13, sanitize: yes } 99 | - { compiler: clang, cxxstd: '03,11,14,17,20,2b', os: macos-14 } 100 | - { compiler: clang, cxxstd: '03,11,14,17,20,2b', os: macos-15 } 101 | 102 | # Coverity Scan 103 | # requires two github secrets in repo to activate; see ci/github/coverity.sh 104 | # does not run on pull requests, only on pushes into develop and master 105 | - { name: Coverity, coverity: yes, 106 | compiler: clang-12, cxxstd: '03,20', os: ubuntu-20.04, ccache: no } 107 | 108 | # multiarch (bigendian testing) - does not support coverage yet 109 | - { name: Big-endian, multiarch: yes, 110 | compiler: clang, cxxstd: '17', os: ubuntu-22.04, 
ccache: no, distro: fedora, edition: 34, arch: s390x } 111 | 112 | 113 | timeout-minutes: 120 114 | runs-on: ${{matrix.os}} 115 | container: 116 | image: ${{matrix.container}} 117 | volumes: 118 | - /node20217:/node20217:rw,rshared 119 | - ${{ startsWith(matrix.container, 'ubuntu:1') && '/node20217:/__e/node20:ro,rshared' || ' ' }} 120 | env: {B2_USE_CCACHE: 1} 121 | 122 | steps: 123 | - name: Setup environment 124 | run: | 125 | if [ -f "/etc/debian_version" ]; then 126 | echo "DEBIAN_FRONTEND=noninteractive" >> $GITHUB_ENV 127 | export DEBIAN_FRONTEND=noninteractive 128 | fi 129 | if [ -n "${{matrix.container}}" ] && [ -f "/etc/debian_version" ]; then 130 | apt-get -o Acquire::Retries=$NET_RETRY_COUNT update 131 | apt-get -o Acquire::Retries=$NET_RETRY_COUNT install -y sudo software-properties-common curl 132 | # Need (newer) git, and the older Ubuntu container may require requesting the key manually using port 80 133 | curl -sSL --retry ${NET_RETRY_COUNT:-5} 'http://keyserver.ubuntu.com/pks/lookup?op=get&search=0xE1DD270288B4E6030699E45FA1715D88E1DF1F24' | sudo gpg --dearmor -o /etc/apt/trusted.gpg.d/git-core_ubuntu_ppa.gpg 134 | for i in {1..${NET_RETRY_COUNT:-3}}; do sudo -E add-apt-repository -y ppa:git-core/ppa && break || sleep 10; done 135 | apt-get -o Acquire::Retries=$NET_RETRY_COUNT update 136 | osver=$(lsb_release -sr | cut -f1 -d.) 
137 | pkgs="g++ git xz-utils" 138 | # Ubuntu 22+ has only Python 3 in the repos 139 | if [ -n "$osver" ] && [ "$osver" -ge "22" ]; then 140 | pkgs+=" python-is-python3 libpython3-dev" 141 | else 142 | pkgs+=" python libpython-dev" 143 | fi 144 | apt-get -o Acquire::Retries=$NET_RETRY_COUNT install -y $pkgs 145 | fi 146 | # For jobs not compatible with ccache, use "ccache: no" in the matrix 147 | if [[ "${{ matrix.ccache }}" == "no" ]]; then 148 | echo "B2_USE_CCACHE=0" >> $GITHUB_ENV 149 | fi 150 | git config --global pack.threads 0 151 | if [[ "${{matrix.container}}" == "ubuntu:1"* ]]; then 152 | # Node 20 doesn't work with Ubuntu 16/18 glibc: https://github.com/actions/checkout/issues/1590 153 | curl -sL https://archives.boost.io/misc/node/node-v20.9.0-linux-x64-glibc-217.tar.xz | tar -xJ --strip-components 1 -C /node20217 154 | fi 155 | 156 | - uses: actions/checkout@v4 157 | with: 158 | # For coverage builds fetch the whole history, else only 1 commit using a 'fake ternary' 159 | fetch-depth: ${{ matrix.coverage && '0' || '1' }} 160 | 161 | - name: Cache ccache 162 | uses: actions/cache@v4 163 | if: env.B2_USE_CCACHE 164 | with: 165 | path: ~/.ccache 166 | key: ${{matrix.os}}-${{matrix.container}}-${{matrix.compiler}}-${{github.sha}} 167 | restore-keys: ${{matrix.os}}-${{matrix.container}}-${{matrix.compiler}}- 168 | 169 | - name: Fetch Boost.CI 170 | uses: actions/checkout@v4 171 | with: 172 | repository: boostorg/boost-ci 173 | ref: master 174 | path: boost-ci-cloned 175 | 176 | - name: Get CI scripts folder 177 | run: | 178 | # Copy ci folder if not testing Boost.CI 179 | [[ "$GITHUB_REPOSITORY" =~ "boost-ci" ]] || cp -r boost-ci-cloned/ci . 
180 | rm -rf boost-ci-cloned 181 | 182 | - name: Install packages 183 | if: startsWith(matrix.os, 'ubuntu') 184 | run: | 185 | SOURCE_KEYS=("${{join(matrix.source_keys, '" "')}}") 186 | SOURCES=("${{join(matrix.sources, '" "')}}") 187 | # Add this by default 188 | SOURCE_KEYS+=('http://keyserver.ubuntu.com/pks/lookup?op=get&search=0x1E9377A2BA9EF27F') 189 | SOURCES+=(ppa:ubuntu-toolchain-r/test) 190 | 191 | ci/add-apt-keys.sh "${SOURCE_KEYS[@]}" 192 | # Initial update before adding sources required to get e.g. keys 193 | sudo apt-get -o Acquire::Retries=$NET_RETRY_COUNT update 194 | ci/add-apt-repositories.sh "${SOURCES[@]}" 195 | 196 | sudo apt-get -o Acquire::Retries=$NET_RETRY_COUNT update 197 | if [[ -z "${{matrix.install}}" ]]; then 198 | pkgs="${{matrix.compiler}}" 199 | pkgs="${pkgs/gcc-/g++-}" 200 | else 201 | pkgs="${{matrix.install}}" 202 | fi 203 | sudo apt-get -o Acquire::Retries=$NET_RETRY_COUNT install -y $pkgs 204 | 205 | - name: Setup GCC Toolchain 206 | if: matrix.gcc_toolchain 207 | run: | 208 | GCC_TOOLCHAIN_ROOT="$HOME/gcc-toolchain" 209 | echo "GCC_TOOLCHAIN_ROOT=$GCC_TOOLCHAIN_ROOT" >> $GITHUB_ENV 210 | if ! 
command -v dpkg-architecture; then 211 | apt-get install -y dpkg-dev 212 | fi 213 | MULTIARCH_TRIPLET="$(dpkg-architecture -qDEB_HOST_MULTIARCH)" 214 | mkdir -p "$GCC_TOOLCHAIN_ROOT" 215 | ln -s /usr/include "$GCC_TOOLCHAIN_ROOT/include" 216 | ln -s /usr/bin "$GCC_TOOLCHAIN_ROOT/bin" 217 | mkdir -p "$GCC_TOOLCHAIN_ROOT/lib/gcc/$MULTIARCH_TRIPLET" 218 | ln -s "/usr/lib/gcc/$MULTIARCH_TRIPLET/${{matrix.gcc_toolchain}}" "$GCC_TOOLCHAIN_ROOT/lib/gcc/$MULTIARCH_TRIPLET/${{matrix.gcc_toolchain}}" 219 | 220 | - name: Setup multiarch 221 | if: matrix.multiarch 222 | env: 223 | BDDE_DISTRO: ${{matrix.distro}} 224 | BDDE_EDITION: ${{matrix.edition}} 225 | BDDE_ARCH: ${{matrix.arch}} 226 | run: ci/github/setup_bdde.sh 227 | 228 | - name: Setup Boost 229 | env: 230 | B2_ADDRESS_MODEL: ${{matrix.address-model}} 231 | B2_COMPILER: ${{matrix.compiler}} 232 | B2_CXXSTD: ${{matrix.cxxstd}} 233 | B2_SANITIZE: ${{matrix.sanitize}} 234 | B2_STDLIB: ${{matrix.stdlib}} 235 | # More entries can be added in the same way, see the B2_ARGS assignment in ci/enforce.sh for the possible keys. 236 | # B2_DEFINES: ${{matrix.defines}} 237 | # Variables set here (to non-empty) will override the top-level environment variables, e.g. 238 | # B2_VARIANT: ${{matrix.variant}} 239 | # Set the (B2) target(s) to build, defaults to the test folder of the current library 240 | # Can alternatively be done like this in the build step or in the build command of the build step, e.g. 
`run: B2_TARGETS=libs/$SELF/doc ci/build.sh` 241 | # B2_TARGETS: libs/foo/test//bar 242 | run: source ci/github/install.sh 243 | 244 | - name: Setup coverage collection 245 | if: matrix.coverage 246 | run: ci/github/codecov.sh "setup" 247 | 248 | - name: Run tests 249 | if: '!matrix.coverity' 250 | run: ci/build.sh 251 | 252 | - name: Upload coverage 253 | if: matrix.coverage 254 | run: ci/codecov.sh "upload" 255 | env: 256 | CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} 257 | 258 | - name: Run coverity 259 | if: matrix.coverity && github.event_name == 'push' && (github.ref_name == 'develop' || github.ref_name == 'master') 260 | run: ci/github/coverity.sh 261 | env: 262 | COVERITY_SCAN_NOTIFICATION_EMAIL: ${{ secrets.COVERITY_SCAN_NOTIFICATION_EMAIL }} 263 | COVERITY_SCAN_TOKEN: ${{ secrets.COVERITY_SCAN_TOKEN }} 264 | 265 | windows: 266 | defaults: 267 | run: 268 | shell: cmd 269 | strategy: 270 | fail-fast: false 271 | matrix: 272 | include: 273 | - { toolset: msvc-14.0, cxxstd: '14,latest', addrmd: '32,64', os: windows-2019 } 274 | - { toolset: msvc-14.2, cxxstd: '14,17,20', addrmd: '32,64', os: windows-2019 } 275 | - { toolset: msvc-14.3, cxxstd: '14,17,20,latest',addrmd: '32,64', os: windows-2022 } 276 | - { name: Collect coverage, coverage: yes, 277 | toolset: msvc-14.3, cxxstd: 'latest', addrmd: '64', os: windows-2022 } 278 | - { toolset: clang-win, cxxstd: '14,17,latest', addrmd: '32,64', os: windows-2022 } 279 | - { toolset: gcc, cxxstd: '03,11,14,17,2a', addrmd: '64', os: windows-2019 } 280 | 281 | runs-on: ${{matrix.os}} 282 | 283 | steps: 284 | - uses: actions/checkout@v4 285 | 286 | - name: Fetch Boost.CI 287 | uses: actions/checkout@v4 288 | with: 289 | repository: boostorg/boost-ci 290 | ref: master 291 | path: boost-ci-cloned 292 | - name: Get CI scripts folder 293 | run: | 294 | REM Copy ci folder if not testing Boost.CI 295 | if "%GITHUB_REPOSITORY%" == "%GITHUB_REPOSITORY:boost-ci=%" xcopy /s /e /q /i /y boost-ci-cloned\ci .\ci 296 | rmdir /s /q 
boost-ci-cloned 297 | 298 | - name: Setup Boost 299 | run: ci\github\install.bat 300 | 301 | - name: Run tests 302 | if: '!matrix.coverage' 303 | run: ci\build.bat 304 | env: 305 | B2_TOOLSET: ${{matrix.toolset}} 306 | B2_CXXSTD: ${{matrix.cxxstd}} 307 | B2_ADDRESS_MODEL: ${{matrix.addrmd}} 308 | 309 | - name: Collect coverage 310 | shell: powershell 311 | if: matrix.coverage 312 | run: ci\opencppcoverage.ps1 313 | env: 314 | B2_TOOLSET: ${{matrix.toolset}} 315 | B2_CXXSTD: ${{matrix.cxxstd}} 316 | B2_ADDRESS_MODEL: ${{matrix.addrmd}} 317 | 318 | - name: Upload coverage 319 | if: matrix.coverage 320 | uses: codecov/codecov-action@v5 321 | with: 322 | disable_search: true 323 | fail_ci_if_error: true 324 | files: __out/cobertura.xml 325 | name: github-actions 326 | token: ${{secrets.CODECOV_TOKEN}} 327 | verbose: true 328 | 329 | MSYS2: 330 | defaults: 331 | run: 332 | shell: msys2 {0} 333 | strategy: 334 | fail-fast: false 335 | matrix: 336 | include: 337 | - { sys: MINGW32, compiler: gcc, cxxstd: '03,11,17,20' } 338 | - { sys: MINGW64, compiler: gcc, cxxstd: '03,11,17,20' } 339 | 340 | runs-on: windows-latest 341 | 342 | steps: 343 | - uses: actions/checkout@v4 344 | 345 | - name: Setup MSYS2 environment 346 | uses: msys2/setup-msys2@v2 347 | with: 348 | msystem: ${{matrix.sys}} 349 | update: true 350 | install: git python 351 | pacboy: gcc:p cmake:p ninja:p 352 | 353 | - name: Fetch Boost.CI 354 | uses: actions/checkout@v4 355 | with: 356 | repository: boostorg/boost-ci 357 | ref: master 358 | path: boost-ci-cloned 359 | - name: Get CI scripts folder 360 | run: | 361 | # Copy ci folder if not testing Boost.CI 362 | [[ "$GITHUB_REPOSITORY" =~ "boost-ci" ]] || cp -r boost-ci-cloned/ci . 
363 | rm -rf boost-ci-cloned 364 | 365 | - name: Setup Boost 366 | env: 367 | B2_COMPILER: ${{matrix.compiler}} 368 | B2_CXXSTD: ${{matrix.cxxstd}} 369 | B2_SANITIZE: ${{matrix.sanitize}} 370 | B2_STDLIB: ${{matrix.stdlib}} 371 | run: ci/github/install.sh 372 | 373 | - name: Run tests 374 | run: ci/build.sh 375 | 376 | # Run also the CMake tests to avoid having to setup another matrix for CMake on MSYS 377 | - name: Run CMake tests 378 | run: | 379 | cd "$BOOST_ROOT" 380 | mkdir __build_cmake_test__ && cd __build_cmake_test__ 381 | cmake -G Ninja -DCMAKE_BUILD_TYPE=Debug -DBOOST_INCLUDE_LIBRARIES=$SELF -DBUILD_SHARED_LIBS=ON -DBUILD_TESTING=ON -DBoost_VERBOSE=ON .. 382 | cmake --build . --target tests --config Debug -j$B2_JOBS 383 | ctest --output-on-failure --build-config Debug 384 | 385 | CMake: 386 | defaults: 387 | run: 388 | shell: bash 389 | 390 | strategy: 391 | fail-fast: false 392 | matrix: 393 | include: 394 | - { os: ubuntu-20.04, build_shared: ON, build_type: Debug, generator: 'Unix Makefiles' } 395 | - { os: ubuntu-20.04, build_shared: OFF, build_type: Debug, generator: 'Unix Makefiles' } 396 | - { os: windows-2019, build_shared: ON, build_type: Debug, generator: 'Visual Studio 16 2019' } 397 | - { os: windows-2019, build_shared: OFF, build_type: Debug, generator: 'Visual Studio 16 2019' } 398 | 399 | timeout-minutes: 120 400 | runs-on: ${{matrix.os}} 401 | 402 | steps: 403 | - uses: actions/checkout@v4 404 | - name: Fetch Boost.CI 405 | uses: actions/checkout@v4 406 | with: 407 | repository: boostorg/boost-ci 408 | ref: master 409 | path: boost-ci-cloned 410 | - name: Get CI scripts folder 411 | run: | 412 | # Copy ci folder if not testing Boost.CI 413 | [[ "$GITHUB_REPOSITORY" =~ "boost-ci" ]] || cp -r boost-ci-cloned/ci . 
414 | rm -rf boost-ci-cloned 415 | - name: Setup Boost 416 | env: {B2_DONT_BOOTSTRAP: 1} 417 | run: source ci/github/install.sh 418 | 419 | - name: Run CMake tests 420 | run: | 421 | cd "$BOOST_ROOT" 422 | mkdir __build_cmake_test__ && cd __build_cmake_test__ 423 | cmake -G "${{matrix.generator}}" -DCMAKE_BUILD_TYPE=${{matrix.build_type}} -DBOOST_INCLUDE_LIBRARIES=$SELF -DBUILD_SHARED_LIBS=${{matrix.build_shared}} -DBUILD_TESTING=ON -DBoost_VERBOSE=ON .. 424 | cmake --build . --target tests --config ${{matrix.build_type}} -j$B2_JOBS 425 | ctest --output-on-failure --build-config ${{matrix.build_type}} 426 | 427 | - name: Run CMake subdir tests 428 | run: | 429 | cmake_test_folder="$BOOST_ROOT/libs/$SELF/test/cmake_test" # New unified folder 430 | [ -d "$cmake_test_folder" ] || cmake_test_folder="$BOOST_ROOT/libs/$SELF/test/cmake_subdir_test" 431 | cd "$cmake_test_folder" 432 | mkdir __build_cmake_subdir_test__ && cd __build_cmake_subdir_test__ 433 | cmake -G "${{matrix.generator}}" -DBOOST_CI_INSTALL_TEST=OFF -DCMAKE_BUILD_TYPE=${{matrix.build_type}} -DBUILD_SHARED_LIBS=${{matrix.build_shared}} .. 434 | cmake --build . --config ${{matrix.build_type}} -j$B2_JOBS 435 | ctest --output-on-failure --build-config ${{matrix.build_type}} 436 | 437 | - name: Install Library 438 | run: | 439 | cd "$BOOST_ROOT" 440 | mkdir __build_cmake_install_test__ && cd __build_cmake_install_test__ 441 | cmake -G "${{matrix.generator}}" -DCMAKE_BUILD_TYPE=${{matrix.build_type}} -DBOOST_INCLUDE_LIBRARIES=$SELF -DBUILD_SHARED_LIBS=${{matrix.build_shared}} -DCMAKE_INSTALL_PREFIX=~/.local -DBoost_VERBOSE=ON -DBoost_DEBUG=ON .. 442 | cmake --build . 
--target install --config ${{matrix.build_type}} -j$B2_JOBS 443 | - name: Run CMake install tests 444 | run: | 445 | cmake_test_folder="$BOOST_ROOT/libs/$SELF/test/cmake_test" # New unified folder 446 | [ -d "$cmake_test_folder" ] || cmake_test_folder="$BOOST_ROOT/libs/$SELF/test/cmake_install_test" 447 | cd "$cmake_test_folder" 448 | mkdir __build_cmake_install_test__ && cd __build_cmake_install_test__ 449 | cmake -G "${{matrix.generator}}" -DBOOST_CI_INSTALL_TEST=ON -DCMAKE_BUILD_TYPE=${{matrix.build_type}} -DBUILD_SHARED_LIBS=${{matrix.build_shared}} -DCMAKE_PREFIX_PATH=~/.local .. 450 | cmake --build . --config ${{matrix.build_type}} -j$B2_JOBS 451 | ctest --output-on-failure --build-config ${{matrix.build_type}} 452 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Generated by `boostdep --cmake tokenizer` 2 | # Copyright 2020 Peter Dimov 3 | # Distributed under the Boost Software License, Version 1.0. 
4 | # https://www.boost.org/LICENSE_1_0.txt 5 | 6 | cmake_minimum_required(VERSION 3.5...3.16) 7 | 8 | project(boost_tokenizer VERSION "${BOOST_SUPERPROJECT_VERSION}" LANGUAGES CXX) 9 | 10 | add_library(boost_tokenizer INTERFACE) 11 | add_library(Boost::tokenizer ALIAS boost_tokenizer) 12 | 13 | target_include_directories(boost_tokenizer INTERFACE include) 14 | 15 | target_link_libraries(boost_tokenizer 16 | INTERFACE 17 | Boost::assert 18 | Boost::config 19 | Boost::iterator 20 | Boost::mpl 21 | Boost::throw_exception 22 | Boost::type_traits 23 | ) 24 | 25 | if(BUILD_TESTING AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/test/CMakeLists.txt") 26 | 27 | add_subdirectory(test) 28 | 29 | endif() 30 | 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Boost Software License - Version 1.0 - August 17th, 2003 2 | 3 | Permission is hereby granted, free of charge, to any person or organization 4 | obtaining a copy of the software and accompanying documentation covered by 5 | this license (the "Software") to use, reproduce, display, distribute, 6 | execute, and transmit the Software, and to prepare derivative works of the 7 | Software, and to permit third-parties to whom the Software is furnished to 8 | do so, all subject to the following: 9 | 10 | The copyright notices in the Software and this entire statement, including 11 | the above license grant, this restriction and the following disclaimer, 12 | must be included in all copies of the Software, in whole or in part, and 13 | all derivative works of the Software, unless such copies or derivative 14 | works are solely in the form of machine-executable object code generated by 15 | a source language processor. 
16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT 20 | SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE 21 | FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, 22 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 | DEALINGS IN THE SOFTWARE. 24 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # [Boost.Tokenizer](https://boost.org/libs/tokenizer) 2 | 3 | Boost.Tokenizer is a part of [Boost C++ Libraries](https://github.com/boostorg). The Boost.Tokenizer package provides a flexible and easy-to-use way to break a string or other character sequence into a series of tokens. 4 | 5 | ## License 6 | 7 | Distributed under the [Boost Software License, Version 1.0](https://www.boost.org/LICENSE_1_0.txt). 
8 | 9 | ## Properties 10 | 11 | * C++03 12 | * Header-Only 13 | 14 | ## Build Status 15 | 16 | 17 | | Branch | GHA CI | Appveyor | Coverity Scan | codecov.io | Deps | Docs | Tests | 18 | | :-------------: | ------ | -------- | ------------- | ---------- | ---- | ---- | ----- | 19 | | [`master`](https://github.com/boostorg/tokenizer/tree/master) | [![Build Status](https://github.com/boostorg/tokenizer/actions/workflows/ci.yml/badge.svg?branch=master)](https://github.com/boostorg/tokenizer/actions?query=branch:master) | [![Build status](https://ci.appveyor.com/api/projects/status/rpqpywvv4l4637qy/branch/master?svg=true)](https://ci.appveyor.com/project/cppalliance/tokenizer/branch/master) | [![Coverity Scan Build Status](https://scan.coverity.com/projects/15854/badge.svg)](https://scan.coverity.com/projects/boostorg-tokenizer) | [![codecov](https://codecov.io/gh/boostorg/tokenizer/branch/master/graph/badge.svg?token=sakwglU1PC)](https://codecov.io/gh/boostorg/tokenizer/tree/master) | [![Deps](https://img.shields.io/badge/deps-master-brightgreen.svg)](https://pdimov.github.io/boostdep-report/master/tokenizer.html) | [![Documentation](https://img.shields.io/badge/docs-master-brightgreen.svg)](https://www.boost.org/doc/libs/master/libs/tokenizer) | [![Enter the Matrix](https://img.shields.io/badge/matrix-master-brightgreen.svg)](https://www.boost.org/development/tests/master/developer/tokenizer.html) 20 | | [`develop`](https://github.com/boostorg/tokenizer/tree/develop) | [![Build Status](https://github.com/boostorg/tokenizer/actions/workflows/ci.yml/badge.svg?branch=develop)](https://github.com/boostorg/tokenizer/actions?query=branch:develop) | [![Build status](https://ci.appveyor.com/api/projects/status/rpqpywvv4l4637qy/branch/develop?svg=true)](https://ci.appveyor.com/project/cppalliance/tokenizer/branch/develop) | [![Coverity Scan Build Status](https://scan.coverity.com/projects/15854/badge.svg)](https://scan.coverity.com/projects/boostorg-tokenizer) | 
[![codecov](https://codecov.io/gh/boostorg/tokenizer/branch/develop/graph/badge.svg?token=sakwglU1PC)](https://codecov.io/gh/boostorg/tokenizer/tree/develop) | [![Deps](https://img.shields.io/badge/deps-develop-brightgreen.svg)](https://pdimov.github.io/boostdep-report/develop/tokenizer.html) | [![Documentation](https://img.shields.io/badge/docs-develop-brightgreen.svg)](https://www.boost.org/doc/libs/develop/libs/tokenizer) | [![Enter the Matrix](https://img.shields.io/badge/matrix-develop-brightgreen.svg)](https://www.boost.org/development/tests/develop/developer/tokenizer.html) 21 | 22 | ## Overview 23 | 24 | > break up a phrase into words. 25 | 26 | ![Try it online][badge.wandbox] 27 | 28 | ```c++ 29 | #include 30 | #include 31 | #include 32 | 33 | int main(){ 34 | std::string s = "This is, a test"; 35 | typedef boost::tokenizer<> Tok; 36 | Tok tok(s); 37 | for (Tok::iterator beg = tok.begin(); beg != tok.end(); ++beg){ 38 | std::cout << *beg << "\n"; 39 | } 40 | } 41 | 42 | ``` 43 | 44 | > Using Range-based for loop (C++11 or later) 45 | 46 | ![Try it online][badge.wandbox] 47 | ```c++ 48 | #include 49 | #include 50 | #include 51 | 52 | int main(){ 53 | std::string s = "This is, a test"; 54 | boost::tokenizer<> tok(s); 55 | for (auto token: tok) { 56 | std::cout << token << "\n"; 57 | } 58 | } 59 | ``` 60 | 61 | ## Related Material 62 | 63 | [Boost.Tokenizer](https://theboostcpplibraries.com/boost.tokenizer) Chapter 10 at theboostcpplibraries.com, contains several examples including **escaped_list_separator**. 64 | 65 | ## Acknowledgements 66 | >From the author: 67 | > 68 | I wish to thank the members of the boost mailing list, whose comments, compliments, and criticisms during both the development and formal review helped make the Tokenizer library what it is. I especially wish to thank Aleksey Gurtovoy for the idea of using a pair of iterators to specify the input, instead of a string. 
I also wish to thank Jeremy Siek for his idea of providing a container interface for the token iterators and for simplifying the template parameters for the TokenizerFunctions. He and Daryle Walker also emphasized the need to separate interface and implementation. Gary Powell sparked the idea of using the isspace and ispunct as the defaults for char_delimiters_separator. Jeff Garland provided ideas on how to change to order of the template parameters in order to make tokenizer easier to declare. Thanks to Douglas Gregor who served as review manager and provided many insights both on the boost list and in e-mail on how to polish up the implementation and presentation of Tokenizer. Finally, thanks to Beman Dawes who integrated the final version into the boost distribution. 69 | 70 | ## Directories 71 | 72 | | Name | Purpose | 73 | | ----------- | ------------------------------ | 74 | | `example` | examples | 75 | | `include` | header | 76 | | `test` | unit tests | 77 | 78 | ## More information 79 | 80 | * [Ask questions](https://stackoverflow.com/questions/ask?tags=c%2B%2B,boost,boost-tokenizer) 81 | * [Report bugs](https://github.com/boostorg/tokenizer/issues): Be sure to mention Boost version, platform and compiler you're using. A small compilable code sample to reproduce the problem is always good as well. 82 | * Submit your patches as pull requests against **develop** branch. Note that by submitting patches you agree to license your modifications under the [Boost Software License, Version 1.0](https://www.boost.org/LICENSE_1_0.txt). 83 | * Discussions about the library are held on the [Boost developers mailing list](https://www.boost.org/community/groups.html#main). Be sure to read the [discussion policy](https://www.boost.org/community/policy.html) before posting and add the `[tokenizer]` tag at the beginning of the subject line. 
-------------------------------------------------------------------------------- /build.jam: -------------------------------------------------------------------------------- 1 | # Copyright René Ferdinand Rivera Morell 2023-2024 2 | # Distributed under the Boost Software License, Version 1.0. 3 | # (See accompanying file LICENSE_1_0.txt or copy at 4 | # http://www.boost.org/LICENSE_1_0.txt) 5 | 6 | require-b2 5.2 ; 7 | 8 | constant boost_dependencies : 9 | /boost/assert//boost_assert 10 | /boost/config//boost_config 11 | /boost/iterator//boost_iterator 12 | /boost/throw_exception//boost_throw_exception 13 | /boost/type_traits//boost_type_traits ; 14 | 15 | project /boost/tokenizer 16 | : common-requirements 17 | include 18 | ; 19 | 20 | explicit 21 | [ alias boost_tokenizer : : : : $(boost_dependencies) ] 22 | [ alias all : boost_tokenizer example test ] 23 | ; 24 | 25 | call-if : boost-library tokenizer 26 | ; 27 | 28 | -------------------------------------------------------------------------------- /doc/char_delimiters_separator.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | Boost Char Delimiters Separator 11 | 12 | 13 | 15 |

Note: This class is deprecated. Please use 17 | char_separator instead. 18 | 19 |

Char Delimiters Separator

20 |

 21 | template <class Char, class Traits = std::char_traits<Char> >
 22 | class char_delimiters_separator{
 23 |

24 | 25 |

The char_delimiters_separator class is an implementation of the TokenizerFunction concept that can be used to 27 | break text up into tokens. It is the default TokenizerFunction for 28 | tokenizer and token_iterator_generator. An example is below.

29 | 30 |

Example

31 |

 32 | // simple_example_4.cpp
 33 | #include<iostream>
 34 | #include<boost/tokenizer.hpp>
 35 | #include<string>
 36 | 
 37 | int main(){
 38 |    using namespace std;
 39 |    using namespace boost;
 40 |    string s = "This is,  a test";
 41 |    tokenizer<char_delimiters_separator<char> > tok(s);
 42 |    for(tokenizer<char_delimiters_separator<char> >::iterator beg=tok.begin(); beg!=tok.end();++beg){
 43 |        cout << *beg << "\n";
 44 |    }
 45 | }
 46 |

47 | 48 |

Construction and Usage

49 | 50 |

There is one constructor of interest. It is as follows

51 |

 52 | explicit char_delimiters_separator(bool return_delims = false, 
 53 | const Char* returnable = "",const Char* nonreturnable = "" )
 54 |

55 | 56 | 57 | 58 | 61 | 62 | 65 | 66 | 67 | 68 | 69 | 70 | 73 | 74 | 75 | 76 | 77 | 78 | 84 | 85 | 86 | 87 | 88 | 89 | 95 | 96 |

59 \| Parameter 60 \|	63 \| Description 64 \|
return_delims	Whether or not to return the delimiters that have been found. Note 71 \| that not all delimiters can be returned. See the other two parameters 72 \| for explanation.
returnable	This specifies the returnable delimiters. These are the delimiters 79 \| that can be returned as tokens when return_delims is true. Since these 80 \| are typically punctuation, if a 0 is provided as the argument, then the 81 \| returnable delimiters will be all characters C for which std::ispunct(C) 82 \| yields a true value. If an argument of "" is provided, then this is 83 \| taken to mean that there are no returnable delimiters.
nonreturnable	This specifies the nonreturnable delimiters. These are delimiters 90 \| that cannot be returned as tokens. Since these are typically 91 \| whitespace, if 0 is specified as an argument, then the nonreturnable 92 \| delimiters will be all characters C for which std::isspace(C) yields a 93 \| true value. If an argument of "" is provided, then this is taken to 94 \| mean that there are no non-returnable delimiters.

97 | 98 |

The reason there is a distinction between nonreturnable and returnable 99 | delimiters is that some delimiters are just used to split up tokens and are 100 | nothing more. Take for example the following string "b c +". Assume you are 101 | writing a simple calculator to parse expressions in postfix notation. While 102 | both the space and the + separate tokens, you are only interested in the + 103 | and not in the space. Indeed having the space returned as a token would 104 | only complicate your code. In this case you would specify + as a 105 | returnable, and space as a nonreturnable delimiter.

106 | 107 |

To use this class, pass an object of it anywhere a TokenizerFunction 108 | object is required.

109 | 110 |

Template Parameters

111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 124 | 125 | 126 | 127 | 128 | 129 | 131 | 132 |

Parameter	Description
`Char`	The type of the elements within a token, typically 123 \| `char`.
Traits	The traits class for Char, typically 130 \| std::char_traits<Char>

133 | 134 |

Model of

135 | 136 |

TokenizerFunction

137 | 138 |

139 |

140 | 141 |

144 | 145 |

Revised 146 | 25 147 | December, 2006

148 | 149 |

150 | 151 |

Distributed under the Boost Software License, Version 1.0. (See 152 | accompanying file LICENSE_1_0.txt or 153 | copy at http://www.boost.org/LICENSE_1_0.txt)

155 | 156 | 157 | -------------------------------------------------------------------------------- /doc/char_separator.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | Boost Char Separator 11 | 12 | 13 | 15 |

17 | 18 |

char_separator<Char, Traits>

19 | 20 |

The char_separator class breaks a sequence of characters into 21 | tokens based on character delimiters much in the same way that 22 | strtok() does (but without all the evils of non-reentrancy and 23 | destruction of the input sequence).

24 | 25 |

The char_separator class is used in conjunction with the 26 | token_iterator or tokenizer to perform tokenizing.

28 | 29 |

Definitions

30 | 31 |

The strtok() function does not include matches with the 32 | character delimiters in the output sequence of tokens. However, sometimes 33 | it is useful to have the delimiters show up in the output sequence, 34 | therefore char_separator provides this as an option. We refer to 35 | delimiters that show up as output tokens as kept delimiters 36 | and delimiters that do not show up as output tokens as dropped 37 | delimiters.

38 | 39 |

When two delimiters appear next to each other in the input sequence, 40 | there is the question of whether to output an empty token or 41 | to skip ahead. The behaviour of strtok() is to skip ahead. The 42 | char_separator class provides both options.

43 | 44 |

Examples

45 | 46 |

This first example shows how to use char_separator as a 47 | replacement for the strtok() function. We've specified three 48 | character delimiters, and they will not show up as output tokens. We have 49 | not specified any kept delimiters, and by default any empty tokens will be 50 | ignored.

51 | 52 |

53 |

 54 | // char_sep_example_1.cpp
 55 | #include <iostream>
 56 | #include <boost/tokenizer.hpp>
 57 | #include <string>
 58 | 
 59 | int main()
 60 | {
 61 |   std::string str = ";;Hello|world||-foo--bar;yow;baz|";
 62 |   typedef boost::tokenizer<boost::char_separator<char> > 
 63 |     tokenizer;
 64 |   boost::char_separator<char> sep("-;|");
 65 |   tokenizer tokens(str, sep);
 66 |   for (tokenizer::iterator tok_iter = tokens.begin();
 67 |        tok_iter != tokens.end(); ++tok_iter)
 68 |     std::cout << "<" << *tok_iter << "> ";
 69 |   std::cout << "\n";
 70 |   return EXIT_SUCCESS;
 71 | }
 72 |

73 |

The output is: 74 | 75 |

76 |

 77 | <Hello> <world> <foo> <bar> <yow> <baz> 
 78 |

79 |

80 | 81 |

The next example shows tokenizing with two dropped delimiters '-' and 82 | ';' and a single kept delimiter '|'. We also specify that empty tokens 83 | should show up in the output when two delimiters are next to each 84 | other.

85 | 86 |

87 |

 88 | // char_sep_example_2.cpp
 89 | #include <iostream>
 90 | #include <boost/tokenizer.hpp>
 91 | #include <string>
 92 | 
 93 | int main()
 94 | {
 95 |     std::string str = ";;Hello|world||-foo--bar;yow;baz|";
 96 |     typedef boost::tokenizer<boost::char_separator<char> > 
 97 |         tokenizer;
 98 |     boost::char_separator<char> sep("-;", "|", boost::keep_empty_tokens);
 99 |     tokenizer tokens(str, sep);
100 |     for (tokenizer::iterator tok_iter = tokens.begin();
101 |          tok_iter != tokens.end(); ++tok_iter)
102 |       std::cout << "<" << *tok_iter << "> ";
103 |     std::cout << "\n";
104 |     return EXIT_SUCCESS;
105 | }
106 |

107 |

The output is: 108 | 109 |

110 |

111 | <> <> <Hello> <|> <world> <|> <> <|> <> <foo> <> <bar> <yow> <baz> <|> <>
112 |

113 |

114 | 115 |

The final example shows tokenizing on punctuation and whitespace 116 | characters using the default constructor of the 117 | char_separator.

118 | 119 |

120 |

121 | // char_sep_example_3.cpp
122 | #include <iostream>
123 | #include <boost/tokenizer.hpp>
124 | #include <string>
125 | 
126 | int main()
127 | {
128 |    std::string str = "This is,  a test";
129 |    typedef boost::tokenizer<boost::char_separator<char> > Tok;
130 |    boost::char_separator<char> sep; // default constructed
131 |    Tok tok(str, sep);
132 |    for(Tok::iterator tok_iter = tok.begin(); tok_iter != tok.end(); ++tok_iter)
133 |      std::cout << "<" << *tok_iter << "> ";
134 |    std::cout << "\n";
135 |    return EXIT_SUCCESS;
136 | }
137 |

138 |

The output is: 139 | 140 |

141 |

142 | <This> <is> <,> <a> <test> 
143 |

144 |

145 | 146 |

Template parameters

147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 |

Parameter	Description	Default
`Char`	The type of elements within a token, typically `char`.
`Traits`	The `char_traits` for the character type.	`char_traits<Char>`

173 | 174 |

Model of

Tokenizer Function 175 | 176 |

Members

177 |

178 |

179 | explicit char_separator(const Char* dropped_delims,
180 |                         const Char* kept_delims = "",
181 |                         empty_token_policy empty_tokens = drop_empty_tokens)
182 |

183 | 184 |

This creates a char_separator object, which can then be used to 185 | create a token_iterator or 186 | tokenizer to perform tokenizing. The 187 | dropped_delims and kept_delims are strings of characters 188 | where each character is used as a delimiter during tokenizing. Whenever a 189 | delimiter is seen in the input sequence, the current token is finished, and 190 | a new token begins. The delimiters in dropped_delims do not show 191 | up as tokens in the output whereas the delimiters in kept_delims 192 | do show up as tokens. If empty_tokens is 193 | drop_empty_tokens, then empty tokens will not show up in the 194 | output. If empty_tokens is keep_empty_tokens then empty 195 | tokens will show up in the output.

196 |

197 |

198 | explicit char_separator()
199 |

200 | 201 |

The function std::isspace() is used to identify dropped 202 | delimiters and std::ispunct() is used to identify kept delimiters. 203 | In addition, empty tokens are dropped.

204 |

205 |

206 | template <typename InputIterator, typename Token>
207 | bool operator()(InputIterator& next, InputIterator end, Token& tok)
208 |

209 | 210 |

This function is called by the token_iterator to perform tokenizing. The 212 | user typically does not call this function directly.

213 |

214 | 215 |

218 | 219 |

Revised 220 | 25 221 | December, 2006

222 | 223 |

224 | 225 |

Distributed under the Boost Software License, Version 1.0. (See 226 | accompanying file LICENSE_1_0.txt or 227 | copy at http://www.boost.org/LICENSE_1_0.txt)

229 | 230 | 231 | -------------------------------------------------------------------------------- /doc/escaped_list_separator.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | Boost Escaped List Separator 11 | 12 | 13 | 15 |

17 | 18 |

Escaped List Separator

19 | 20 |

21 |

 22 | escaped_list_separator<Char, Traits = std::char_traits<Char> >
 23 |

24 |

25 | 26 |

The escaped_list_separator class is an implementation of the 27 | TokenizerFunction. The 28 | escaped_list_separator parses a superset of the csv (comma separated value) 29 | format. Examples of this format are below. It is assumed that the 30 | default characters for separator, quote, and escape are used.

31 | 32 |

Field 1,Field 2,Field 3
33 | Field 1,"Field 2, with comma",Field 3
34 | Field 1,Field 2 with \"embedded quote\",Field 3
35 | Field 1, Field 2 with \n new line,Field 3
36 | Field 1, Field 2 with embedded \\ ,Field 3

37 | 38 |

Fields are normally separated by commas. If you want to put a comma in a 39 | field, you need to put quotes around it. Also 3 escape sequences are 40 | supported

41 | 42 | 43 | 44 | 47 | 48 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 |

45 \| Escape Sequence 46 \|	49 \| Result 50 \|
<escape><quote>	<quote>
<escape>n	newline
<escape><escape>	<escape>

71 | 72 |

Where <quote> is any character specified to be a quote 73 | and <escape> is any character specified to be an escape character.

74 | 75 |

Example

76 |

 77 | // simple_example_2.cpp
 78 | #include<iostream>
 79 | #include<boost/tokenizer.hpp>
 80 | #include<string>
 81 | 
 82 | int main(){
 83 |    using namespace std;
 84 |    using namespace boost;
 85 |    string s = "Field 1,\"putting quotes around fields, allows commas\",Field 3";
 86 |    tokenizer<escaped_list_separator<char> > tok(s);
 87 |    for(tokenizer<escaped_list_separator<char> >::iterator beg=tok.begin(); beg!=tok.end();++beg){
 88 |        cout << *beg << "\n";
 89 |    }
 90 | }
 91 |

92 | 93 |

94 | 95 |

Construction and Usage

96 | 97 |

escaped_list_separator has 2 constructors. They are as follows

98 |

 99 | explicit escaped_list_separator(Char e = '\\', Char c = ',',Char q = '\"')
100 |

101 | 102 | 103 | 104 | 107 | 108 | 111 | 112 | 113 | 114 | 115 | 116 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 |

105 \| Parameter 106 \|	109 \| Description 110 \|
e	Specifies the character to use for escape sequences. It defaults to 117 \| the C style \ (backslash). However you can override by passing in a 118 \| different character. An example of when you might want to do this is 119 \| when you have many fields which are Windows style filenames. Instead of 120 \| escaping out each \ in the path, you can change the escape to something 121 \| else.
c	Specifies the character to use to separate the fields
q	Specifies the character to use for the quote.

136 | 137 |

138 |

139 | escaped_list_separator(string_type e, string_type c, string_type q):
140 |

141 | 142 | 143 | 144 | 147 | 148 | 151 | 152 | 153 | 154 | 155 | 156 | 159 | 160 | 161 | 162 | 163 | 164 | 166 | 167 | 168 | 169 | 170 | 171 | 173 | 174 |

145 \| Parameter 146 \|	149 \| Description 150 \|
e	Any character in the string e, is considered to be an escape 157 \| character. If an empty string is given, then there are no escape 158 \| characters.
c	Any character in the string c, is considered to be a separator. If 165 \| an empty string is given, then there are no separator characters.
q	Any character in the string q, is considered to be a quote. If an 172 \| empty string is given, then there are no quote characters.

175 | 176 |

177 | 178 |

To use this class, pass an object of it anywhere in the Tokenizer 179 | package where a TokenizerFunction is required.

180 | 181 |

182 | 183 |

Template Parameters

184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 197 | 198 | 199 | 200 | 201 | 202 | 204 | 205 |

Parameter	Description
`Char`	The type of the elements within a token, typically 196 \| `char`.
Traits	The traits class for the Char type. This is used for comparing 203 \| Char's. It defaults to std::char_traits<Char>

206 | 207 |

208 | 209 |

Model of

210 | 211 |

TokenizerFunction

212 | 213 |

214 |

215 | 216 |

219 | 220 |

Revised 221 | 25 222 | December, 2006

223 | 224 |

225 | 226 |

Distributed under the Boost Software License, Version 1.0. (See 227 | accompanying file LICENSE_1_0.txt or 228 | copy at http://www.boost.org/LICENSE_1_0.txt)

230 | 231 | 232 | -------------------------------------------------------------------------------- /doc/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | Boost Tokenizer Overview 11 | 12 | 13 | 15 |

17 | 18 |

19 | 20 |

21 | 22 |

Introduction

23 | 24 |

Containers and Iterators

25 | 26 |

28 |
tokenizer
29 |
32 |
token iterator
33 |

35 | 36 |

TokenizerFunction Concept

37 | 38 |

TokenizerFunction Models

39 | 40 |

42 |
char_separator
43 |
46 |
escaped_list_separator
48 |
51 |
offset_separator
52 |
Deprecated: char_delimiters_separator

57 | 58 |

59 | 60 |

Acknowledgements

61 | 62 |

I wish to thank the members of the boost mailing list, whose comments, 63 | compliments, and criticisms during both the development and formal review 64 | helped make the Tokenizer library what it is. I especially wish to thank 65 | Aleksey Gurtovoy for the idea of using a pair of iterators to specify the 66 | input, instead of a string. I also wish to thank Jeremy Siek for his idea 67 | of providing a container interface for the token iterators and for 68 | simplifying the template parameters for the TokenizerFunctions. He and 69 | Daryle Walker also emphasized the need to separate interface and 70 | implementation. Gary Powell sparked the idea of using the isspace and 71 | ispunct as the defaults for char_delimiters_separator. Jeff Garland 72 | provided ideas on how to change the order of the template parameters in 73 | order to make tokenizer easier to declare. Thanks to Douglas Gregor who 74 | served as review manager and provided many insights both on the boost list 75 | and in e-mail on how to polish up the implementation and presentation of 76 | Tokenizer. Finally, thanks to Beman Dawes who integrated the final version 77 | into the boost distribution.

78 |

79 | 80 |

83 | 84 |

Revised 85 | 25 86 | December, 2006

87 | 88 |

90 | 91 |

Distributed under the Boost Software License, Version 1.0. (See 92 | accompanying file LICENSE_1_0.txt or 93 | copy at http://www.boost.org/LICENSE_1_0.txt)

95 | 96 | 97 | -------------------------------------------------------------------------------- /doc/introduc.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | Introduction 11 | 12 | 13 | 14 |

16 | 17 |

Introduction

18 | 19 |

The Boost Tokenizer package provides a flexible and 20 | easy-to-use way to break a string or other character sequence into a series 21 | of tokens. Below is a simple example that will break up a phrase into 22 | words.

23 | 24 |

25 |

 26 | // simple_example_1.cpp
 27 | #include<iostream>
 28 | #include<boost/tokenizer.hpp>
 29 | #include<string>
 30 | 
 31 | int main(){
 32 |    using namespace std;
 33 |    using namespace boost;
 34 |    string s = "This is,  a test";
 35 |    tokenizer<> tok(s);
 36 |    for(tokenizer<>::iterator beg=tok.begin(); beg!=tok.end();++beg){
 37 |        cout << *beg << "\n";
 38 |    }
 39 | }
 40 |

41 |

42 | 43 |

You can choose how the string gets parsed by using the 44 | TokenizerFunction. If you do not specify anything, the default 45 | TokenizerFunction is char_delimiters_separator<char> which 46 | defaults to breaking up a string based on space and punctuation. Here is an 47 | example using another TokenizerFunction called 48 | escaped_list_separator. This TokenizerFunction parses a superset 49 | of comma-separated value (CSV) lines. The format looks like this:

50 | 51 |

Field 1,"putting quotes around fields, allows commas",Field 52 | 3

53 | 54 |

Below is an example that will break the previous line into 55 | its three fields.

56 | 57 |

58 |

 59 | // simple_example_2.cpp
 60 | #include<iostream>
 61 | #include<boost/tokenizer.hpp>
 62 | #include<string>
 63 | 
 64 | int main(){
 65 |    using namespace std;
 66 |    using namespace boost;
 67 |    string s = "Field 1,\"putting quotes around fields, allows commas\",Field 3";
 68 |    tokenizer<escaped_list_separator<char> > tok(s);
 69 |    for(tokenizer<escaped_list_separator<char> >::iterator beg=tok.begin(); beg!=tok.end();++beg){
 70 |        cout << *beg << "\n";
 71 |    }
 72 | }
 73 |

74 |

75 | 76 |

Finally, for some TokenizerFunctions you have to pass 77 | something into the constructor in order to do anything interesting. An 78 | example is the offset_separator. This class breaks a string into tokens based 79 | on offsets. For example, when 12252001 is parsed using offsets of 80 | 2,2,4 it becomes 12 25 2001. Below is the code used.

81 | 82 |

83 |

 84 | // simple_example_3.cpp
 85 | #include<iostream>
 86 | #include<boost/tokenizer.hpp>
 87 | #include<string>
 88 | 
 89 | int main(){
 90 |    using namespace std;
 91 |    using namespace boost;
 92 |    string s = "12252001";
 93 |    int offsets[] = {2,2,4};
 94 |    offset_separator f(offsets, offsets+3);
 95 |    tokenizer<offset_separator> tok(s,f);
 96 |    for(tokenizer<offset_separator>::iterator beg=tok.begin(); beg!=tok.end();++beg){
 97 |        cout << *beg << "\n";
 98 |    }
 99 | }
100 |

101 |

102 | 103 |

104 |

105 | 106 |

109 | 110 |

Revised 111 | 9 June 2010

112 | 113 |

114 | 115 |

Distributed under the Boost Software License, Version 1.0. (See 116 | accompanying file LICENSE_1_0.txt or 117 | copy at http://www.boost.org/LICENSE_1_0.txt)

119 | 120 | 121 | -------------------------------------------------------------------------------- /doc/offset_separator.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | Boost Offset Separator 11 | 12 | 13 | 15 |

17 | 18 |

Offset Separator

19 |

 20 | class offset_separator
 21 |

22 | 23 |

The offset_separator class is an implementation of the TokenizerFunction concept that can be used with 25 | the tokenizer class to break text up into 26 | tokens. The offset_separator breaks a sequence of Char's 27 | into strings based on a sequence of offsets. For example, if you had the 28 | string "12252001" and offsets (2,2,4) it would break the string into 12 25 29 | 2001. Here is an example.

30 | 31 |

Example

32 |

 33 | // simple_example_3.cpp
 34 | #include<iostream>
 35 | #include<boost/tokenizer.hpp>
 36 | #include<string>
 37 | 
 38 | int main(){
 39 |    using namespace std;
 40 |    using namespace boost;
 41 |    string s = "12252001";
 42 |    int offsets[] = {2,2,4};
 43 |    offset_separator f(offsets, offsets+3);
 44 |    tokenizer<offset_separator> tok(s,f);
 45 |    for(tokenizer<offset_separator>::iterator beg=tok.begin(); beg!=tok.end();++beg){
 46 |      cout << *beg << "\n";
 47 |    }
 48 | }
 49 |

50 | 51 |

52 | 53 |

Construction and Usage

54 | 55 |

The offset_separator has 1 constructor of interest. (The default 56 | constructor is just there to make some compilers happy). The declaration is 57 | below

58 |

 59 | template<typename Iter>
 60 | offset_separator(Iter begin,Iter end,bool bwrapoffsets = true, bool breturnpartiallast = true)
 61 |

62 | 63 | 64 | 65 | 68 | 69 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 89 | 90 | 91 | 92 | 93 | 94 | 101 | 102 |

66 \| Parameter 67 \|	70 \| Description 71 \|
begin, end	Specify the sequence of integer offsets.
bwrapoffsets	Tells whether to wrap around to the beginning of the offsets when 84 \| all the offsets have been used. For example the string 85 \| "1225200101012002" with offsets (2,2,4) with bwrapoffsets set to true, 86 \| would parse to 12 25 2001 01 01 2002. With bwrapoffsets set to false, it 87 \| would parse to 12 25 2001 and then stop because all the offsets have 88 \| been used.
breturnpartiallast	Tells whether, when the parsed sequence terminates before yielding 95 \| the number of characters in the current offset, to create a token with 96 \| what was parsed, or to ignore it. For example the string "122501" with 97 \| offsets (2,2,4) with breturnpartiallast set to true will parse to 12 25 98 \| 01. With it set to false, it will parse to 12 25 and then will stop 99 \| because there are only 2 characters left in the sequence instead of the 100 \| 4 that should have been there.

103 | 104 |

To use this class, pass an object of it anywhere a TokenizerFunction is 105 | required. If you default construct the object, it will just return 106 | every character in the parsed sequence as a token. (i.e. it defaults to an 107 | offset of 1, and bwrapoffsets is true).

108 | 109 |

110 | 111 |

Model of

112 | 113 |

TokenizerFunction

114 |

115 | 116 |

119 | 120 |

Revised 121 | 25 122 | December, 2006

123 | 124 |

125 | 126 |

Distributed under the Boost Software License, Version 1.0. (See 127 | accompanying file LICENSE_1_0.txt or 128 | copy at http://www.boost.org/LICENSE_1_0.txt)

130 | 131 | 132 | -------------------------------------------------------------------------------- /doc/token_iterator.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | Boost Token Iterator 11 | 12 | 13 | 15 |

17 | 18 |

Token Iterator

19 |

 20 | template <
 21 |         class TokenizerFunc = char_delimiters_separator<char>, 
 22 |         class Iterator = std::string::const_iterator,
 23 |         class Type = std::string
 24 | >
 25 | class token_iterator_generator 
 26 |

27 |

 28 | template<class Type, class Iterator, class TokenizerFunc>
 29 | typename token_iterator_generator<TokenizerFunc,Iterator,Type>::type 
 30 | make_token_iterator(Iterator begin, Iterator end,const TokenizerFunc& fun)
 31 | 
 32 |

33 | 34 |

The token iterator serves to provide an iterator view of the tokens in a 35 | parsed sequence.

36 | 37 |

Example

38 |

 39 | /// simple_example_5.cpp
 40 | #include<iostream>
 41 | #include<boost/token_iterator.hpp>
 42 | #include<string>
 43 | 
 44 | int main(){
 45 |    using namespace std;
 46 |    using namespace boost;
 47 |    string s = "12252001";
 48 |    int offsets[] = {2,2,4};
 49 |    offset_separator f(offsets, offsets+3);
 50 |    typedef token_iterator_generator<offset_separator>::type Iter;
 51 |    Iter beg = make_token_iterator<string>(s.begin(),s.end(),f);
 52 |    Iter end = make_token_iterator<string>(s.end(),s.end(),f); 
 53 |    // The above statement could also have been what is below
 54 |    // Iter end;
 55 |    for(;beg!=end;++beg){
 56 |      cout << *beg << "\n";
 57 |    }
 58 | }
 59 |

60 | 61 |

62 | 63 |

Template Parameters

64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 |

Parameter	Description
`TokenizerFunc`	The TokenizerFunction used to parse the sequence.
`Iterator`	The type of the iterator that specifies the sequence.
`Type`	The type of the token, typically string.

90 | 91 |

Model of

92 | 93 |

The category of Iterator, up to and including Forward Iterator. Anything 94 | higher will get scaled down to Forward Iterator.

95 | 96 |

Related Types

97 | 98 | 99 | 100 | 103 | 104 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 |

101 \| Type 102 \|	105 \| Remarks 106 \|
token_iterator_generator::type	The type of the token iterator.

115 | 116 |

Creation

117 |

118 | template<class Type, class Iterator, class TokenizerFunc>
119 | typename token_iterator_generator<TokenizerFunc,Iterator,Type>::type 
120 | make_token_iterator(Iterator begin, Iterator end,const TokenizerFunc& fun)
121 |

122 | 123 | 124 | 125 | 128 | 129 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 |

126 \| Parameter 127 \|	130 \| Description 131 \|
begin	The beginning of the sequence to be parsed.
end	Past the end of the sequence to be parsed.
fun	A functor that is a model of TokenizerFunction

152 | 153 |

154 |

155 | 156 |

159 | 160 |

Revised 161 | 25 162 | December, 2006

163 | 164 |

165 | 166 |

Distributed under the Boost Software License, Version 1.0. (See 167 | accompanying file LICENSE_1_0.txt or 168 | copy at http://www.boost.org/LICENSE_1_0.txt)

170 | 171 | 172 | -------------------------------------------------------------------------------- /doc/tokenizer.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | Boost Tokenizer Class 11 | 12 | 13 | 15 |

17 | 18 |

Tokenizer Class

19 |

  template <
 20 |         class TokenizerFunc = char_delimiters_separator<char>, 
 21 |         class Iterator = std::string::const_iterator,
 22 |         class Type = std::string
 23 |   >
 24 |   class tokenizer
 25 |

26 | 27 |

The tokenizer class provides a container view of a series of tokens 28 | contained in a sequence. You set the sequence to parse and the 29 | TokenizerFunction to use to parse the sequence either upon construction or 30 | using the assign member function. Note: No parsing is actually done upon 31 | construction. Parsing is done on demand as the tokens are accessed via the 32 | iterator provided by begin.

33 | 34 |

Example

35 |

// simple_example_1.cpp
 36 | #include<iostream>
 37 | #include<boost/tokenizer.hpp>
 38 | #include<string>
 39 | 
 40 | int main(){
 41 |    using namespace std;
 42 |    using namespace boost;
 43 |    string s = "This is,  a test";
 44 |    tokenizer<> tok(s);
 45 |    for(tokenizer<>::iterator beg=tok.begin(); beg!=tok.end();++beg){
 46 |        cout << *beg << "\n";
 47 |    }
 48 | }
 49 |

50 | 51 |

The output from simple_example_1 is:

52 | 53 |

54 | 55 |
This 56 | is 57 | a 58 | test
59 | 60 |

61 | 62 |

Template Parameters

63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 |

Parameter	Description
`TokenizerFunc`	The TokenizerFunction used to parse the sequence.
`Iterator`	The type of the iterator that specifies the sequence.
`Type`	The type of the token, typically string.

89 | 90 |

91 | 92 |

Related Types

93 | 94 | 95 | 96 | 99 | 100 | 103 | 104 | 105 | 106 | 107 | 108 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 |

97 \| Type 98 \|	101 \| Remarks 102 \|
iterator	The type returned by begin and end. Note: the category of iterator 109 \| will be at most ForwardIterator. It will be InputIterator if the 110 \| Iterator template parameter is an InputIterator. For any other 111 \| category, it will be ForwardIterator.
const_iterator	Same type as iterator.
value_type	Same type as the template parameter Type
reference	Same type as value_type&
const_reference	Same type as const reference
pointer	Same type as value_type*
const_pointer	Same type as const pointer
size_type	void
difference_type	void

162 | 163 |

164 | 165 |

Construction and Member Functions

166 |

tokenizer(Iterator first, Iterator last,const TokenizerFunc& f = TokenizerFunc()) 
167 | 
168 | template<class Container>
169 | tokenizer(const Container& c,const TokenizerFunc& f = TokenizerFunc())
170 | 
171 | void assign(Iterator first, Iterator last)
172 | 
173 | void assign(Iterator first, Iterator last, const TokenizerFunc& f)
174 | 
175 | template<class Container>
176 | void assign(const Container& c)
177 | 
178 | template<class Container>
179 | void assign(const Container& c, const TokenizerFunc& f)
180 | 
181 | iterator begin() const 
182 | 
183 | iterator end() const
184 |

185 | 186 | 187 | 188 | 191 | 192 | 195 | 196 | 197 | 198 | 199 | 200 | 203 | 204 | 205 | 206 | 207 | 208 | 210 | 211 | 212 | 213 | 214 | 215 | 217 | 218 | 219 | 220 | 221 | 222 | 224 | 225 |

189 \| Parameter 190 \|	193 \| Description 194 \|
c	A container that contains the sequence to parse. Note: c.begin() 201 \| and c.end() must be convertible to the template parameter 202 \| Iterator.
f	A functor that is a model of TokenizerFunction that will be used to 209 \| parse the sequence.
first	The iterator that represents the beginning position in the sequence 216 \| to be parsed.
last	The iterator that represents the past-the-end position in the 223 \| sequence to be parsed.

226 | 227 |

228 |

229 | 230 |

233 | 234 |

Revised 235 | 16 February, 2008

236 | 237 |

238 | 239 |

Distributed under the Boost Software License, Version 1.0. (See 240 | accompanying file LICENSE_1_0.txt or 241 | copy at http://www.boost.org/LICENSE_1_0.txt)

243 | 244 | 245 | -------------------------------------------------------------------------------- /doc/tokenizerfunction.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | TokenizerFunction Concept 11 | 12 | 13 | 15 |

C++ Boost

16 | 17 |

TokenizerFunction Concept

18 | 19 |

A TokenizerFunction is a functor whose purpose is to parse a given 20 | sequence until exactly 1 token has been found or the end is reached. It 21 | then updates the token, and informs the caller of the location in the 22 | sequence of the next element immediately after the last element of the 23 | sequence that was parsed for the current token.

24 | 25 |

Refinement of

26 | 27 |

Assignable, CopyConstructible

28 | 29 |

Notation

30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 55 | 56 | 57 | 58 | 59 | 60 | 62 | 63 |

`X`	A type that is a model of TokenizerFunction
`func`	Object of type `X`
`tok`	Object of Token
next	iterator that points to the first unparsed element of the sequence 54 \| being parsed
end	iterator that points past the end of the sequence being 61 \| parsed

64 | 65 |

Definitions

66 | 67 |

A token is the result of parsing a sequence.

68 | 69 |

Valid expressions

70 | 71 |

In addition to the expressions in Assignable and CopyConstructible, the 72 | following expressions are valid

73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 |

Name	Expression	Return type
Functor	`func(next, end, tok)`	`bool`
reset	`reset()`	`void`

99 | 100 |

Expression semantics

101 | 102 |

In addition to the expression semantics in Assignable and 103 | CopyConstructible, TokenizerFunction has the following expression 104 | semantics:

105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 127 | 128 | 130 | 131 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 145 | 146 | 147 | 148 |

Name	Expression	Precondition	Semantics	Postcondition
operator()	`func(next, end, tok)`	`next` and `end` are valid iterators to the same 125 \| sequence. next is a reference the function is free to modify. tok is 126 \| constructed.	The return value indicates whether a new token was found in the 129 \| sequence [next,end)	If the return value is true, the new token is assigned to tok. next 132 \| is always updated to the position where parsing should start on the 133 \| subsequent call.
reset	`reset()`	`None`	Clears out all state variables that are used by the object in 144 \| parsing the current sequence.	A new sequence to parse can be given.

149 | 150 |

Complexity guarantees

151 | 152 |

No guarantees. Models of TokenizerFunction are free to define their own 153 | complexity

154 | 155 |

Models

156 | 157 |

escaped_list_separator

158 | 159 |

offset_separator

160 | 161 |

char_delimiters_separator

163 | 164 |

165 |

166 | 167 |

170 | 171 |

Revised 172 | 25 173 | December, 2006

174 | 175 |

176 | 177 |

Distributed under the Boost Software License, Version 1.0. (See 178 | accompanying file LICENSE_1_0.txt or 179 | copy at http://www.boost.org/LICENSE_1_0.txt)

59 \| Parameter 60 \|	63 \| Description 64 \|
return_delims	Whether or not to return the delimiters that have been found. Note 71 \| that not all delimiters can be returned. See the other two parameters 72 \| for explanation.
returnable	This specifies the returnable delimiters. These are the delimiters 79 \| that can be returned as tokens when return_delims is true. Since these 80 \| are typically punctuation, if a 0 is provided as the argument, then the 81 \| returnable delimiters will be all characters C for which std::ispunct(C) 82 \| yields a true value. If an argument of "" is provided, then this is 83 \| taken to mean that there are no returnable delimiters.
nonreturnable	This specifies the nonreturnable delimiters. These are delimiters 90 \| that cannot be returned as tokens. Since these are typically 91 \| whitespace, if 0 is specified as an argument, then the nonreturnable 92 \| delimiters will be all characters C for which std::isspace(C) yields a 93 \| true value. If an argument of "" is provided, then this is taken to 94 \| mean that there are no non-returnable delimiters.

105 \| Parameter 106 \|	109 \| Description 110 \|
e	Specifies the character to use for escape sequences. It defaults to 117 \| the C style \ (backslash). However you can override by passing in a 118 \| different character. An example of when you might want to do this is 119 \| when you have many fields which are Windows style filenames. Instead of 120 \| escaping out each \ in the path, you can change the escape to something 121 \| else.
c	Specifies the character to use to separate the fields
q	Specifies the character to use for the quote.

145 \| Parameter 146 \|	149 \| Description 150 \|
e	Any character in the string e is considered to be an escape 157 \| character. If an empty string is given, then there are no escape 158 \| characters.
c	Any character in the string c is considered to be a separator. If 165 \| an empty string is given, then there are no separator characters.
q	Any character in the string q is considered to be a quote. If an 172 \| empty string is given, then there are no quote characters.

189 \| Parameter 190 \|	193 \| Description 194 \|
c	A container that contains the sequence to parse. Note: c.begin() 201 \| and c.end() must be convertible to the template parameter 202 \| Iterator.
f	A functor that is a model of TokenizerFunction that will be used to 209 \| parse the sequence.
first	The iterator that represents the beginning position in the sequence 216 \| to be parsed.
last	The iterator that represents the past-the-end position in the 223 \| sequence to be parsed.

Char Delimiters Separator

Example

Construction and Usage

Template Parameters

Model of

char_separator<Char, Traits>

Definitions

Examples

Template parameters

Model of

Members

Escaped List Separator

Example

Construction and Usage

Template Parameters

Model of

Table Of Contents

Introduction

Containers and Iterators

tokenizer

token iterator

TokenizerFunction Concept

TokenizerFunction Models

char_separator

escaped_list_separator

offset_separator

Acknowledgements

Introduction

Offset Separator

Example

Construction and Usage

Model of

Token Iterator

Example

Template Parameters

Model of

Related Types

Creation

Tokenizer Class

Example

Template Parameters

Related Types

Construction and Member Functions

TokenizerFunction Concept

Refinement of

Notation

Definitions

Valid expressions

Expression semantics

Complexity guarantees

Models