├── .appveyor.yml ├── .codecov.yml ├── .gitattributes ├── .github └── workflows │ └── ci.yml ├── CMakeLists.txt ├── LICENSE ├── README.md ├── build.jam ├── doc ├── char_delimiters_separator.htm ├── char_separator.htm ├── escaped_list_separator.htm ├── index.html ├── introduc.htm ├── offset_separator.htm ├── token_iterator.htm ├── tokenizer.htm └── tokenizerfunction.htm ├── example ├── Jamfile.v2 ├── char_sep_example_1.cpp ├── char_sep_example_2.cpp └── char_sep_example_3.cpp ├── include └── boost │ ├── token_functions.hpp │ ├── token_iterator.hpp │ └── tokenizer.hpp ├── index.html ├── meta └── libraries.json └── test ├── Jamfile.v2 ├── cmake_test ├── CMakeLists.txt └── main.cpp ├── examples.cpp ├── simple_example_1.cpp ├── simple_example_2.cpp ├── simple_example_3.cpp ├── simple_example_4.cpp └── simple_example_5.cpp /.appveyor.yml: -------------------------------------------------------------------------------- 1 | # Copyright 2016, 2017 Peter Dimov 2 | # Copyright 2017 - 2019 James E. King III 3 | # Copyright 2019 - 2021 Alexander Grund 4 | # Distributed under the Boost Software License, Version 1.0. 5 | # (See accompanying file LICENSE_1_0.txt or copy at http://boost.org/LICENSE_1_0.txt) 6 | 7 | # 8 | # Generic Appveyor build script for boostorg repositories 9 | # See: https://github.com/boostorg/boost-ci/ 10 | # 11 | # Instructions for customizing this script for your library: 12 | # 13 | # 1. Customize the compilers and language levels you want. 14 | # 2. If you have more than include/, src/, test/, example/, examples/, 15 | # benchmark/ or tools/ directories, set the environment variable DEPINST. 16 | # For example if your build uses code in "bench/" and "fog/" directories: 17 | # - DEPINST: --include bench --include fog 18 | # 3. Enable pull request builds in your boostorg/ account. 19 | # 20 | # That's it - the script will do everything else for you. 
21 | # 22 | 23 | version: 1.0.{build}-{branch} 24 | 25 | shallow_clone: true 26 | 27 | branches: 28 | only: 29 | - master 30 | - develop 31 | - /bugfix\/.*/ 32 | - /feature\/.*/ 33 | - /fix\/.*/ 34 | - /pr\/.*/ 35 | 36 | skip_commits: 37 | files: 38 | - LICENSE 39 | - meta/* 40 | - README.md 41 | 42 | matrix: 43 | fast_finish: false 44 | # Adding MAYFAIL to any matrix job allows it to fail but the build stays green: 45 | allow_failures: 46 | - MAYFAIL: true 47 | 48 | environment: 49 | global: 50 | B2_CI_VERSION: 1 51 | GIT_FETCH_JOBS: 4 52 | # see: http://www.boost.org/build/doc/html/bbv2/overview/invocation.html#bbv2.overview.invocation.properties 53 | # to use the default for a given environment, comment it out; recommend you build debug and release however: 54 | # on Windows it is important to exercise all the possibilities, especially shared vs static, however most 55 | # libraries that care about this exercise it in their Jamfiles... 56 | B2_ADDRESS_MODEL: 32,64 57 | B2_LINK: shared,static 58 | # B2_THREADING: threading=multi,single 59 | B2_VARIANT: release 60 | 61 | matrix: 62 | - FLAVOR: Visual Studio 2017 C++2a Strict 63 | APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017 64 | B2_CXXFLAGS: -permissive- 65 | B2_CXXSTD: 2a 66 | B2_TOOLSET: msvc-14.1 67 | 68 | - FLAVOR: Visual Studio 2017 C++14/17 69 | APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017 70 | B2_CXXSTD: 14,17 71 | B2_TOOLSET: msvc-14.1 72 | 73 | - FLAVOR: cygwin (32-bit) 74 | APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017 75 | ADDPATH: C:\cygwin\bin; 76 | B2_ADDRESS_MODEL: 32 77 | B2_CXXSTD: 03,11,14,1z 78 | B2_TOOLSET: gcc 79 | 80 | - FLAVOR: cygwin (64-bit) 81 | APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017 82 | ADDPATH: C:\cygwin64\bin; 83 | B2_ADDRESS_MODEL: 64 84 | B2_CXXSTD: 03,11,14,1z 85 | B2_TOOLSET: gcc 86 | 87 | install: 88 | - git clone --depth 1 https://github.com/boostorg/boost-ci.git C:\boost-ci-cloned 89 | # Copy ci folder if not testing Boost.CI 90 | - if NOT 
"%APPVEYOR_PROJECT_NAME%" == "boost-ci" xcopy /s /e /q /i /y C:\boost-ci-cloned\ci .\ci 91 | - rmdir /s /q C:\boost-ci-cloned 92 | - ci\appveyor\install.bat 93 | 94 | build: off 95 | 96 | test_script: ci\build.bat 97 | 98 | -------------------------------------------------------------------------------- /.codecov.yml: -------------------------------------------------------------------------------- 1 | # Copyright 2019 - 2021 Alexander Grund 2 | # Distributed under the Boost Software License, Version 1.0. 3 | # (See accompanying file LICENSE_1_0.txt or copy at http://boost.org/LICENSE_1_0.txt) 4 | # 5 | # Sample codecov configuration file. Edit as required 6 | 7 | codecov: 8 | max_report_age: off 9 | require_ci_to_pass: yes 10 | notify: 11 | # Increase this if you have multiple coverage collection jobs 12 | after_n_builds: 2 13 | wait_for_ci: yes 14 | 15 | parsers: 16 | gcov: 17 | branch_detection: 18 | conditional: yes 19 | loop: yes 20 | method: no 21 | macro: no 22 | 23 | # Change how pull request comments look 24 | comment: 25 | layout: "reach,diff,flags,files,footer" 26 | 27 | # Ignore specific files or folders. Glob patterns are supported. 
28 | # See https://docs.codecov.com/docs/ignoring-paths 29 | ignore: 30 | - libs/tokenizer/test/ 31 | - test/ 32 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | * text=auto !eol svneol=native#text/plain 2 | *.gitattributes text svneol=native#text/plain 3 | 4 | # Scriptish formats 5 | *.bat text svneol=native#text/plain 6 | *.bsh text svneol=native#text/x-beanshell 7 | *.cgi text svneol=native#text/plain 8 | *.cmd text svneol=native#text/plain 9 | *.js text svneol=native#text/javascript 10 | *.php text svneol=native#text/x-php 11 | *.pl text svneol=native#text/x-perl 12 | *.pm text svneol=native#text/x-perl 13 | *.py text svneol=native#text/x-python 14 | *.sh eol=lf svneol=LF#text/x-sh 15 | configure eol=lf svneol=LF#text/x-sh 16 | 17 | # Image formats 18 | *.bmp binary svneol=unset#image/bmp 19 | *.gif binary svneol=unset#image/gif 20 | *.ico binary svneol=unset#image/ico 21 | *.jpeg binary svneol=unset#image/jpeg 22 | *.jpg binary svneol=unset#image/jpeg 23 | *.png binary svneol=unset#image/png 24 | *.tif binary svneol=unset#image/tiff 25 | *.tiff binary svneol=unset#image/tiff 26 | *.svg text svneol=native#image/svg%2Bxml 27 | 28 | # Data formats 29 | *.pdf binary svneol=unset#application/pdf 30 | *.avi binary svneol=unset#video/avi 31 | *.doc binary svneol=unset#application/msword 32 | *.dsp text svneol=crlf#text/plain 33 | *.dsw text svneol=crlf#text/plain 34 | *.eps binary svneol=unset#application/postscript 35 | *.gz binary svneol=unset#application/gzip 36 | *.mov binary svneol=unset#video/quicktime 37 | *.mp3 binary svneol=unset#audio/mpeg 38 | *.ppt binary svneol=unset#application/vnd.ms-powerpoint 39 | *.ps binary svneol=unset#application/postscript 40 | *.psd binary svneol=unset#application/photoshop 41 | *.rdf binary svneol=unset#text/rdf 42 | *.rss text svneol=unset#text/xml 43 | *.rtf binary svneol=unset#text/rtf 44 | 
*.sln text svneol=native#text/plain 45 | *.swf binary svneol=unset#application/x-shockwave-flash 46 | *.tgz binary svneol=unset#application/gzip 47 | *.vcproj text svneol=native#text/xml 48 | *.vcxproj text svneol=native#text/xml 49 | *.vsprops text svneol=native#text/xml 50 | *.wav binary svneol=unset#audio/wav 51 | *.xls binary svneol=unset#application/vnd.ms-excel 52 | *.zip binary svneol=unset#application/zip 53 | 54 | # Text formats 55 | .htaccess text svneol=native#text/plain 56 | *.bbk text svneol=native#text/xml 57 | *.cmake text svneol=native#text/plain 58 | *.css text svneol=native#text/css 59 | *.dtd text svneol=native#text/xml 60 | *.htm text svneol=native#text/html 61 | *.html text svneol=native#text/html 62 | *.ini text svneol=native#text/plain 63 | *.log text svneol=native#text/plain 64 | *.mak text svneol=native#text/plain 65 | *.qbk text svneol=native#text/plain 66 | *.rst text svneol=native#text/plain 67 | *.sql text svneol=native#text/x-sql 68 | *.txt text svneol=native#text/plain 69 | *.xhtml text svneol=native#text/xhtml%2Bxml 70 | *.xml text svneol=native#text/xml 71 | *.xsd text svneol=native#text/xml 72 | *.xsl text svneol=native#text/xml 73 | *.xslt text svneol=native#text/xml 74 | *.xul text svneol=native#text/xul 75 | *.yml text svneol=native#text/plain 76 | boost-no-inspect text svneol=native#text/plain 77 | CHANGES text svneol=native#text/plain 78 | COPYING text svneol=native#text/plain 79 | INSTALL text svneol=native#text/plain 80 | Jamfile text svneol=native#text/plain 81 | Jamroot text svneol=native#text/plain 82 | Jamfile.v2 text svneol=native#text/plain 83 | Jamrules text svneol=native#text/plain 84 | Makefile* text svneol=native#text/plain 85 | README text svneol=native#text/plain 86 | TODO text svneol=native#text/plain 87 | 88 | # Code formats 89 | *.c text svneol=native#text/plain 90 | *.cpp text svneol=native#text/plain 91 | *.h text svneol=native#text/plain 92 | *.hpp text svneol=native#text/plain 93 | *.ipp text 
svneol=native#text/plain 94 | *.tpp text svneol=native#text/plain 95 | *.jam text svneol=native#text/plain 96 | *.java text svneol=native#text/plain 97 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | # Copyright 2020-2021 Peter Dimov 2 | # Copyright 2021 Andrey Semashev 3 | # Copyright 2021 Alexander Grund 4 | # Copyright 2022-2025 James E. King III 5 | # 6 | # Distributed under the Boost Software License, Version 1.0. 7 | # (See accompanying file LICENSE_1_0.txt or copy at http://boost.org/LICENSE_1_0.txt) 8 | --- 9 | name: CI 10 | 11 | on: 12 | pull_request: 13 | push: 14 | branches: 15 | - master 16 | - develop 17 | - bugfix/** 18 | - feature/** 19 | - fix/** 20 | - pr/** 21 | paths-ignore: 22 | - LICENSE 23 | - meta/** 24 | - README.md 25 | 26 | concurrency: 27 | group: ${{format('{0}:{1}', github.repository, github.ref)}} 28 | cancel-in-progress: true 29 | 30 | env: 31 | GIT_FETCH_JOBS: 8 32 | NET_RETRY_COUNT: 5 33 | B2_CI_VERSION: 1 34 | B2_VARIANT: debug,release 35 | B2_LINK: shared,static 36 | LCOV_BRANCH_COVERAGE: 1 37 | 38 | jobs: 39 | posix: 40 | defaults: 41 | run: 42 | shell: bash 43 | 44 | strategy: 45 | fail-fast: false 46 | matrix: 47 | include: 48 | # Linux, gcc 49 | - { compiler: gcc-4.8, cxxstd: '03,11', os: ubuntu-latest, container: 'ubuntu:16.04' } 50 | - { compiler: gcc-4.9, cxxstd: '03,11', os: ubuntu-latest, container: 'ubuntu:16.04' } 51 | - { compiler: gcc-5, cxxstd: '03,11,14,1z', os: ubuntu-latest, container: 'ubuntu:18.04' } 52 | - { compiler: gcc-6, cxxstd: '03,11,14,17', os: ubuntu-latest, container: 'ubuntu:18.04' } 53 | - { compiler: gcc-7, cxxstd: '03,11,14,17', os: ubuntu-20.04 } 54 | - { compiler: gcc-8, cxxstd: '03,11,14,17,2a', os: ubuntu-20.04 } 55 | - { compiler: gcc-9, cxxstd: '03,11,14,17,2a', os: ubuntu-20.04 } 56 | - { compiler: gcc-10, cxxstd: '03,11,14,17,20', os: 
ubuntu-22.04 } 57 | - { compiler: gcc-11, cxxstd: '03,11,14,17,20', os: ubuntu-22.04 } 58 | - { compiler: gcc-12, cxxstd: '03,11,14,17,20', os: ubuntu-22.04 } 59 | - { compiler: gcc-13, cxxstd: '11,14,17,20,2b', os: ubuntu-24.04 } 60 | - { compiler: gcc-14, cxxstd: '11,14,17,20,2b', os: ubuntu-24.04 } 61 | - { name: GCC w/ sanitizers, sanitize: yes, 62 | compiler: gcc-13, cxxstd: '03,11,14,17,20', os: ubuntu-24.04 } 63 | - { name: Collect coverage, coverage: yes, 64 | compiler: gcc-13, cxxstd: '03,2b', os: ubuntu-24.04, install: 'g++-13-multilib', address-model: '32,64' } 65 | 66 | # Linux, clang 67 | - { compiler: clang-3.5, cxxstd: '03,11', os: ubuntu-latest, container: 'ubuntu:16.04' } 68 | - { compiler: clang-3.6, cxxstd: '03,11,14', os: ubuntu-latest, container: 'ubuntu:16.04' } 69 | - { compiler: clang-3.7, cxxstd: '03,11,14', os: ubuntu-latest, container: 'ubuntu:16.04' } 70 | - { compiler: clang-3.8, cxxstd: '03,11,14', os: ubuntu-latest, container: 'ubuntu:16.04' } 71 | - { compiler: clang-3.9, cxxstd: '03,11,14', os: ubuntu-latest, container: 'ubuntu:18.04' } 72 | - { compiler: clang-4.0, cxxstd: '03,11,14', os: ubuntu-latest, container: 'ubuntu:18.04' } 73 | - { compiler: clang-5.0, cxxstd: '03,11,14,1z', os: ubuntu-latest, container: 'ubuntu:18.04' } 74 | - { compiler: clang-6.0, cxxstd: '03,11,14,17', os: ubuntu-20.04 } 75 | - { compiler: clang-7, cxxstd: '03,11,14,17', os: ubuntu-20.04 } 76 | # Note: clang-8 does not fully support C++20, so it is not compatible with some libstdc++ versions in this mode 77 | - { compiler: clang-8, cxxstd: '03,11,14,17,2a', os: ubuntu-20.04 , install: 'clang-8 g++-7', gcc_toolchain: 7 } 78 | - { compiler: clang-9, cxxstd: '03,11,14,17,2a', os: ubuntu-20.04 } 79 | - { compiler: clang-10, cxxstd: '03,11,14,17,20', os: ubuntu-20.04 } 80 | - { compiler: clang-11, cxxstd: '03,11,14,17,20', os: ubuntu-20.04 } 81 | - { compiler: clang-12, cxxstd: '03,11,14,17,20', os: ubuntu-20.04 } 82 | # Clang isn't compatible with 
libstdc++-13, so use the slightly older one 83 | - { compiler: clang-13, cxxstd: '03,11,14,17,20', os: ubuntu-22.04, install: 'clang-13 g++-12', gcc_toolchain: 12 } 84 | - { compiler: clang-14, cxxstd: '03,11,14,17,20', os: ubuntu-22.04, install: 'clang-14 g++-12', gcc_toolchain: 12 } 85 | - { compiler: clang-15, cxxstd: '03,11,14,17,20', os: ubuntu-22.04, install: 'clang-15 g++-12', gcc_toolchain: 12 } 86 | - { compiler: clang-16, cxxstd: '11,14,17,20,2b', os: ubuntu-24.04 } 87 | # https://github.com/llvm/llvm-project/issues/59827: disabled 2b/23 for clang-17 with libstdc++13 in 24.04 88 | - { compiler: clang-17, cxxstd: '11,14,17,20', os: ubuntu-24.04 } 89 | - { compiler: clang-18, cxxstd: '11,14,17,20,23,2c', os: ubuntu-24.04 } 90 | 91 | # libc++ 92 | - { compiler: clang-6.0, cxxstd: '03,11,14', os: ubuntu-latest, container: 'ubuntu:18.04', stdlib: libc++, install: 'clang-6.0 libc++-dev libc++abi-dev' } 93 | - { compiler: clang-7, cxxstd: '03,11,14,17', os: ubuntu-20.04, stdlib: libc++, install: 'clang-7 libc++-7-dev libc++abi-7-dev' } 94 | - { name: Clang w/ sanitizers, sanitize: yes, 95 | compiler: clang-12, cxxstd: '03,11,14,17,20', os: ubuntu-20.04, stdlib: libc++, install: 'clang-12 libc++-12-dev libc++abi-12-dev' } 96 | 97 | - { name: MacOS w/ clang and sanitizers, 98 | compiler: clang, cxxstd: '03,11,14,17,20,2b', os: macos-13, sanitize: yes } 99 | - { compiler: clang, cxxstd: '03,11,14,17,20,2b', os: macos-14 } 100 | - { compiler: clang, cxxstd: '03,11,14,17,20,2b', os: macos-15 } 101 | 102 | # Coverity Scan 103 | # requires two github secrets in repo to activate; see ci/github/coverity.sh 104 | # does not run on pull requests, only on pushes into develop and master 105 | - { name: Coverity, coverity: yes, 106 | compiler: clang-12, cxxstd: '03,20', os: ubuntu-20.04, ccache: no } 107 | 108 | # multiarch (bigendian testing) - does not support coverage yet 109 | - { name: Big-endian, multiarch: yes, 110 | compiler: clang, cxxstd: '17', os: ubuntu-22.04, 
ccache: no, distro: fedora, edition: 34, arch: s390x } 111 | 112 | 113 | timeout-minutes: 120 114 | runs-on: ${{matrix.os}} 115 | container: 116 | image: ${{matrix.container}} 117 | volumes: 118 | - /node20217:/node20217:rw,rshared 119 | - ${{ startsWith(matrix.container, 'ubuntu:1') && '/node20217:/__e/node20:ro,rshared' || ' ' }} 120 | env: {B2_USE_CCACHE: 1} 121 | 122 | steps: 123 | - name: Setup environment 124 | run: | 125 | if [ -f "/etc/debian_version" ]; then 126 | echo "DEBIAN_FRONTEND=noninteractive" >> $GITHUB_ENV 127 | export DEBIAN_FRONTEND=noninteractive 128 | fi 129 | if [ -n "${{matrix.container}}" ] && [ -f "/etc/debian_version" ]; then 130 | apt-get -o Acquire::Retries=$NET_RETRY_COUNT update 131 | apt-get -o Acquire::Retries=$NET_RETRY_COUNT install -y sudo software-properties-common curl 132 | # Need (newer) git, and the older Ubuntu container may require requesting the key manually using port 80 133 | curl -sSL --retry ${NET_RETRY_COUNT:-5} 'http://keyserver.ubuntu.com/pks/lookup?op=get&search=0xE1DD270288B4E6030699E45FA1715D88E1DF1F24' | sudo gpg --dearmor -o /etc/apt/trusted.gpg.d/git-core_ubuntu_ppa.gpg 134 | for i in {1..${NET_RETRY_COUNT:-3}}; do sudo -E add-apt-repository -y ppa:git-core/ppa && break || sleep 10; done 135 | apt-get -o Acquire::Retries=$NET_RETRY_COUNT update 136 | osver=$(lsb_release -sr | cut -f1 -d.) 
137 | pkgs="g++ git xz-utils" 138 | # Ubuntu 22+ has only Python 3 in the repos 139 | if [ -n "$osver" ] && [ "$osver" -ge "22" ]; then 140 | pkgs+=" python-is-python3 libpython3-dev" 141 | else 142 | pkgs+=" python libpython-dev" 143 | fi 144 | apt-get -o Acquire::Retries=$NET_RETRY_COUNT install -y $pkgs 145 | fi 146 | # For jobs not compatible with ccache, use "ccache: no" in the matrix 147 | if [[ "${{ matrix.ccache }}" == "no" ]]; then 148 | echo "B2_USE_CCACHE=0" >> $GITHUB_ENV 149 | fi 150 | git config --global pack.threads 0 151 | if [[ "${{matrix.container}}" == "ubuntu:1"* ]]; then 152 | # Node 20 doesn't work with Ubuntu 16/18 glibc: https://github.com/actions/checkout/issues/1590 153 | curl -sL https://archives.boost.io/misc/node/node-v20.9.0-linux-x64-glibc-217.tar.xz | tar -xJ --strip-components 1 -C /node20217 154 | fi 155 | 156 | - uses: actions/checkout@v4 157 | with: 158 | # For coverage builds fetch the whole history, else only 1 commit using a 'fake ternary' 159 | fetch-depth: ${{ matrix.coverage && '0' || '1' }} 160 | 161 | - name: Cache ccache 162 | uses: actions/cache@v4 163 | if: env.B2_USE_CCACHE 164 | with: 165 | path: ~/.ccache 166 | key: ${{matrix.os}}-${{matrix.container}}-${{matrix.compiler}}-${{github.sha}} 167 | restore-keys: ${{matrix.os}}-${{matrix.container}}-${{matrix.compiler}}- 168 | 169 | - name: Fetch Boost.CI 170 | uses: actions/checkout@v4 171 | with: 172 | repository: boostorg/boost-ci 173 | ref: master 174 | path: boost-ci-cloned 175 | 176 | - name: Get CI scripts folder 177 | run: | 178 | # Copy ci folder if not testing Boost.CI 179 | [[ "$GITHUB_REPOSITORY" =~ "boost-ci" ]] || cp -r boost-ci-cloned/ci . 
180 | rm -rf boost-ci-cloned 181 | 182 | - name: Install packages 183 | if: startsWith(matrix.os, 'ubuntu') 184 | run: | 185 | SOURCE_KEYS=("${{join(matrix.source_keys, '" "')}}") 186 | SOURCES=("${{join(matrix.sources, '" "')}}") 187 | # Add this by default 188 | SOURCE_KEYS+=('http://keyserver.ubuntu.com/pks/lookup?op=get&search=0x1E9377A2BA9EF27F') 189 | SOURCES+=(ppa:ubuntu-toolchain-r/test) 190 | 191 | ci/add-apt-keys.sh "${SOURCE_KEYS[@]}" 192 | # Initial update before adding sources required to get e.g. keys 193 | sudo apt-get -o Acquire::Retries=$NET_RETRY_COUNT update 194 | ci/add-apt-repositories.sh "${SOURCES[@]}" 195 | 196 | sudo apt-get -o Acquire::Retries=$NET_RETRY_COUNT update 197 | if [[ -z "${{matrix.install}}" ]]; then 198 | pkgs="${{matrix.compiler}}" 199 | pkgs="${pkgs/gcc-/g++-}" 200 | else 201 | pkgs="${{matrix.install}}" 202 | fi 203 | sudo apt-get -o Acquire::Retries=$NET_RETRY_COUNT install -y $pkgs 204 | 205 | - name: Setup GCC Toolchain 206 | if: matrix.gcc_toolchain 207 | run: | 208 | GCC_TOOLCHAIN_ROOT="$HOME/gcc-toolchain" 209 | echo "GCC_TOOLCHAIN_ROOT=$GCC_TOOLCHAIN_ROOT" >> $GITHUB_ENV 210 | if ! 
command -v dpkg-architecture; then 211 | apt-get install -y dpkg-dev 212 | fi 213 | MULTIARCH_TRIPLET="$(dpkg-architecture -qDEB_HOST_MULTIARCH)" 214 | mkdir -p "$GCC_TOOLCHAIN_ROOT" 215 | ln -s /usr/include "$GCC_TOOLCHAIN_ROOT/include" 216 | ln -s /usr/bin "$GCC_TOOLCHAIN_ROOT/bin" 217 | mkdir -p "$GCC_TOOLCHAIN_ROOT/lib/gcc/$MULTIARCH_TRIPLET" 218 | ln -s "/usr/lib/gcc/$MULTIARCH_TRIPLET/${{matrix.gcc_toolchain}}" "$GCC_TOOLCHAIN_ROOT/lib/gcc/$MULTIARCH_TRIPLET/${{matrix.gcc_toolchain}}" 219 | 220 | - name: Setup multiarch 221 | if: matrix.multiarch 222 | env: 223 | BDDE_DISTRO: ${{matrix.distro}} 224 | BDDE_EDITION: ${{matrix.edition}} 225 | BDDE_ARCH: ${{matrix.arch}} 226 | run: ci/github/setup_bdde.sh 227 | 228 | - name: Setup Boost 229 | env: 230 | B2_ADDRESS_MODEL: ${{matrix.address-model}} 231 | B2_COMPILER: ${{matrix.compiler}} 232 | B2_CXXSTD: ${{matrix.cxxstd}} 233 | B2_SANITIZE: ${{matrix.sanitize}} 234 | B2_STDLIB: ${{matrix.stdlib}} 235 | # More entries can be added in the same way, see the B2_ARGS assignment in ci/enforce.sh for the possible keys. 236 | # B2_DEFINES: ${{matrix.defines}} 237 | # Variables set here (to non-empty) will override the top-level environment variables, e.g. 238 | # B2_VARIANT: ${{matrix.variant}} 239 | # Set the (B2) target(s) to build, defaults to the test folder of the current library 240 | # Can alternatively be done like this in the build step or in the build command of the build step, e.g. 
`run: B2_TARGETS=libs/$SELF/doc ci/build.sh` 241 | # B2_TARGETS: libs/foo/test//bar 242 | run: source ci/github/install.sh 243 | 244 | - name: Setup coverage collection 245 | if: matrix.coverage 246 | run: ci/github/codecov.sh "setup" 247 | 248 | - name: Run tests 249 | if: '!matrix.coverity' 250 | run: ci/build.sh 251 | 252 | - name: Upload coverage 253 | if: matrix.coverage 254 | run: ci/codecov.sh "upload" 255 | env: 256 | CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} 257 | 258 | - name: Run coverity 259 | if: matrix.coverity && github.event_name == 'push' && (github.ref_name == 'develop' || github.ref_name == 'master') 260 | run: ci/github/coverity.sh 261 | env: 262 | COVERITY_SCAN_NOTIFICATION_EMAIL: ${{ secrets.COVERITY_SCAN_NOTIFICATION_EMAIL }} 263 | COVERITY_SCAN_TOKEN: ${{ secrets.COVERITY_SCAN_TOKEN }} 264 | 265 | windows: 266 | defaults: 267 | run: 268 | shell: cmd 269 | strategy: 270 | fail-fast: false 271 | matrix: 272 | include: 273 | - { toolset: msvc-14.0, cxxstd: '14,latest', addrmd: '32,64', os: windows-2019 } 274 | - { toolset: msvc-14.2, cxxstd: '14,17,20', addrmd: '32,64', os: windows-2019 } 275 | - { toolset: msvc-14.3, cxxstd: '14,17,20,latest',addrmd: '32,64', os: windows-2022 } 276 | - { name: Collect coverage, coverage: yes, 277 | toolset: msvc-14.3, cxxstd: 'latest', addrmd: '64', os: windows-2022 } 278 | - { toolset: clang-win, cxxstd: '14,17,latest', addrmd: '32,64', os: windows-2022 } 279 | - { toolset: gcc, cxxstd: '03,11,14,17,2a', addrmd: '64', os: windows-2019 } 280 | 281 | runs-on: ${{matrix.os}} 282 | 283 | steps: 284 | - uses: actions/checkout@v4 285 | 286 | - name: Fetch Boost.CI 287 | uses: actions/checkout@v4 288 | with: 289 | repository: boostorg/boost-ci 290 | ref: master 291 | path: boost-ci-cloned 292 | - name: Get CI scripts folder 293 | run: | 294 | REM Copy ci folder if not testing Boost.CI 295 | if "%GITHUB_REPOSITORY%" == "%GITHUB_REPOSITORY:boost-ci=%" xcopy /s /e /q /i /y boost-ci-cloned\ci .\ci 296 | rmdir /s /q 
boost-ci-cloned 297 | 298 | - name: Setup Boost 299 | run: ci\github\install.bat 300 | 301 | - name: Run tests 302 | if: '!matrix.coverage' 303 | run: ci\build.bat 304 | env: 305 | B2_TOOLSET: ${{matrix.toolset}} 306 | B2_CXXSTD: ${{matrix.cxxstd}} 307 | B2_ADDRESS_MODEL: ${{matrix.addrmd}} 308 | 309 | - name: Collect coverage 310 | shell: powershell 311 | if: matrix.coverage 312 | run: ci\opencppcoverage.ps1 313 | env: 314 | B2_TOOLSET: ${{matrix.toolset}} 315 | B2_CXXSTD: ${{matrix.cxxstd}} 316 | B2_ADDRESS_MODEL: ${{matrix.addrmd}} 317 | 318 | - name: Upload coverage 319 | if: matrix.coverage 320 | uses: codecov/codecov-action@v5 321 | with: 322 | disable_search: true 323 | fail_ci_if_error: true 324 | files: __out/cobertura.xml 325 | name: github-actions 326 | token: ${{secrets.CODECOV_TOKEN}} 327 | verbose: true 328 | 329 | MSYS2: 330 | defaults: 331 | run: 332 | shell: msys2 {0} 333 | strategy: 334 | fail-fast: false 335 | matrix: 336 | include: 337 | - { sys: MINGW32, compiler: gcc, cxxstd: '03,11,17,20' } 338 | - { sys: MINGW64, compiler: gcc, cxxstd: '03,11,17,20' } 339 | 340 | runs-on: windows-latest 341 | 342 | steps: 343 | - uses: actions/checkout@v4 344 | 345 | - name: Setup MSYS2 environment 346 | uses: msys2/setup-msys2@v2 347 | with: 348 | msystem: ${{matrix.sys}} 349 | update: true 350 | install: git python 351 | pacboy: gcc:p cmake:p ninja:p 352 | 353 | - name: Fetch Boost.CI 354 | uses: actions/checkout@v4 355 | with: 356 | repository: boostorg/boost-ci 357 | ref: master 358 | path: boost-ci-cloned 359 | - name: Get CI scripts folder 360 | run: | 361 | # Copy ci folder if not testing Boost.CI 362 | [[ "$GITHUB_REPOSITORY" =~ "boost-ci" ]] || cp -r boost-ci-cloned/ci . 
363 | rm -rf boost-ci-cloned 364 | 365 | - name: Setup Boost 366 | env: 367 | B2_COMPILER: ${{matrix.compiler}} 368 | B2_CXXSTD: ${{matrix.cxxstd}} 369 | B2_SANITIZE: ${{matrix.sanitize}} 370 | B2_STDLIB: ${{matrix.stdlib}} 371 | run: ci/github/install.sh 372 | 373 | - name: Run tests 374 | run: ci/build.sh 375 | 376 | # Run also the CMake tests to avoid having to setup another matrix for CMake on MSYS 377 | - name: Run CMake tests 378 | run: | 379 | cd "$BOOST_ROOT" 380 | mkdir __build_cmake_test__ && cd __build_cmake_test__ 381 | cmake -G Ninja -DCMAKE_BUILD_TYPE=Debug -DBOOST_INCLUDE_LIBRARIES=$SELF -DBUILD_SHARED_LIBS=ON -DBUILD_TESTING=ON -DBoost_VERBOSE=ON .. 382 | cmake --build . --target tests --config Debug -j$B2_JOBS 383 | ctest --output-on-failure --build-config Debug 384 | 385 | CMake: 386 | defaults: 387 | run: 388 | shell: bash 389 | 390 | strategy: 391 | fail-fast: false 392 | matrix: 393 | include: 394 | - { os: ubuntu-20.04, build_shared: ON, build_type: Debug, generator: 'Unix Makefiles' } 395 | - { os: ubuntu-20.04, build_shared: OFF, build_type: Debug, generator: 'Unix Makefiles' } 396 | - { os: windows-2019, build_shared: ON, build_type: Debug, generator: 'Visual Studio 16 2019' } 397 | - { os: windows-2019, build_shared: OFF, build_type: Debug, generator: 'Visual Studio 16 2019' } 398 | 399 | timeout-minutes: 120 400 | runs-on: ${{matrix.os}} 401 | 402 | steps: 403 | - uses: actions/checkout@v4 404 | - name: Fetch Boost.CI 405 | uses: actions/checkout@v4 406 | with: 407 | repository: boostorg/boost-ci 408 | ref: master 409 | path: boost-ci-cloned 410 | - name: Get CI scripts folder 411 | run: | 412 | # Copy ci folder if not testing Boost.CI 413 | [[ "$GITHUB_REPOSITORY" =~ "boost-ci" ]] || cp -r boost-ci-cloned/ci . 
414 | rm -rf boost-ci-cloned 415 | - name: Setup Boost 416 | env: {B2_DONT_BOOTSTRAP: 1} 417 | run: source ci/github/install.sh 418 | 419 | - name: Run CMake tests 420 | run: | 421 | cd "$BOOST_ROOT" 422 | mkdir __build_cmake_test__ && cd __build_cmake_test__ 423 | cmake -G "${{matrix.generator}}" -DCMAKE_BUILD_TYPE=${{matrix.build_type}} -DBOOST_INCLUDE_LIBRARIES=$SELF -DBUILD_SHARED_LIBS=${{matrix.build_shared}} -DBUILD_TESTING=ON -DBoost_VERBOSE=ON .. 424 | cmake --build . --target tests --config ${{matrix.build_type}} -j$B2_JOBS 425 | ctest --output-on-failure --build-config ${{matrix.build_type}} 426 | 427 | - name: Run CMake subdir tests 428 | run: | 429 | cmake_test_folder="$BOOST_ROOT/libs/$SELF/test/cmake_test" # New unified folder 430 | [ -d "$cmake_test_folder" ] || cmake_test_folder="$BOOST_ROOT/libs/$SELF/test/cmake_subdir_test" 431 | cd "$cmake_test_folder" 432 | mkdir __build_cmake_subdir_test__ && cd __build_cmake_subdir_test__ 433 | cmake -G "${{matrix.generator}}" -DBOOST_CI_INSTALL_TEST=OFF -DCMAKE_BUILD_TYPE=${{matrix.build_type}} -DBUILD_SHARED_LIBS=${{matrix.build_shared}} .. 434 | cmake --build . --config ${{matrix.build_type}} -j$B2_JOBS 435 | ctest --output-on-failure --build-config ${{matrix.build_type}} 436 | 437 | - name: Install Library 438 | run: | 439 | cd "$BOOST_ROOT" 440 | mkdir __build_cmake_install_test__ && cd __build_cmake_install_test__ 441 | cmake -G "${{matrix.generator}}" -DCMAKE_BUILD_TYPE=${{matrix.build_type}} -DBOOST_INCLUDE_LIBRARIES=$SELF -DBUILD_SHARED_LIBS=${{matrix.build_shared}} -DCMAKE_INSTALL_PREFIX=~/.local -DBoost_VERBOSE=ON -DBoost_DEBUG=ON .. 442 | cmake --build . 
--target install --config ${{matrix.build_type}} -j$B2_JOBS 443 | - name: Run CMake install tests 444 | run: | 445 | cmake_test_folder="$BOOST_ROOT/libs/$SELF/test/cmake_test" # New unified folder 446 | [ -d "$cmake_test_folder" ] || cmake_test_folder="$BOOST_ROOT/libs/$SELF/test/cmake_install_test" 447 | cd "$cmake_test_folder" 448 | mkdir __build_cmake_install_test__ && cd __build_cmake_install_test__ 449 | cmake -G "${{matrix.generator}}" -DBOOST_CI_INSTALL_TEST=ON -DCMAKE_BUILD_TYPE=${{matrix.build_type}} -DBUILD_SHARED_LIBS=${{matrix.build_shared}} -DCMAKE_PREFIX_PATH=~/.local .. 450 | cmake --build . --config ${{matrix.build_type}} -j$B2_JOBS 451 | ctest --output-on-failure --build-config ${{matrix.build_type}} 452 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Generated by `boostdep --cmake tokenizer` 2 | # Copyright 2020 Peter Dimov 3 | # Distributed under the Boost Software License, Version 1.0. 
4 | # https://www.boost.org/LICENSE_1_0.txt 5 | 6 | cmake_minimum_required(VERSION 3.5...3.16) 7 | 8 | project(boost_tokenizer VERSION "${BOOST_SUPERPROJECT_VERSION}" LANGUAGES CXX) 9 | 10 | add_library(boost_tokenizer INTERFACE) 11 | add_library(Boost::tokenizer ALIAS boost_tokenizer) 12 | 13 | target_include_directories(boost_tokenizer INTERFACE include) 14 | 15 | target_link_libraries(boost_tokenizer 16 | INTERFACE 17 | Boost::assert 18 | Boost::config 19 | Boost::iterator 20 | Boost::mpl 21 | Boost::throw_exception 22 | Boost::type_traits 23 | ) 24 | 25 | if(BUILD_TESTING AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/test/CMakeLists.txt") 26 | 27 | add_subdirectory(test) 28 | 29 | endif() 30 | 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Boost Software License - Version 1.0 - August 17th, 2003 2 | 3 | Permission is hereby granted, free of charge, to any person or organization 4 | obtaining a copy of the software and accompanying documentation covered by 5 | this license (the "Software") to use, reproduce, display, distribute, 6 | execute, and transmit the Software, and to prepare derivative works of the 7 | Software, and to permit third-parties to whom the Software is furnished to 8 | do so, all subject to the following: 9 | 10 | The copyright notices in the Software and this entire statement, including 11 | the above license grant, this restriction and the following disclaimer, 12 | must be included in all copies of the Software, in whole or in part, and 13 | all derivative works of the Software, unless such copies or derivative 14 | works are solely in the form of machine-executable object code generated by 15 | a source language processor. 
16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT 20 | SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE 21 | FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, 22 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 | DEALINGS IN THE SOFTWARE. 24 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # [Boost.Tokenizer](https://boost.org/libs/tokenizer) 2 | 3 | Boost.Tokenizer is a part of [Boost C++ Libraries](https://github.com/boostorg). The Boost.Tokenizer package provides a flexible and easy-to-use way to break a string or other character sequence into a series of tokens. 4 | 5 | ## License 6 | 7 | Distributed under the [Boost Software License, Version 1.0](https://www.boost.org/LICENSE_1_0.txt). 
8 | 9 | ## Properties 10 | 11 | * C++03 12 | * Header-Only 13 | 14 | ## Build Status 15 | 16 | 17 | | Branch | GHA CI | Appveyor | Coverity Scan | codecov.io | Deps | Docs | Tests | 18 | | :-------------: | ------ | -------- | ------------- | ---------- | ---- | ---- | ----- | 19 | | [`master`](https://github.com/boostorg/tokenizer/tree/master) | [![Build Status](https://github.com/boostorg/tokenizer/actions/workflows/ci.yml/badge.svg?branch=master)](https://github.com/boostorg/tokenizer/actions?query=branch:master) | [![Build status](https://ci.appveyor.com/api/projects/status/rpqpywvv4l4637qy/branch/master?svg=true)](https://ci.appveyor.com/project/cppalliance/tokenizer/branch/master) | [![Coverity Scan Build Status](https://scan.coverity.com/projects/15854/badge.svg)](https://scan.coverity.com/projects/boostorg-tokenizer) | [![codecov](https://codecov.io/gh/boostorg/tokenizer/branch/master/graph/badge.svg?token=sakwglU1PC)](https://codecov.io/gh/boostorg/tokenizer/tree/master) | [![Deps](https://img.shields.io/badge/deps-master-brightgreen.svg)](https://pdimov.github.io/boostdep-report/master/tokenizer.html) | [![Documentation](https://img.shields.io/badge/docs-master-brightgreen.svg)](https://www.boost.org/doc/libs/master/libs/tokenizer) | [![Enter the Matrix](https://img.shields.io/badge/matrix-master-brightgreen.svg)](https://www.boost.org/development/tests/master/developer/tokenizer.html) 20 | | [`develop`](https://github.com/boostorg/tokenizer/tree/develop) | [![Build Status](https://github.com/boostorg/tokenizer/actions/workflows/ci.yml/badge.svg?branch=develop)](https://github.com/boostorg/tokenizer/actions?query=branch:develop) | [![Build status](https://ci.appveyor.com/api/projects/status/rpqpywvv4l4637qy/branch/develop?svg=true)](https://ci.appveyor.com/project/cppalliance/tokenizer/branch/develop) | [![Coverity Scan Build Status](https://scan.coverity.com/projects/15854/badge.svg)](https://scan.coverity.com/projects/boostorg-tokenizer) | 
[![codecov](https://codecov.io/gh/boostorg/tokenizer/branch/develop/graph/badge.svg?token=sakwglU1PC)](https://codecov.io/gh/boostorg/tokenizer/tree/develop) | [![Deps](https://img.shields.io/badge/deps-develop-brightgreen.svg)](https://pdimov.github.io/boostdep-report/develop/tokenizer.html) | [![Documentation](https://img.shields.io/badge/docs-develop-brightgreen.svg)](https://www.boost.org/doc/libs/develop/libs/tokenizer) | [![Enter the Matrix](https://img.shields.io/badge/matrix-develop-brightgreen.svg)](https://www.boost.org/development/tests/develop/developer/tokenizer.html) 21 | 22 | ## Overview 23 | 24 | > break up a phrase into words. 25 | 26 | ![Try it online][badge.wandbox] 27 | 28 | ```c++ 29 | #include 30 | #include 31 | #include 32 | 33 | int main(){ 34 | std::string s = "This is, a test"; 35 | typedef boost::tokenizer<> Tok; 36 | Tok tok(s); 37 | for (Tok::iterator beg = tok.begin(); beg != tok.end(); ++beg){ 38 | std::cout << *beg << "\n"; 39 | } 40 | } 41 | 42 | ``` 43 | 44 | > Using Range-based for loop (C++11 or later) 45 | 46 | ![Try it online][badge.wandbox] 47 | ```c++ 48 | #include 49 | #include 50 | #include 51 | 52 | int main(){ 53 | std::string s = "This is, a test"; 54 | boost::tokenizer<> tok(s); 55 | for (auto token: tok) { 56 | std::cout << token << "\n"; 57 | } 58 | } 59 | ``` 60 | 61 | ## Related Material 62 | 63 | [Boost.Tokenizer](https://theboostcpplibraries.com/boost.tokenizer) Chapter 10 at theboostcpplibraries.com, contains several examples including **escaped_list_separator**. 64 | 65 | ## Acknowledgements 66 | >From the author: 67 | > 68 | I wish to thank the members of the boost mailing list, whose comments, compliments, and criticisms during both the development and formal review helped make the Tokenizer library what it is. I especially wish to thank Aleksey Gurtovoy for the idea of using a pair of iterators to specify the input, instead of a string. 
I also wish to thank Jeremy Siek for his idea of providing a container interface for the token iterators and for simplifying the template parameters for the TokenizerFunctions. He and Daryle Walker also emphasized the need to separate interface and implementation. Gary Powell sparked the idea of using the isspace and ispunct as the defaults for char_delimiters_separator. Jeff Garland provided ideas on how to change the order of the template parameters in order to make tokenizer easier to declare. Thanks to Douglas Gregor who served as review manager and provided many insights both on the boost list and in e-mail on how to polish up the implementation and presentation of Tokenizer. Finally, thanks to Beman Dawes who integrated the final version into the boost distribution. 69 | 70 | ## Directories 71 | 72 | | Name | Purpose | 73 | | ----------- | ------------------------------ | 74 | | `example` | examples | 75 | | `include` | header | 76 | | `test` | unit tests | 77 | 78 | ## More information 79 | 80 | * [Ask questions](https://stackoverflow.com/questions/ask?tags=c%2B%2B,boost,boost-tokenizer) 81 | * [Report bugs](https://github.com/boostorg/tokenizer/issues): Be sure to mention Boost version, platform and compiler you're using. A small compilable code sample to reproduce the problem is always good as well. 82 | * Submit your patches as pull requests against **develop** branch. Note that by submitting patches you agree to license your modifications under the [Boost Software License, Version 1.0](https://www.boost.org/LICENSE_1_0.txt). 83 | * Discussions about the library are held on the [Boost developers mailing list](https://www.boost.org/community/groups.html#main). Be sure to read the [discussion policy](https://www.boost.org/community/policy.html) before posting and add the `[tokenizer]` tag at the beginning of the subject line. 
-------------------------------------------------------------------------------- /build.jam: -------------------------------------------------------------------------------- 1 | # Copyright René Ferdinand Rivera Morell 2023-2024 2 | # Distributed under the Boost Software License, Version 1.0. 3 | # (See accompanying file LICENSE_1_0.txt or copy at 4 | # http://www.boost.org/LICENSE_1_0.txt) 5 | 6 | require-b2 5.2 ; 7 | 8 | constant boost_dependencies : 9 | /boost/assert//boost_assert 10 | /boost/config//boost_config 11 | /boost/iterator//boost_iterator 12 | /boost/throw_exception//boost_throw_exception 13 | /boost/type_traits//boost_type_traits ; 14 | 15 | project /boost/tokenizer 16 | : common-requirements 17 | include 18 | ; 19 | 20 | explicit 21 | [ alias boost_tokenizer : : : : $(boost_dependencies) ] 22 | [ alias all : boost_tokenizer example test ] 23 | ; 24 | 25 | call-if : boost-library tokenizer 26 | ; 27 | 28 | -------------------------------------------------------------------------------- /doc/char_delimiters_separator.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | Boost Char Delimiters Separator 11 | 12 | 13 | 15 |

C++ Boost

Note: This class is deprecated. Please use 17 | char_separator instead. 18 | 19 |

Char Delimiters Separator

20 |
 21 | template <class Char, class Traits = std::char_traits<Char> >
 22 | class char_delimiters_separator{
 23 | 
24 | 25 |

The char_delimiters_separator class is an implementation of the TokenizerFunction concept that can be used to 27 | break text up into tokens. It is the default TokenizerFunction for 28 | tokenizer and token_iterator_generator. An example is below.

29 | 30 |

Example

31 |
 32 | // simple_example_4.cpp
 33 | #include<iostream>
 34 | #include<boost/tokenizer.hpp>
 35 | #include<string>
 36 | 
 37 | int main(){
 38 |    using namespace std;
 39 |    using namespace boost;
 40 |    string s = "This is,  a test";
 41 |    tokenizer<char_delimiters_separator<char> > tok(s);
 42 |    for(tokenizer<char_delimiters_separator<char> >::iterator beg=tok.begin(); beg!=tok.end();++beg){
 43 |        cout << *beg << "\n";
 44 |    }
 45 | }
 46 | 
47 | 48 |

Construction and Usage

49 | 50 |

There is one constructor of interest. It is as follows

51 |
 52 | explicit char_delimiters_separator(bool return_delims = false, 
 53 | const Char* returnable = "",const Char* nonreturnable = "" )
 54 | 
55 | 56 | 57 | 58 | 61 | 62 | 65 | 66 | 67 | 68 | 69 | 70 | 73 | 74 | 75 | 76 | 77 | 78 | 84 | 85 | 86 | 87 | 88 | 89 | 95 | 96 |
59 |

Parameter

60 |
63 |

Description

64 |
return_delimsWhether or not to return the delimiters that have been found. Note 71 | that not all delimiters can be returned. See the other two parameters 72 | for explanation.
returnableThis specifies the returnable delimiters. These are the delimiters 79 | that can be returned as tokens when return_delims is true. Since these 80 | are typically punctuation, if a 0 is provided as the argument, then the 81 | returnable delimiters will be all characters C for which std::ispunct(C) 82 | yields a true value. If an argument of "" is provided, then this is 83 | taken to mean that there are no returnable delimiters.
nonreturnableThis specifies the nonreturnable delimiters. These are delimiters 90 | that cannot be returned as tokens. Since these are typically 91 | whitespace, if 0 is specified as an argument, then the nonreturnable 92 | delimiters will be all characters C for which std::isspace(C) yields a 93 | true value. If an argument of "" is provided, then this is taken to 94 | mean that there are no non-returnable delimiters.
97 | 98 |

The reason there is a distinction between nonreturnable and returnable 99 | delimiters is that some delimiters are just used to split up tokens and are 100 | nothing more. Take for example the following string "b c +". Assume you are 101 | writing a simple calculator to parse expressions in postfix notation. While 102 | both the space and the + separate tokens, you are only interested in the + 103 | and not in the space. Indeed having the space returned as a token would 104 | only complicate your code. In this case you would specify + as a 105 | returnable, and space as a nonreturnable delimiter.

106 | 107 |

To use this class, pass an object of it anywhere a TokenizerFunction 108 | object is required.

109 | 110 |

Template Parameters

111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 124 | 125 | 126 | 127 | 128 | 129 | 131 | 132 |
ParameterDescription
CharThe type of the elements within a token, typically 123 | char.
TraitsThe traits class for Char, typically 130 | std::char_traits<Char>
133 | 134 |

Model of

135 | 136 |

TokenizerFunction

137 | 138 |

 

139 |
140 | 141 |

Valid HTML 4.01 Transitional

144 | 145 |

Revised 146 | 25 147 | December, 2006

148 | 149 |

Copyright © 2001 John R. Bandela

150 | 151 |

Distributed under the Boost Software License, Version 1.0. (See 152 | accompanying file LICENSE_1_0.txt or 153 | copy at http://www.boost.org/LICENSE_1_0.txt)

155 | 156 | 157 | -------------------------------------------------------------------------------- /doc/char_separator.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | Boost Char Separator 11 | 12 | 13 | 15 |

C++ Boost

17 | 18 |

char_separator<Char, Traits>

19 | 20 |

The char_separator class breaks a sequence of characters into 21 | tokens based on character delimiters much in the same way that 22 | strtok() does (but without all the evils of non-reentrancy and 23 | destruction of the input sequence).

24 | 25 |

The char_separator class is used in conjunction with the 26 | token_iterator or tokenizer to perform tokenizing.

28 | 29 |

Definitions

30 | 31 |

The strtok() function does not include matches with the 32 | character delimiters in the output sequence of tokens. However, sometimes 33 | it is useful to have the delimiters show up in the output sequence, 34 | therefore char_separator provides this as an option. We refer to 35 | delimiters that show up as output tokens as kept delimiters 36 | and delimiters that do not show up as output tokens as dropped 37 | delimiters.

38 | 39 |

When two delimiters appear next to each other in the input sequence, 40 | there is the question of whether to output an empty token or 41 | to skip ahead. The behaviour of strtok() is to skip ahead. The 42 | char_separator class provides both options.

43 | 44 |

Examples

45 | 46 |

This first example shows how to use char_separator as a 47 | replacement for the strtok() function. We've specified three 48 | character delimiters, and they will not show up as output tokens. We have 49 | not specified any kept delimiters, and by default any empty tokens will be 50 | ignored.

51 | 52 |
53 |
 54 | // char_sep_example_1.cpp
 55 | #include <iostream>
 56 | #include <boost/tokenizer.hpp>
 57 | #include <string>
 58 | 
 59 | int main()
 60 | {
 61 |   std::string str = ";;Hello|world||-foo--bar;yow;baz|";
 62 |   typedef boost::tokenizer<boost::char_separator<char> > 
 63 |     tokenizer;
 64 |   boost::char_separator<char> sep("-;|");
 65 |   tokenizer tokens(str, sep);
 66 |   for (tokenizer::iterator tok_iter = tokens.begin();
 67 |        tok_iter != tokens.end(); ++tok_iter)
 68 |     std::cout << "<" << *tok_iter << "> ";
 69 |   std::cout << "\n";
 70 |   return EXIT_SUCCESS;
 71 | }
 72 | 
73 |
The output is: 74 | 75 |
76 |
 77 | <Hello> <world> <foo> <bar> <yow> <baz> 
 78 | 
79 |
80 | 81 |

The next example shows tokenizing with two dropped delimiters '-' and 82 | ';' and a single kept delimiter '|'. We also specify that empty tokens 83 | should show up in the output when two delimiters are next to each 84 | other.

85 | 86 |
87 |
 88 | // char_sep_example_2.cpp
 89 | #include <iostream>
 90 | #include <boost/tokenizer.hpp>
 91 | #include <string>
 92 | 
 93 | int main()
 94 | {
 95 |     std::string str = ";;Hello|world||-foo--bar;yow;baz|";
 96 |     typedef boost::tokenizer<boost::char_separator<char> > 
 97 |         tokenizer;
 98 |     boost::char_separator<char> sep("-;", "|", boost::keep_empty_tokens);
 99 |     tokenizer tokens(str, sep);
100 |     for (tokenizer::iterator tok_iter = tokens.begin();
101 |          tok_iter != tokens.end(); ++tok_iter)
102 |       std::cout << "<" << *tok_iter << "> ";
103 |     std::cout << "\n";
104 |     return EXIT_SUCCESS;
105 | }
106 | 
107 |
The output is: 108 | 109 |
110 |
111 | <> <> <Hello> <|> <world> <|> <> <|> <> <foo> <> <bar> <yow> <baz> <|> <>
112 | 
113 |
114 | 115 |

The final example shows tokenizing on punctuation and whitespace 116 | characters using the default constructor of the 117 | char_separator.

118 | 119 |
120 |
121 | // char_sep_example_3.cpp
122 | #include <iostream>
123 | #include <boost/tokenizer.hpp>
124 | #include <string>
125 | 
126 | int main()
127 | {
128 |    std::string str = "This is,  a test";
129 |    typedef boost::tokenizer<boost::char_separator<char> > Tok;
130 |    boost::char_separator<char> sep; // default constructed
131 |    Tok tok(str, sep);
132 |    for(Tok::iterator tok_iter = tok.begin(); tok_iter != tok.end(); ++tok_iter)
133 |      std::cout << "<" << *tok_iter << "> ";
134 |    std::cout << "\n";
135 |    return EXIT_SUCCESS;
136 | }
137 | 
138 |
The output is: 139 | 140 |
141 |
142 | <This> <is> <,> <a> <test> 
143 | 
144 |
145 | 146 |

Template parameters

147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 |
ParameterDescriptionDefault
CharThe type of elements within a token, typically char. 
TraitsThe char_traits for the character type.char_traits<char>
173 | 174 |

Model of

Tokenizer Function 175 | 176 |

Members

177 |
178 |
179 | explicit char_separator(const Char* dropped_delims,
180 |                         const Char* kept_delims = "",
181 |                         empty_token_policy empty_tokens = drop_empty_tokens)
182 | 
183 | 184 |

This creates a char_separator object, which can then be used to 185 | create a token_iterator or 186 | tokenizer to perform tokenizing. The 187 | dropped_delims and kept_delims are strings of characters 188 | where each character is used as delimiter during tokenizing. Whenever a 189 | delimiter is seen in the input sequence, the current token is finished, and 190 | a new token begins. The delimiters in dropped_delims do not show 191 | up as tokens in the output whereas the delimiters in kept_delims 192 | do show up as tokens. If empty_tokens is 193 | drop_empty_tokens, then empty tokens will not show up in the 194 | output. If empty_tokens is keep_empty_tokens then empty 195 | tokens will show up in the output.

196 |
197 |
198 | explicit char_separator()
199 | 
200 | 201 |

The function std::isspace() is used to identify dropped 202 | delimiters and std::ispunct() is used to identify kept delimiters. 203 | In addition, empty tokens are dropped.

204 |
205 |
206 | template <typename InputIterator, typename Token>
207 | bool operator()(InputIterator& next, InputIterator end, Token& tok)
208 | 
209 | 210 |

This function is called by the token_iterator to perform tokenizing. The 212 | user typically does not call this function directly.

213 |
214 | 215 |

Valid HTML 4.01 Transitional

218 | 219 |

Revised 220 | 25 221 | December, 2006

222 | 223 |

Copyright © 2001-2002 Jeremy Siek and John R. Bandela

224 | 225 |

Distributed under the Boost Software License, Version 1.0. (See 226 | accompanying file LICENSE_1_0.txt or 227 | copy at http://www.boost.org/LICENSE_1_0.txt)

229 | 230 | 231 | -------------------------------------------------------------------------------- /doc/escaped_list_separator.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | Boost Escaped List Separator 11 | 12 | 13 | 15 |

C++ Boost

17 | 18 |

Escaped List Separator

19 | 20 |
21 |
 22 | escaped_list_separator<Char, Traits = std::char_traits<Char> >
 23 | 
24 |
25 | 26 |

The escaped_list_separator class is an implementation of the 27 | TokenizerFunction. The 28 | escaped_list_separator parses a superset of the csv (comma separated value) 29 | format. Examples of this format are below. It is assumed that the 30 | default characters for separator, quote, and escape are used.

31 | 32 |

Field 1,Field 2,Field 3
33 | Field 1,"Field 2, with comma",Field 3
34 | Field 1,Field 2 with \"embedded quote\",Field 3
35 | Field 1, Field 2 with \n new line,Field 3
36 | Field 1, Field 2 with embedded \\ ,Field 3

37 | 38 |

Fields are normally separated by commas. If you want to put a comma in a 39 | field, you need to put quotes around it. Also 3 escape sequences are 40 | supported

41 | 42 | 43 | 44 | 47 | 48 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 |
45 |

Escape Sequence

46 |
49 |

Result

50 |
<escape><quote><quote>
<escape>nnewline
<escape><escape><escape>
71 | 72 |

Where <quote> is any character specified to be a quote 73 | and <escape> is any character specified to be an escape character.

74 | 75 |

Example

76 |
 77 | // simple_example_2.cpp
 78 | #include<iostream>
 79 | #include<boost/tokenizer.hpp>
 80 | #include<string>
 81 | 
 82 | int main(){
 83 |    using namespace std;
 84 |    using namespace boost;
 85 |    string s = "Field 1,\"putting quotes around fields, allows commas\",Field 3";
 86 |    tokenizer<escaped_list_separator<char> > tok(s);
 87 |    for(tokenizer<escaped_list_separator<char> >::iterator beg=tok.begin(); beg!=tok.end();++beg){
 88 |        cout << *beg << "\n";
 89 |    }
 90 | }
 91 | 
92 | 93 |

 

94 | 95 |

Construction and Usage

96 | 97 |

escaped_list_separator has 2 constructors. They are as follows

98 |
 99 | explicit escaped_list_separator(Char e = '\\', Char c = ',',Char q = '\"')
100 | 
101 | 102 | 103 | 104 | 107 | 108 | 111 | 112 | 113 | 114 | 115 | 116 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 |
105 |

Parameter

106 |
109 |

Description

110 |
eSpecifies the character to use for escape sequences. It defaults to 117 | the C style \ (backslash). However you can override by passing in a 118 | different character. An example of when you might want to do this is 119 | when you have many fields which are Windows style filenames. Instead of 120 | escaping out each \ in the path, you can change the escape to something 121 | else.
cSpecifies the character to use to separate the fields
qSpecifies the character to use for the quote.
136 | 137 |

 

138 |
139 | escaped_list_separator(string_type e, string_type c, string_type q):
140 | 
141 | 142 | 143 | 144 | 147 | 148 | 151 | 152 | 153 | 154 | 155 | 156 | 159 | 160 | 161 | 162 | 163 | 164 | 166 | 167 | 168 | 169 | 170 | 171 | 173 | 174 |
145 |

Parameter

146 |
149 |

Description

150 |
eAny character in the string e, is considered to be an escape 157 | character. If an empty string is given, then there are no escape 158 | characters.
cAny character in the string c, is considered to be a separator. If 165 | an empty string is given, then there are no separator characters.
qAny character in the string q, is considered to be a quote. If an 172 | empty string is given, then there are no quote characters.
175 | 176 |

 

177 | 178 |

To use this class, pass an object of it anywhere in the Tokenizer 179 | package where a TokenizerFunction is required.

180 | 181 |

 

182 | 183 |

Template Parameters

184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 197 | 198 | 199 | 200 | 201 | 202 | 204 | 205 |
ParameterDescription
CharThe type of the elements within a token, typically 196 | char.
TraitsThe traits class for the Char type. This is used for comparing 203 | Char's. It defaults to std::char_traits<Char>
206 | 207 |

 

208 | 209 |

Model of

210 | 211 |

TokenizerFunction

212 | 213 |

 

214 |
215 | 216 |

Valid HTML 4.01 Transitional

219 | 220 |

Revised 221 | 25 222 | December, 2006

223 | 224 |

Copyright © 2001 John R. Bandela

225 | 226 |

Distributed under the Boost Software License, Version 1.0. (See 227 | accompanying file LICENSE_1_0.txt or 228 | copy at http://www.boost.org/LICENSE_1_0.txt)

230 | 231 | 232 | -------------------------------------------------------------------------------- /doc/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | Boost Tokenizer Overview 11 | 12 | 13 | 15 |

C++ Boost

17 | 18 |

Table Of Contents

19 | 20 |

 

21 | 22 |

Introduction

23 | 24 |

Containers and Iterators

25 | 26 | 35 | 36 |

TokenizerFunction Concept

37 | 38 |

TokenizerFunction Models

39 | 40 | 57 | 58 |

 

59 | 60 |

Acknowledgements

61 | 62 |

I wish to thank the members of the boost mailing list, whose comments, 63 | compliments, and criticisms during both the development and formal review 64 | helped make the Tokenizer library what it is. I especially wish to thank 65 | Aleksey Gurtovoy for the idea of using a pair of iterators to specify the 66 | input, instead of a string. I also wish to thank Jeremy Siek for his idea 67 | of providing a container interface for the token iterators and for 68 | simplifying the template parameters for the TokenizerFunctions. He and 69 | Daryle Walker also emphasized the need to separate interface and 70 | implementation. Gary Powell sparked the idea of using the isspace and 71 | ispunct as the defaults for char_delimiters_separator. Jeff Garland 72 | provided ideas on how to change the order of the template parameters in 73 | order to make tokenizer easier to declare. Thanks to Douglas Gregor who 74 | served as review manager and provided many insights both on the boost list 75 | and in e-mail on how to polish up the implementation and presentation of 76 | Tokenizer. Finally, thanks to Beman Dawes who integrated the final version 77 | into the boost distribution.

78 |
79 | 80 |

Valid HTML 4.01 Transitional

83 | 84 |

Revised 85 | 25 86 | December, 2006

87 | 88 |

Copyright © 2000 Jeremy Siek
89 | Copyright © 2001 John R. Bandela

90 | 91 |

Distributed under the Boost Software License, Version 1.0. (See 92 | accompanying file LICENSE_1_0.txt or 93 | copy at http://www.boost.org/LICENSE_1_0.txt)

95 | 96 | 97 | -------------------------------------------------------------------------------- /doc/introduc.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | Introduction 11 | 12 | 13 | 14 |

C++ Boost

16 | 17 |

Introduction

18 | 19 |

The Boost Tokenizer package provides a flexible and 20 | easy-to-use way to break a string or other character sequence into a series 21 | of tokens. Below is a simple example that will break up a phrase into 22 | words.

23 | 24 |
25 |
 26 | // simple_example_1.cpp
 27 | #include<iostream>
 28 | #include<boost/tokenizer.hpp>
 29 | #include<string>
 30 | 
 31 | int main(){
 32 |    using namespace std;
 33 |    using namespace boost;
 34 |    string s = "This is,  a test";
 35 |    tokenizer<> tok(s);
 36 |    for(tokenizer<>::iterator beg=tok.begin(); beg!=tok.end();++beg){
 37 |        cout << *beg << "\n";
 38 |    }
 39 | }
 40 | 
41 |
42 | 43 |

You can choose how the string gets parsed by using the 44 | TokenizerFunction. If you do not specify anything, the default 45 | TokenizerFunction is char_delimiters_separator<char> which 46 | defaults to breaking up a string based on space and punctuation. Here is an 47 | example using another TokenizerFunction called 48 | escaped_list_separator. This TokenizerFunction parses a superset 49 | of comma-separated value (CSV) lines. The format looks like this:

50 | 51 |

Field 1,"putting quotes around fields, allows commas",Field 52 | 3

53 | 54 |

Below is an example that will break the previous line into 55 | its three fields.

56 | 57 |
58 |
 59 | // simple_example_2.cpp
 60 | #include<iostream>
 61 | #include<boost/tokenizer.hpp>
 62 | #include<string>
 63 | 
 64 | int main(){
 65 |    using namespace std;
 66 |    using namespace boost;
 67 |    string s = "Field 1,\"putting quotes around fields, allows commas\",Field 3";
 68 |    tokenizer<escaped_list_separator<char> > tok(s);
 69 |    for(tokenizer<escaped_list_separator<char> >::iterator beg=tok.begin(); beg!=tok.end();++beg){
 70 |        cout << *beg << "\n";
 71 |    }
 72 | }
 73 | 
74 |
75 | 76 |

Finally, for some TokenizerFunctions you have to pass 77 | something into the constructor in order to do anything interesting. An 78 | example is the offset_separator. This class breaks a string into tokens based 79 | on offsets. For example, when 12252001 is parsed using offsets of 80 | 2,2,4 it becomes 12 25 2001. Below is the code used.

81 | 82 |
83 |
 84 | // simple_example_3.cpp
 85 | #include<iostream>
 86 | #include<boost/tokenizer.hpp>
 87 | #include<string>
 88 | 
 89 | int main(){
 90 |    using namespace std;
 91 |    using namespace boost;
 92 |    string s = "12252001";
 93 |    int offsets[] = {2,2,4};
 94 |    offset_separator f(offsets, offsets+3);
 95 |    tokenizer<offset_separator> tok(s,f);
 96 |    for(tokenizer<offset_separator>::iterator beg=tok.begin(); beg!=tok.end();++beg){
 97 |        cout << *beg << "\n";
 98 |    }
 99 | }
100 | 
101 |
102 | 103 |

 

104 |
105 | 106 |

Valid HTML 4.01 Transitional

109 | 110 |

Revised 111 | 9 June 2010

112 | 113 |

Copyright © 2001 John R. Bandela

114 | 115 |

Distributed under the Boost Software License, Version 1.0. (See 116 | accompanying file LICENSE_1_0.txt or 117 | copy at http://www.boost.org/LICENSE_1_0.txt)

119 | 120 | 121 | -------------------------------------------------------------------------------- /doc/offset_separator.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | Boost Offset Separator 11 | 12 | 13 | 15 |

C++ Boost

17 | 18 |

Offset Separator

19 |
 20 | class offset_separator
 21 | 
22 | 23 |

The offset_separator class is an implementation of the TokenizerFunction concept that can be used with 25 | the tokenizer class to break text up into 26 | tokens. The offset_separator breaks a sequence of Char's 27 | into strings based on a sequence of offsets. For example, if you had the 28 | string "12252001" and offsets (2,2,4) it would break the string into 12 25 29 | 2001. Here is an example.

30 | 31 |

Example

32 |
 33 | // simple_example_3.cpp
 34 | #include<iostream>
 35 | #include<boost/tokenizer.hpp>
 36 | #include<string>
 37 | 
 38 | int main(){
 39 |    using namespace std;
 40 |    using namespace boost;
 41 |    string s = "12252001";
 42 |    int offsets[] = {2,2,4};
 43 |    offset_separator f(offsets, offsets+3);
 44 |    tokenizer<offset_separator> tok(s,f);
 45 |    for(tokenizer<offset_separator>::iterator beg=tok.begin(); beg!=tok.end();++beg){
 46 |      cout << *beg << "\n";
 47 |    }
 48 | }
 49 | 
50 | 51 |

 

52 | 53 |

Construction and Usage

54 | 55 |

The offset_separator has 1 constructor of interest. (The default 56 | constructor is just there to make some compilers happy). The declaration is 57 | below

58 |
 59 | template<typename Iter>
 60 | offset_separator(Iter begin,Iter end,bool bwrapoffsets = true, bool breturnpartiallast = true)
 61 | 
62 | 63 | 64 | 65 | 68 | 69 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 89 | 90 | 91 | 92 | 93 | 94 | 101 | 102 |
66 |

Parameter

67 |
70 |

Description

71 |
begin, endSpecify the sequence of integer offsets.
bwrapoffsetsTells whether to wrap around to the beginning of the offsets when 84 | all the offsets have been used. For example the string 85 | "1225200101012002" with offsets (2,2,4) with bwrapoffsets set to true, 86 | would parse to 12 25 2001 01 01 2002. With bwrapoffsets set to false, it 87 | would parse to 12 25 2001 and then stop because all the offsets have 88 | been used.
breturnpartiallastTells whether, when the parsed sequence terminates before yielding 95 | the number of characters in the current offset, to create a token with 96 | what was parsed, or to ignore it. For example the string "122501" with 97 | offsets (2,2,4) with breturnpartiallast set to true will parse to 12 25 98 | 01. With it set to false, it will parse to 12 25 and then will stop 99 | because there are only 2 characters left in the sequence instead of the 100 | 4 that should have been there.
103 | 104 |

To use this class, pass an object of it anywhere a TokenizerFunction is 105 | required. If you default construct the object, it will just return 106 | every character in the parsed sequence as a token. (i.e. it defaults to an 107 | offset of 1, and bwrapoffsets is true).

108 | 109 |

 

110 | 111 |

Model of

112 | 113 |

TokenizerFunction

114 |
115 | 116 |

Valid HTML 4.01 Transitional

119 | 120 |

Revised 121 | 25 122 | December, 2006

123 | 124 |

Copyright © 2001 John R. Bandela

125 | 126 |

Distributed under the Boost Software License, Version 1.0. (See 127 | accompanying file LICENSE_1_0.txt or 128 | copy at http://www.boost.org/LICENSE_1_0.txt)

130 | 131 | 132 | -------------------------------------------------------------------------------- /doc/token_iterator.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | Boost Token Iterator 11 | 12 | 13 | 15 |

C++ Boost

17 | 18 |

Token Iterator

19 |
 20 | template <
 21 |         class TokenizerFunc = char_delimiters_separator<char>, 
 22 |         class Iterator = std::string::const_iterator,
 23 |         class Type = std::string
 24 | >
 25 | class token_iterator_generator 
 26 | 
27 |
 28 | template<class Type, class Iterator, class TokenizerFunc>
 29 | typename token_iterator_generator<TokenizerFunc,Iterator,Type>::type 
 30 | make_token_iterator(Iterator begin, Iterator end,const TokenizerFunc& fun)
 31 | 
 32 | 
33 | 34 |

The token iterator serves to provide an iterator view of the tokens in a 35 | parsed sequence.

36 | 37 |

Example

38 |
 39 | /// simple_example_5.cpp
 40 | #include<iostream>
 41 | #include<boost/token_iterator.hpp>
 42 | #include<string>
 43 | 
 44 | int main(){
 45 |    using namespace std;
 46 |    using namespace boost;
 47 |    string s = "12252001";
 48 |    int offsets[] = {2,2,4};
 49 |    offset_separator f(offsets, offsets+3);
 50 |    typedef token_iterator_generator<offset_separator>::type Iter;
 51 |    Iter beg = make_token_iterator<string>(s.begin(),s.end(),f);
 52 |    Iter end = make_token_iterator<string>(s.end(),s.end(),f); 
 53 |    // The above statement could also have been what is below
 54 |    // Iter end;
 55 |    for(;beg!=end;++beg){
 56 |      cout << *beg << "\n";
 57 |    }
 58 | }
 59 | 
60 | 61 |

 

62 | 63 |

Template Parameters

64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 |
ParameterDescription
TokenizerFuncThe TokenizerFunction used to parse the sequence.
IteratorThe type of the iterator that specifies the sequence.
TypeThe type of the token, typically string.
90 | 91 |

Model of

92 | 93 |

The category of Iterator, up to and including Forward Iterator. Anything 94 | higher will get scaled down to Forward Iterator.

95 | 96 |

Related Types

97 | 98 | 99 | 100 | 103 | 104 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 |
101 |

Type

102 |
105 |

Remarks

106 |
token_iterator_generator::typeThe type of the token iterator.
115 | 116 |

Creation

117 |
118 | template<class Type, class Iterator, class TokenizerFunc>
119 | typename token_iterator_generator<TokenizerFunc,Iterator,Type>::type 
120 | make_token_iterator(Iterator begin, Iterator end,const TokenizerFunc& fun)
121 | 
122 | 123 | 124 | 125 | 128 | 129 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 |
126 |

Parameter

127 |
130 |

Description

131 |
beginThe beginning of the sequence to be parsed.
endPast the end of the sequence to be parsed.
funA functor that is a model of TokenizerFunction
152 | 153 |

 

154 |
155 | 156 |

Valid HTML 4.01 Transitional

159 | 160 |

Revised 161 | 25 162 | December, 2006

163 | 164 |

Copyright © 2001 John R. Bandela

165 | 166 |

Distributed under the Boost Software License, Version 1.0. (See 167 | accompanying file LICENSE_1_0.txt or 168 | copy at http://www.boost.org/LICENSE_1_0.txt)

170 | 171 | 172 | -------------------------------------------------------------------------------- /doc/tokenizer.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | Boost Tokenizer Class 11 | 12 | 13 | 15 |

C++ Boost

17 | 18 |

Tokenizer Class

19 |
  template <
 20 |         class TokenizerFunc = char_delimiters_separator<char>, 
 21 |         class Iterator = std::string::const_iterator,
 22 |         class Type = std::string
 23 |   >
 24 |   class tokenizer
 25 | 
26 | 27 |

The tokenizer class provides a container view of a series of tokens 28 | contained in a sequence. You set the sequence to parse and the 29 | TokenizerFunction to use to parse the sequence either upon construction or 30 | using the assign member function. Note: No parsing is actually done upon 31 | construction. Parsing is done on demand as the tokens are accessed via the 32 | iterator provided by begin.

33 | 34 |

Example

35 |
// simple_example_1.cpp
 36 | #include<iostream>
 37 | #include<boost/tokenizer.hpp>
 38 | #include<string>
 39 | 
 40 | int main(){
 41 |    using namespace std;
 42 |    using namespace boost;
 43 |    string s = "This is,  a test";
 44 |    tokenizer<> tok(s);
 45 |    for(tokenizer<>::iterator beg=tok.begin(); beg!=tok.end();++beg){
 46 |        cout << *beg << "\n";
 47 |    }
 48 | }
 49 | 
50 | 51 |

The output from simple_example_1 is:

52 | 53 |
54 | 55 |

This
56 | is
57 | a
58 | test

59 | 60 |
61 | 62 |

Template Parameters

63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 |
ParameterDescription
TokenizerFuncThe TokenizerFunction used to parse the sequence.
IteratorThe type of the iterator that specifies the sequence.
TypeThe type of the token, typically string.
89 | 90 |

 

91 | 92 |

Related Types

93 | 94 | 95 | 96 | 99 | 100 | 103 | 104 | 105 | 106 | 107 | 108 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 |
97 |

Type

98 |
101 |

Remarks

102 |
iteratorThe type returned by begin and end. Note: the category of iterator 109 | will be at most ForwardIterator. It will be InputIterator if the 110 | Iterator template parameter is an InputIterator. For any other 111 | category, it will be ForwardIterator.
const_iteratorSame type as iterator.
value_typeSame type as the template parameter Type
referenceSame type as value_type&
const_referenceSame type as const reference
pointerSame type as value_type*
const_pointerSame type as const pointer
size_typevoid
difference_typevoid
162 | 163 |

 

164 | 165 |

Construction and Member Functions

166 |
tokenizer(Iterator first, Iterator last,const TokenizerFunc& f = TokenizerFunc()) 
167 | 
168 | template<class Container>
169 | tokenizer(const Container& c,const TokenizerFunc& f = TokenizerFunc())
170 | 
171 | void assign(Iterator first, Iterator last)
172 | 
173 | void assign(Iterator first, Iterator last, const TokenizerFunc& f)
174 | 
175 | template<class Container>
176 | void assign(const Container& c)
177 | 
178 | template<class Container>
179 | void assign(const Container& c, const TokenizerFunc& f)
180 | 
181 | iterator begin() const 
182 | 
183 | iterator end() const
184 | 
185 | 186 | 187 | 188 | 191 | 192 | 195 | 196 | 197 | 198 | 199 | 200 | 203 | 204 | 205 | 206 | 207 | 208 | 210 | 211 | 212 | 213 | 214 | 215 | 217 | 218 | 219 | 220 | 221 | 222 | 224 | 225 |
189 |

Parameter

190 |
193 |

Description

194 |
cA container that contains the sequence to parse. Note: c.begin() 201 | and c.end() must be convertible to the template parameter 202 | Iterator.
fA functor that is a model of TokenizerFunction that will be used to 209 | parse the sequence.
firstThe iterator that represents the beginning position in the sequence 216 | to be parsed.
lastThe iterator that represents the past-the-end position in the 223 | sequence to be parsed.
226 | 227 |

 

228 |
229 | 230 |

Valid HTML 4.01 Transitional

233 | 234 |

Revised 235 | 16 February, 2008

236 | 237 |

Copyright © 2001 John R. Bandela

238 | 239 |

Distributed under the Boost Software License, Version 1.0. (See 240 | accompanying file LICENSE_1_0.txt or 241 | copy at http://www.boost.org/LICENSE_1_0.txt)

243 | 244 | 245 | -------------------------------------------------------------------------------- /doc/tokenizerfunction.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | TokenizerFunction Concept 11 | 12 | 13 | 15 |

C++ Boost

16 | 17 |

TokenizerFunction Concept

18 | 19 |

A TokenizerFunction is a functor whose purpose is to parse a given 20 | sequence until exactly 1 token has been found or the end is reached. It 21 | then updates the token, and informs the caller of the location in the 22 | sequence of the next element immediately after the last element of the 23 | sequence that was parsed for the current token.

24 | 25 |

Refinement of

26 | 27 |

Assignable, CopyConstructible

28 | 29 |

Notation

30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 55 | 56 | 57 | 58 | 59 | 60 | 62 | 63 |
XA type that is a model of TokenizerFunction
funcObject of type X
tokObject of type Token
nextiterator that points to the first unparsed element of the sequence 54 | being parsed
enditerator that points to the past-the-end position of the sequence being 61 | parsed
64 | 65 |

Definitions

66 | 67 |

A token is the result of parsing a sequence.

68 | 69 |

Valid expressions

70 | 71 |

In addition to the expressions in Assignable and CopyConstructible, the 72 | following expressions are valid

73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 |
NameExpressionReturn type
Functorfunc(next, end, tok)bool
resetreset()void
99 | 100 |

Expression semantics

101 | 102 |

In addition to the expression semantics in Assignable and 103 | CopyConstructible, TokenizerFunction has the following expression 104 | semantics

105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 127 | 128 | 130 | 131 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 145 | 146 | 147 | 148 |
NameExpressionPreconditionSemanticsPostcondition
operator()func(next, end, tok)next and end are valid iterators to the same 125 | sequence. next is a reference the function is free to modify. tok is 126 | constructed.The return value indicates whether a new token was found in the 129 | sequence [next,end)If the return value is true, the new token is assigned to tok. next 132 | is always updated to the position where parsing should start on the 133 | subsequent call.
resetreset()NoneClears out all state variables that are used by the object in 144 | parsing the current sequence.A new sequence to parse can be given.
149 | 150 |

Complexity guarantees

151 | 152 |

No guarantees. Models of TokenizerFunction are free to define their own 153 | complexity

154 | 155 |

Models

156 | 157 |

escaped_list_separator

158 | 159 |

offset_separator

160 | 161 |

char_delimiters_separator

163 | 164 |

 

165 |
166 | 167 |

Valid HTML 4.01 Transitional

170 | 171 |

Revised 172 | 25 173 | December, 2006

174 | 175 |

Copyright © 2001 John R. Bandela

176 | 177 |

Distributed under the Boost Software License, Version 1.0. (See 178 | accompanying file LICENSE_1_0.txt or 179 | copy at http://www.boost.org/LICENSE_1_0.txt)

181 | 182 | 183 | -------------------------------------------------------------------------------- /example/Jamfile.v2: -------------------------------------------------------------------------------- 1 | # Boost.Tokenizer Library example Jamfile 2 | # 3 | # Copyright (c) 2008 James E. King III 4 | # 5 | # Distributed under the Boost Software License, Version 1.0. (See accompany- 6 | # ing file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 7 | 8 | import testing ; 9 | 10 | project : requirements /boost/tokenizer//boost_tokenizer ; 11 | 12 | test-suite "tokenizer-examples" 13 | : [ run char_sep_example_1.cpp ] 14 | [ run char_sep_example_2.cpp ] 15 | [ run char_sep_example_3.cpp ] 16 | ; 17 | 18 | -------------------------------------------------------------------------------- /example/char_sep_example_1.cpp: -------------------------------------------------------------------------------- 1 | // (c) Copyright Jeremy Siek 2002. 2 | 3 | // Distributed under the Boost Software License, Version 1.0. (See 4 | // accompanying file LICENSE_1_0.txt or copy at 5 | // http://www.boost.org/LICENSE_1_0.txt) 6 | 7 | // Sample output: 8 | // 9 | // 10 | 11 | // char_sep_example_1.cpp 12 | #include 13 | #include 14 | #include 15 | 16 | int main() 17 | { 18 | std::string str = ";;Hello|world||-foo--bar;yow;baz|"; 19 | typedef boost::tokenizer > 20 | tokenizer; 21 | boost::char_separator sep("-;|"); 22 | tokenizer tokens(str, sep); 23 | for (tokenizer::iterator tok_iter = tokens.begin(); 24 | tok_iter != tokens.end(); ++tok_iter) 25 | std::cout << "<" << *tok_iter << "> "; 26 | std::cout << "\n"; 27 | return EXIT_SUCCESS; 28 | } 29 | -------------------------------------------------------------------------------- /example/char_sep_example_2.cpp: -------------------------------------------------------------------------------- 1 | // (c) Copyright Jeremy Siek 2002. 2 | 3 | // Distributed under the Boost Software License, Version 1.0. 
(See 4 | // accompanying file LICENSE_1_0.txt or copy at 5 | // http://www.boost.org/LICENSE_1_0.txt) 6 | 7 | // Sample output: 8 | // 9 | // <> <> <|> <|> <> <|> <> <> <|> <> 10 | 11 | // char_sep_example_2.cpp 12 | #include 13 | #include 14 | #include 15 | 16 | int main() 17 | { 18 | std::string str = ";;Hello|world||-foo--bar;yow;baz|"; 19 | 20 | typedef boost::tokenizer > 21 | tokenizer; 22 | boost::char_separator sep("-;", "|", boost::keep_empty_tokens); 23 | tokenizer tokens(str, sep); 24 | for (tokenizer::iterator tok_iter = tokens.begin(); 25 | tok_iter != tokens.end(); ++tok_iter) 26 | std::cout << "<" << *tok_iter << "> "; 27 | std::cout << "\n"; 28 | return EXIT_SUCCESS; 29 | } 30 | -------------------------------------------------------------------------------- /example/char_sep_example_3.cpp: -------------------------------------------------------------------------------- 1 | // (c) Copyright Jeremy Siek 2002. 2 | 3 | // Distributed under the Boost Software License, Version 1.0. (See 4 | // accompanying file LICENSE_1_0.txt or copy at 5 | // http://www.boost.org/LICENSE_1_0.txt) 6 | 7 | // Sample output: 8 | // <,> 9 | 10 | // char_sep_example_3.cpp 11 | #include 12 | #include 13 | #include 14 | 15 | int main() 16 | { 17 | std::string str = "This is, a test"; 18 | typedef boost::tokenizer > Tok; 19 | boost::char_separator sep; // default constructed 20 | Tok tok(str, sep); 21 | for(Tok::iterator tok_iter = tok.begin(); tok_iter != tok.end(); ++tok_iter) 22 | std::cout << "<" << *tok_iter << "> "; 23 | std::cout << "\n"; 24 | return EXIT_SUCCESS; 25 | } 26 | 27 | -------------------------------------------------------------------------------- /include/boost/token_functions.hpp: -------------------------------------------------------------------------------- 1 | // Boost token_functions.hpp ------------------------------------------------// 2 | 3 | // Copyright John R. Bandela 2001. 4 | 5 | // Distributed under the Boost Software License, Version 1.0. 
(See 6 | // accompanying file LICENSE_1_0.txt or copy at 7 | // http://www.boost.org/LICENSE_1_0.txt) 8 | 9 | // See http://www.boost.org/libs/tokenizer/ for documentation. 10 | 11 | // Revision History: 12 | // 01 Oct 2004 Joaquin M Lopez Munoz 13 | // Workaround for a problem with string::assign in msvc-stlport 14 | // 06 Apr 2004 John Bandela 15 | // Fixed a bug involving using char_delimiter with a true input iterator 16 | // 28 Nov 2003 Robert Zeh and John Bandela 17 | // Converted into "fast" functions that avoid using += when 18 | // the supplied iterator isn't an input_iterator; based on 19 | // some work done at Archelon and a version that was checked into 20 | // the boost CVS for a short period of time. 21 | // 20 Feb 2002 John Maddock 22 | // Removed using namespace std declarations and added 23 | // workaround for BOOST_NO_STDC_NAMESPACE (the library 24 | // can be safely mixed with regex). 25 | // 06 Feb 2002 Jeremy Siek 26 | // Added char_separator. 27 | // 02 Feb 2002 Jeremy Siek 28 | // Removed tabs and a little cleanup. 29 | 30 | 31 | #ifndef BOOST_TOKEN_FUNCTIONS_JRB120303_HPP_ 32 | #define BOOST_TOKEN_FUNCTIONS_JRB120303_HPP_ 33 | 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include // for find_if 39 | #include 40 | #include 41 | #include 42 | #include 43 | #include 44 | #include 45 | #if !defined(BOOST_NO_CWCTYPE) 46 | #include 47 | #endif 48 | 49 | // 50 | // the following must not be macros if we are to prefix them 51 | // with std:: (they shouldn't be macros anyway...) 
52 | // 53 | #ifdef ispunct 54 | # undef ispunct 55 | #endif 56 | #ifdef iswpunct 57 | # undef iswpunct 58 | #endif 59 | #ifdef isspace 60 | # undef isspace 61 | #endif 62 | #ifdef iswspace 63 | # undef iswspace 64 | #endif 65 | // 66 | // fix namespace problems: 67 | // 68 | #ifdef BOOST_NO_STDC_NAMESPACE 69 | namespace std{ 70 | using ::ispunct; 71 | using ::isspace; 72 | #if !defined(BOOST_NO_CWCTYPE) 73 | using ::iswpunct; 74 | using ::iswspace; 75 | #endif 76 | } 77 | #endif 78 | 79 | namespace boost{ 80 | //=========================================================================== 81 | // The escaped_list_separator class. Which is a model of TokenizerFunction 82 | // An escaped list is a super-set of what is commonly known as a comma 83 | // separated value (csv) list.It is separated into fields by a comma or 84 | // other character. If the delimiting character is inside quotes, then it is 85 | // counted as a regular character.To allow for embedded quotes in a field, 86 | // there can be escape sequences using the \ much like C. 87 | // The role of the comma, the quotation mark, and the escape 88 | // character (backslash \), can be assigned to other characters. 89 | 90 | struct escaped_list_error : public std::runtime_error{ 91 | escaped_list_error(const std::string& what_arg):std::runtime_error(what_arg) { } 92 | }; 93 | 94 | 95 | // The out of the box GCC 2.95 on cygwin does not have a char_traits class. 
96 | // MSVC does not like the following typename 97 | template ::traits_type > 99 | class escaped_list_separator { 100 | 101 | private: 102 | typedef std::basic_string string_type; 103 | struct char_eq { 104 | Char e_; 105 | char_eq(Char e):e_(e) { } 106 | bool operator()(Char c) { 107 | return Traits::eq(e_,c); 108 | } 109 | }; 110 | string_type escape_; 111 | string_type c_; 112 | string_type quote_; 113 | bool last_; 114 | 115 | bool is_escape(Char e) { 116 | char_eq f(e); 117 | return std::find_if(escape_.begin(),escape_.end(),f)!=escape_.end(); 118 | } 119 | bool is_c(Char e) { 120 | char_eq f(e); 121 | return std::find_if(c_.begin(),c_.end(),f)!=c_.end(); 122 | } 123 | bool is_quote(Char e) { 124 | char_eq f(e); 125 | return std::find_if(quote_.begin(),quote_.end(),f)!=quote_.end(); 126 | } 127 | template 128 | void do_escape(iterator& next,iterator end,Token& tok) { 129 | if (++next == end) 130 | BOOST_THROW_EXCEPTION(escaped_list_error(std::string("cannot end with escape"))); 131 | if (Traits::eq(*next,'n')) { 132 | tok+='\n'; 133 | return; 134 | } 135 | else if (is_quote(*next)) { 136 | tok+=*next; 137 | return; 138 | } 139 | else if (is_c(*next)) { 140 | tok+=*next; 141 | return; 142 | } 143 | else if (is_escape(*next)) { 144 | tok+=*next; 145 | return; 146 | } 147 | else 148 | BOOST_THROW_EXCEPTION(escaped_list_error(std::string("unknown escape sequence"))); 149 | } 150 | 151 | public: 152 | 153 | explicit escaped_list_separator(Char e = '\\', 154 | Char c = ',',Char q = '\"') 155 | : escape_(1,e), c_(1,c), quote_(1,q), last_(false) { } 156 | 157 | escaped_list_separator(string_type e, string_type c, string_type q) 158 | : escape_(e), c_(c), quote_(q), last_(false) { } 159 | 160 | void reset() {last_=false;} 161 | 162 | template 163 | bool operator()(InputIterator& next,InputIterator end,Token& tok) { 164 | bool bInQuote = false; 165 | tok = Token(); 166 | 167 | if (next == end) { 168 | if (last_) { 169 | last_ = false; 170 | return true; 171 | } 172 | 
else 173 | return false; 174 | } 175 | last_ = false; 176 | for (;next != end;++next) { 177 | if (is_escape(*next)) { 178 | do_escape(next,end,tok); 179 | } 180 | else if (is_c(*next)) { 181 | if (!bInQuote) { 182 | // If we are not in quote, then we are done 183 | ++next; 184 | // The last character was a c, that means there is 185 | // 1 more blank field 186 | last_ = true; 187 | return true; 188 | } 189 | else tok+=*next; 190 | } 191 | else if (is_quote(*next)) { 192 | bInQuote=!bInQuote; 193 | } 194 | else { 195 | tok += *next; 196 | } 197 | } 198 | return true; 199 | } 200 | }; 201 | 202 | //=========================================================================== 203 | // The classes here are used by offset_separator and char_separator to implement 204 | // faster assigning of tokens using assign instead of += 205 | 206 | namespace tokenizer_detail { 207 | //=========================================================================== 208 | // Tokenizer was broken for wide character separators, at least on Windows, since 209 | // CRT functions isspace etc only expect values in [0, 0xFF]. Debug build asserts 210 | // if higher values are passed in. The traits extension class should take care of this. 211 | // Assuming that the conditional will always get optimized out in the function 212 | // implementations, argument types are not a problem since both forms of character classifiers 213 | // expect an int. 
214 | 215 | #if !defined(BOOST_NO_CWCTYPE) 216 | template 217 | struct traits_extension_details : public traits { 218 | typedef typename traits::char_type char_type; 219 | static bool isspace(char_type c) 220 | { 221 | return std::iswspace(c) != 0; 222 | } 223 | static bool ispunct(char_type c) 224 | { 225 | return std::iswpunct(c) != 0; 226 | } 227 | }; 228 | 229 | template 230 | struct traits_extension_details : public traits { 231 | typedef typename traits::char_type char_type; 232 | static bool isspace(char_type c) 233 | { 234 | return std::isspace(c) != 0; 235 | } 236 | static bool ispunct(char_type c) 237 | { 238 | return std::ispunct(c) != 0; 239 | } 240 | }; 241 | #endif 242 | 243 | 244 | // In case there is no cwctype header, we implement the checks manually. 245 | // We make use of the fact that the tested categories should fit in ASCII. 246 | template 247 | struct traits_extension : public traits { 248 | typedef typename traits::char_type char_type; 249 | static bool isspace(char_type c) 250 | { 251 | #if !defined(BOOST_NO_CWCTYPE) 252 | return traits_extension_details::isspace(c); 253 | #else 254 | return static_cast< unsigned >(c) <= 255 && std::isspace(c) != 0; 255 | #endif 256 | } 257 | 258 | static bool ispunct(char_type c) 259 | { 260 | #if !defined(BOOST_NO_CWCTYPE) 261 | return traits_extension_details::ispunct(c); 262 | #else 263 | return static_cast< unsigned >(c) <= 255 && std::ispunct(c) != 0; 264 | #endif 265 | } 266 | }; 267 | 268 | // The assign_or_plus_equal struct contains functions that implement 269 | // assign, +=, and clearing based on the iterator type. The 270 | // generic case does nothing for plus_equal and clearing, while 271 | // passing through the call for assign. 272 | // 273 | // When an input iterator is being used, the situation is reversed. 274 | // The assign method does nothing, plus_equal invokes operator +=, 275 | // and the clearing method sets the supplied token to the default 276 | // token constructor's result. 
277 | // 278 | 279 | template 280 | struct assign_or_plus_equal { 281 | template 282 | static void assign(Iterator b, Iterator e, Token &t) { 283 | t.assign(b, e); 284 | } 285 | 286 | template 287 | static void plus_equal(Token &, const Value &) { } 288 | 289 | // If we are doing an assign, there is no need for the 290 | // the clear. 291 | // 292 | template 293 | static void clear(Token &) { } 294 | }; 295 | 296 | template <> 297 | struct assign_or_plus_equal { 298 | template 299 | static void assign(Iterator , Iterator , Token &) { } 300 | template 301 | static void plus_equal(Token &t, const Value &v) { 302 | t += v; 303 | } 304 | template 305 | static void clear(Token &t) { 306 | t = Token(); 307 | } 308 | }; 309 | 310 | 311 | template 312 | struct pointer_iterator_category{ 313 | typedef std::random_access_iterator_tag type; 314 | }; 315 | 316 | 317 | template 318 | struct class_iterator_category{ 319 | typedef typename Iterator::iterator_category type; 320 | }; 321 | 322 | 323 | 324 | // This portably gets the iterator_tag without partial template specialization 325 | template 326 | struct get_iterator_category{ 327 | typedef typename conditional::value, 328 | pointer_iterator_category, 329 | class_iterator_category 330 | >::type cat; 331 | 332 | typedef typename cat::type iterator_category; 333 | }; 334 | 335 | 336 | } // namespace tokenizer_detail 337 | 338 | 339 | //=========================================================================== 340 | // The offset_separator class, which is a model of TokenizerFunction. 
341 | // Offset breaks a string into tokens based on a range of offsets 342 | 343 | class offset_separator { 344 | private: 345 | 346 | std::vector offsets_; 347 | unsigned int current_offset_; 348 | bool wrap_offsets_; 349 | bool return_partial_last_; 350 | 351 | public: 352 | template 353 | offset_separator(Iter begin, Iter end, bool wrap_offsets = true, 354 | bool return_partial_last = true) 355 | : offsets_(begin,end), current_offset_(0), 356 | wrap_offsets_(wrap_offsets), 357 | return_partial_last_(return_partial_last) { } 358 | 359 | offset_separator() 360 | : offsets_(1,1), current_offset_(), 361 | wrap_offsets_(true), return_partial_last_(true) { } 362 | 363 | void reset() { 364 | current_offset_ = 0; 365 | } 366 | 367 | template 368 | bool operator()(InputIterator& next, InputIterator end, Token& tok) 369 | { 370 | typedef tokenizer_detail::assign_or_plus_equal< 371 | BOOST_DEDUCED_TYPENAME tokenizer_detail::get_iterator_category< 372 | InputIterator 373 | >::iterator_category 374 | > assigner; 375 | 376 | BOOST_ASSERT(!offsets_.empty()); 377 | 378 | assigner::clear(tok); 379 | InputIterator start(next); 380 | 381 | if (next == end) 382 | return false; 383 | 384 | if (current_offset_ == offsets_.size()) 385 | { 386 | if (wrap_offsets_) 387 | current_offset_=0; 388 | else 389 | return false; 390 | } 391 | 392 | int c = offsets_[current_offset_]; 393 | int i = 0; 394 | for (; i < c; ++i) { 395 | if (next == end)break; 396 | assigner::plus_equal(tok,*next++); 397 | } 398 | assigner::assign(start,next,tok); 399 | 400 | if (!return_partial_last_) 401 | if (i < (c-1) ) 402 | return false; 403 | 404 | ++current_offset_; 405 | return true; 406 | } 407 | }; 408 | 409 | 410 | //=========================================================================== 411 | // The char_separator class breaks a sequence of characters into 412 | // tokens based on the character delimiters (very much like bad old 413 | // strtok). A delimiter character can either be kept or dropped. 
A 414 | // kept delimiter shows up as an output token, whereas a dropped 415 | // delimiter does not. 416 | 417 | // This class replaces the char_delimiters_separator class. The 418 | // constructor for the char_delimiters_separator class was too 419 | // confusing and needed to be deprecated. However, because of the 420 | // default arguments to the constructor, adding the new constructor 421 | // would cause ambiguity, so instead I deprecated the whole class. 422 | // The implementation of the class was also simplified considerably. 423 | 424 | enum empty_token_policy { drop_empty_tokens, keep_empty_tokens }; 425 | 426 | // The out of the box GCC 2.95 on cygwin does not have a char_traits class. 427 | template ::traits_type > 429 | class char_separator 430 | { 431 | typedef tokenizer_detail::traits_extension Traits; 432 | typedef std::basic_string string_type; 433 | public: 434 | explicit 435 | char_separator(const Char* dropped_delims, 436 | const Char* kept_delims = 0, 437 | empty_token_policy empty_tokens = drop_empty_tokens) 438 | : m_dropped_delims(dropped_delims), 439 | m_use_ispunct(false), 440 | m_use_isspace(false), 441 | m_empty_tokens(empty_tokens), 442 | m_output_done(false) 443 | { 444 | // Borland workaround 445 | if (kept_delims) 446 | m_kept_delims = kept_delims; 447 | } 448 | 449 | // use ispunct() for kept delimiters and isspace for dropped. 
450 | explicit 451 | char_separator() 452 | : m_use_ispunct(true), 453 | m_use_isspace(true), 454 | m_empty_tokens(drop_empty_tokens), 455 | m_output_done(false) { } 456 | 457 | void reset() { } 458 | 459 | template 460 | bool operator()(InputIterator& next, InputIterator end, Token& tok) 461 | { 462 | typedef tokenizer_detail::assign_or_plus_equal< 463 | BOOST_DEDUCED_TYPENAME tokenizer_detail::get_iterator_category< 464 | InputIterator 465 | >::iterator_category 466 | > assigner; 467 | 468 | assigner::clear(tok); 469 | 470 | // skip past all dropped_delims 471 | if (m_empty_tokens == drop_empty_tokens) 472 | for (; next != end && is_dropped(*next); ++next) 473 | { } 474 | 475 | InputIterator start(next); 476 | 477 | if (m_empty_tokens == drop_empty_tokens) { 478 | 479 | if (next == end) 480 | return false; 481 | 482 | 483 | // if we are on a kept_delims move past it and stop 484 | if (is_kept(*next)) { 485 | assigner::plus_equal(tok,*next); 486 | ++next; 487 | } else 488 | // append all the non delim characters 489 | for (; next != end && !is_dropped(*next) && !is_kept(*next); ++next) 490 | assigner::plus_equal(tok,*next); 491 | } 492 | else { // m_empty_tokens == keep_empty_tokens 493 | 494 | // Handle empty token at the end 495 | if (next == end) 496 | { 497 | if (m_output_done == false) 498 | { 499 | m_output_done = true; 500 | assigner::assign(start,next,tok); 501 | return true; 502 | } 503 | else 504 | return false; 505 | } 506 | 507 | if (is_kept(*next)) { 508 | if (m_output_done == false) 509 | m_output_done = true; 510 | else { 511 | assigner::plus_equal(tok,*next); 512 | ++next; 513 | m_output_done = false; 514 | } 515 | } 516 | else if (m_output_done == false && is_dropped(*next)) { 517 | m_output_done = true; 518 | } 519 | else { 520 | if (is_dropped(*next)) 521 | start=++next; 522 | for (; next != end && !is_dropped(*next) && !is_kept(*next); ++next) 523 | assigner::plus_equal(tok,*next); 524 | m_output_done = true; 525 | } 526 | } 527 | 
assigner::assign(start,next,tok); 528 | return true; 529 | } 530 | 531 | private: 532 | string_type m_kept_delims; 533 | string_type m_dropped_delims; 534 | bool m_use_ispunct; 535 | bool m_use_isspace; 536 | empty_token_policy m_empty_tokens; 537 | bool m_output_done; 538 | 539 | bool is_kept(Char E) const 540 | { 541 | if (m_kept_delims.length()) 542 | return m_kept_delims.find(E) != string_type::npos; 543 | else if (m_use_ispunct) { 544 | return Traits::ispunct(E) != 0; 545 | } else 546 | return false; 547 | } 548 | bool is_dropped(Char E) const 549 | { 550 | if (m_dropped_delims.length()) 551 | return m_dropped_delims.find(E) != string_type::npos; 552 | else if (m_use_isspace) { 553 | return Traits::isspace(E) != 0; 554 | } else 555 | return false; 556 | } 557 | }; 558 | 559 | //=========================================================================== 560 | // The following class is DEPRECATED, use class char_separators instead. 561 | // 562 | // The char_delimiters_separator class, which is a model of 563 | // TokenizerFunction. char_delimiters_separator breaks a string 564 | // into tokens based on character delimiters. There are 2 types of 565 | // delimiters. returnable delimiters can be returned as 566 | // tokens. These are often punctuation. nonreturnable delimiters 567 | // cannot be returned as tokens. These are often whitespace 568 | 569 | // The out of the box GCC 2.95 on cygwin does not have a char_traits class. 
570 | template ::traits_type > 572 | class char_delimiters_separator { 573 | private: 574 | 575 | typedef tokenizer_detail::traits_extension Traits; 576 | typedef std::basic_string string_type; 577 | string_type returnable_; 578 | string_type nonreturnable_; 579 | bool return_delims_; 580 | bool no_ispunct_; 581 | bool no_isspace_; 582 | 583 | bool is_ret(Char E)const 584 | { 585 | if (returnable_.length()) 586 | return returnable_.find(E) != string_type::npos; 587 | else{ 588 | if (no_ispunct_) {return false;} 589 | else{ 590 | int r = Traits::ispunct(E); 591 | return r != 0; 592 | } 593 | } 594 | } 595 | bool is_nonret(Char E)const 596 | { 597 | if (nonreturnable_.length()) 598 | return nonreturnable_.find(E) != string_type::npos; 599 | else{ 600 | if (no_isspace_) {return false;} 601 | else{ 602 | int r = Traits::isspace(E); 603 | return r != 0; 604 | } 605 | } 606 | } 607 | 608 | public: 609 | explicit char_delimiters_separator(bool return_delims = false, 610 | const Char* returnable = 0, 611 | const Char* nonreturnable = 0) 612 | : returnable_(returnable ? returnable : string_type().c_str()), 613 | nonreturnable_(nonreturnable ? 
nonreturnable:string_type().c_str()), 614 | return_delims_(return_delims), no_ispunct_(returnable!=0), 615 | no_isspace_(nonreturnable!=0) { } 616 | 617 | void reset() { } 618 | 619 | public: 620 | 621 | template 622 | bool operator()(InputIterator& next, InputIterator end,Token& tok) { 623 | tok = Token(); 624 | 625 | // skip past all nonreturnable delims 626 | // skip past the returnable only if we are not returning delims 627 | for (;next!=end && ( is_nonret(*next) || (is_ret(*next) 628 | && !return_delims_ ) );++next) { } 629 | 630 | if (next == end) { 631 | return false; 632 | } 633 | 634 | // if we are to return delims and we are one a returnable one 635 | // move past it and stop 636 | if (is_ret(*next) && return_delims_) { 637 | tok+=*next; 638 | ++next; 639 | } 640 | else 641 | // append all the non delim characters 642 | for (;next!=end && !is_nonret(*next) && !is_ret(*next);++next) 643 | tok+=*next; 644 | 645 | 646 | return true; 647 | } 648 | }; 649 | 650 | 651 | } //namespace boost 652 | 653 | #endif 654 | -------------------------------------------------------------------------------- /include/boost/token_iterator.hpp: -------------------------------------------------------------------------------- 1 | // Boost token_iterator.hpp -------------------------------------------------// 2 | 3 | // Copyright John R. Bandela 2001 4 | // Distributed under the Boost Software License, Version 1.0. (See 5 | // accompanying file LICENSE_1_0.txt or copy at 6 | // http://www.boost.org/LICENSE_1_0.txt) 7 | 8 | // See http://www.boost.org/libs/tokenizer for documentation. 
9 | 10 | // Revision History: 11 | // 16 Jul 2003 John Bandela 12 | // Allowed conversions from convertible base iterators 13 | // 03 Jul 2003 John Bandela 14 | // Converted to new iterator adapter 15 | 16 | 17 | 18 | #ifndef BOOST_TOKENIZER_POLICY_JRB070303_HPP_ 19 | #define BOOST_TOKENIZER_POLICY_JRB070303_HPP_ 20 | 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | namespace boost 28 | { 29 | template 30 | class token_iterator 31 | : public iterator_facade< 32 | token_iterator 33 | , Type 34 | , typename iterators::minimum_category< 35 | forward_traversal_tag 36 | , typename iterator_traversal::type 37 | >::type 38 | , const Type& 39 | > 40 | { 41 | 42 | #ifdef __DCC__ 43 | friend class boost::iterator_core_access; 44 | #else 45 | friend class iterator_core_access; 46 | #endif 47 | TokenizerFunc f_; 48 | Iterator begin_; 49 | Iterator end_; 50 | bool valid_; 51 | Type tok_; 52 | 53 | void increment(){ 54 | BOOST_ASSERT(valid_); 55 | valid_ = f_(begin_,end_,tok_); 56 | } 57 | 58 | const Type& dereference() const { 59 | BOOST_ASSERT(valid_); 60 | return tok_; 61 | } 62 | template 63 | bool equal(const Other& a) const{ 64 | return (a.valid_ && valid_) 65 | ?( (a.begin_==begin_) && (a.end_ == end_) ) 66 | :(a.valid_==valid_); 67 | 68 | } 69 | 70 | void initialize(){ 71 | if(valid_) return; 72 | f_.reset(); 73 | valid_ = (begin_ != end_)? 
74 | f_(begin_,end_,tok_):false; 75 | } 76 | public: 77 | token_iterator():begin_(),end_(),valid_(false),tok_() { } 78 | 79 | token_iterator(TokenizerFunc f, Iterator begin, Iterator e = Iterator()) 80 | : f_(f),begin_(begin),end_(e),valid_(false),tok_(){ initialize(); } 81 | 82 | token_iterator(Iterator begin, Iterator e = Iterator()) 83 | : f_(),begin_(begin),end_(e),valid_(false),tok_() {initialize();} 84 | 85 | template 86 | token_iterator( 87 | token_iterator const& t 88 | , typename enable_if_convertible::type* = 0) 89 | : f_(t.tokenizer_function()),begin_(t.base()) 90 | ,end_(t.end()),valid_(!t.at_end()),tok_(t.current_token()) {} 91 | 92 | Iterator base()const{return begin_;} 93 | 94 | Iterator end()const{return end_;} 95 | 96 | TokenizerFunc tokenizer_function()const{return f_;} 97 | 98 | Type current_token()const{return tok_;} 99 | 100 | bool at_end()const{return !valid_;} 101 | 102 | 103 | 104 | 105 | }; 106 | template < 107 | class TokenizerFunc = char_delimiters_separator, 108 | class Iterator = std::string::const_iterator, 109 | class Type = std::string 110 | > 111 | class token_iterator_generator { 112 | 113 | private: 114 | public: 115 | typedef token_iterator type; 116 | }; 117 | 118 | 119 | // Type has to be first because it needs to be explicitly specified 120 | // because there is no way the function can deduce it. 121 | template 122 | typename token_iterator_generator::type 123 | make_token_iterator(Iterator begin, Iterator end,const TokenizerFunc& fun){ 124 | typedef typename 125 | token_iterator_generator::type ret_type; 126 | return ret_type(fun,begin,end); 127 | } 128 | 129 | } // namespace boost 130 | 131 | #endif 132 | -------------------------------------------------------------------------------- /include/boost/tokenizer.hpp: -------------------------------------------------------------------------------- 1 | // Boost tokenizer.hpp -----------------------------------------------------// 2 | 3 | // (c) Copyright Jeremy Siek and John R. 
Bandela 2001. 4 | 5 | // Distributed under the Boost Software License, Version 1.0. (See 6 | // accompanying file LICENSE_1_0.txt or copy at 7 | // http://www.boost.org/LICENSE_1_0.txt) 8 | 9 | // See http://www.boost.org/libs/tokenizer for documentation 10 | 11 | // Revision History: 12 | // 03 Jul 2003 John Bandela 13 | // Converted to new iterator adapter 14 | // 02 Feb 2002 Jeremy Siek 15 | // Removed tabs and a little cleanup. 16 | 17 | #ifndef BOOST_TOKENIZER_JRB070303_HPP_ 18 | #define BOOST_TOKENIZER_JRB070303_HPP_ 19 | 20 | #include 21 | 22 | namespace boost { 23 | 24 | 25 | //=========================================================================== 26 | // A container-view of a tokenized "sequence" 27 | template < 28 | typename TokenizerFunc = char_delimiters_separator, 29 | typename Iterator = std::string::const_iterator, 30 | typename Type = std::string 31 | > 32 | class tokenizer { // stores the range [first_, last_) and a tokenizer function; begin()/end() build token iterators over that range 33 | private: 34 | typedef token_iterator_generator TGen; 35 | 36 | // It seems that MSVC does not like the unqualified use of iterator, 37 | // Thus we use iter internally when it is used unqualified and 38 | // the users of this class will always qualify iterator. 
39 | typedef typename TGen::type iter; 40 | 41 | public: 42 | 43 | typedef iter iterator; 44 | typedef iter const_iterator; 45 | typedef Type value_type; 46 | typedef value_type& reference; 47 | typedef const value_type& const_reference; 48 | typedef value_type* pointer; 49 | typedef const value_type* const_pointer; // pointer to const value_type; the former "const pointer" declared value_type* const instead 50 | typedef void size_type; // void: this class provides no size() member 51 | typedef void difference_type; 52 | 53 | tokenizer(Iterator first, Iterator last, // tokenize [first, last) with f (default-constructed when omitted) 54 | const TokenizerFunc& f = TokenizerFunc()) 55 | : first_(first), last_(last), f_(f) { } 56 | 57 | template 58 | tokenizer(const Container& c) // tokenize [c.begin(), c.end()); only iterators into c are stored, so c must outlive this object 59 | : first_(c.begin()), last_(c.end()), f_() { } 60 | 61 | template 62 | tokenizer(const Container& c,const TokenizerFunc& f) // as above, with an explicit tokenizer function 63 | : first_(c.begin()), last_(c.end()), f_(f) { } 64 | 65 | void assign(Iterator first, Iterator last){ // replace the range, keep the current tokenizer function 66 | first_ = first; 67 | last_ = last; 68 | } 69 | 70 | void assign(Iterator first, Iterator last, const TokenizerFunc& f){ // replace both the range and the tokenizer function 71 | assign(first,last); 72 | f_ = f; 73 | } 74 | 75 | template 76 | void assign(const Container& c){ // container overload of assign(first, last) 77 | assign(c.begin(),c.end()); 78 | } 79 | 80 | 81 | template 82 | void assign(const Container& c, const TokenizerFunc& f){ // container overload of assign(first, last, f) 83 | assign(c.begin(),c.end(),f); 84 | } 85 | 86 | iter begin() const { return iter(f_,first_,last_); } // iterator positioned at the first token 87 | iter end() const { return iter(f_,last_,last_); } // end iterator: built over the empty range [last_, last_) 88 | 89 | private: 90 | Iterator first_; 91 | Iterator last_; 92 | TokenizerFunc f_; 93 | }; 94 | 95 | 96 | } // namespace boost 97 | 98 | #endif 99 | -------------------------------------------------------------------------------- /index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/boostorg/tokenizer/a1207c1a424ee565344fe5ef00605cb93f5210c2/index.html -------------------------------------------------------------------------------- /meta/libraries.json: -------------------------------------------------------------------------------- 1 | { 2 | "key": "tokenizer", 3 | "name": "Tokenizer", 4 | 
"authors": [ 5 | "John Bandela" 6 | ], 7 | "description": "Break of a string or other character sequence into a series of tokens.", 8 | "category": [ 9 | "Iterators", 10 | "String" 11 | ], 12 | "maintainers": [ 13 | "John R. Bandela " 14 | ], 15 | "cxxstd": "03" 16 | } 17 | -------------------------------------------------------------------------------- /test/Jamfile.v2: -------------------------------------------------------------------------------- 1 | #~ Copyright Rene Rivera 2008 2 | #~ Distributed under the Boost Software License, Version 1.0. 3 | #~ (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 4 | 5 | import testing ; 6 | 7 | project : requirements /boost/tokenizer//boost_tokenizer ; 8 | 9 | run examples.cpp : : : /boost/array//boost_array ; 10 | run simple_example_1.cpp ; 11 | run simple_example_2.cpp ; 12 | run simple_example_3.cpp ; 13 | run simple_example_4.cpp ; 14 | run simple_example_5.cpp ; 15 | -------------------------------------------------------------------------------- /test/cmake_test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright 2021-2024 Alexander Grund 2 | # Distributed under the Boost Software License, Version 1.0. 3 | # See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt 4 | 5 | cmake_minimum_required(VERSION 3.5...3.16) 6 | 7 | project(cmake_subdir_test LANGUAGES CXX) 8 | 9 | # Those 2 should work the same 10 | # while using find_package for the installed Boost avoids the need to manually specify dependencies 11 | if(BOOST_CI_INSTALL_TEST) 12 | find_package(boost_tokenizer REQUIRED) 13 | else() 14 | set(BOOST_INCLUDE_LIBRARIES tokenizer) 15 | add_subdirectory(../../../.. 
deps/boost EXCLUDE_FROM_ALL) 16 | endif() 17 | 18 | add_executable(main main.cpp) 19 | target_link_libraries(main Boost::tokenizer) 20 | 21 | enable_testing() 22 | add_test(NAME main COMMAND main) 23 | -------------------------------------------------------------------------------- /test/cmake_test/main.cpp: -------------------------------------------------------------------------------- 1 | // (c) Copyright John R. Bandela 2001. 2 | 3 | // Distributed under the Boost Software License, Version 1.0. (See 4 | // accompanying file LICENSE_1_0.txt or copy at 5 | // http://www.boost.org/LICENSE_1_0.txt) 6 | 7 | // See http://www.boost.org/libs/tokenizer for documentation 8 | 9 | // main.cpp for the CMake consumption test (same program as simple_example_2.cpp) 10 | #include 11 | #include 12 | #include 13 | 14 | int main(){ // tokenizes s and prints each token on its own line 15 | using namespace std; 16 | using namespace boost; 17 | string s = "Field 1,\"putting quotes around fields, allows commas\",Field 3"; 18 | tokenizer > tok(s); 19 | for(tokenizer >::iterator beg=tok.begin(); beg!=tok.end();++beg){ 20 | cout << *beg << "\n"; 21 | } 22 | return 0; 23 | } 24 | 25 | -------------------------------------------------------------------------------- /test/examples.cpp: -------------------------------------------------------------------------------- 1 | // Boost tokenizer examples -------------------------------------------------// 2 | 3 | // (c) Copyright John R. Bandela 2001. 4 | 5 | // Distributed under the Boost Software License, Version 1.0. (See 6 | // accompanying file LICENSE_1_0.txt or copy at 7 | // http://www.boost.org/LICENSE_1_0.txt) 8 | 9 | // See http://www.boost.org for updates, documentation, and revision history. 
10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | #include 19 | 20 | int main() 21 | { 22 | using namespace boost; 23 | 24 | // Use tokenizer. NOTE(review): the three-iterator std::equal used in every check below stops at the end of the token range and never verifies that answer has no further entries, so a tokenizer yielding too few tokens would still pass. 25 | { // expected result contains neither delimiters nor empty tokens 26 | const std::string test_string = ";;Hello|world||-foo--bar;yow;baz|"; 27 | std::string answer[] = { "Hello", "world", "foo", "bar", "yow", "baz" }; 28 | typedef tokenizer > Tok; 29 | char_separator sep("-;|"); 30 | Tok t(test_string, sep); 31 | BOOST_TEST(std::equal(t.begin(),t.end(),answer)); 32 | } 33 | { // same input with keep_empty_tokens: "|" appears as a token of its own and empty fields appear as "" 34 | const std::string test_string = ";;Hello|world||-foo--bar;yow;baz|"; 35 | std::string answer[] = { "", "", "Hello", "|", "world", "|", "", "|", "", 36 | "foo", "", "bar", "yow", "baz", "|", "" }; 37 | typedef tokenizer > Tok; 38 | char_separator sep("-;", "|", boost::keep_empty_tokens); 39 | Tok t(test_string, sep); 40 | BOOST_TEST(std::equal(t.begin(), t.end(), answer)); 41 | } 42 | { // tokenizer<> with all template defaults and a default-constructed tokenizer function 43 | const std::string test_string = "This,,is, a.test.."; 44 | std::string answer[] = {"This","is","a","test"}; 45 | typedef tokenizer<> Tok; 46 | Tok t(test_string); 47 | BOOST_TEST(std::equal(t.begin(),t.end(),answer)); 48 | } 49 | 50 | { // comma-separated input with a quoted field containing a comma, an escaped quote and an escaped backslash 51 | const std::string test_string = "Field 1,\"embedded,comma\",quote \\\", escape \\\\"; 52 | std::string answer[] = {"Field 1","embedded,comma","quote \""," escape \\"}; 53 | typedef tokenizer > Tok; 54 | Tok t(test_string); 55 | BOOST_TEST(std::equal(t.begin(),t.end(),answer)); 56 | } 57 | 58 | { // escaped_list_separator built from three custom character sets (presumably escape, separator and quote characters -- confirm against token_functions.hpp) 59 | const std::string test_string = ",1,;2\\\";3\\;,4,5^\\,\'6,7\';"; 60 | std::string answer[] = {"","1","","2\"","3;","4","5\\","6,7",""}; 61 | typedef tokenizer > Tok; 62 | escaped_list_separator sep("\\^",",;","\"\'"); 63 | Tok t(test_string,sep); 64 | BOOST_TEST(std::equal(t.begin(),t.end(),answer)); 65 | } 66 | 67 | { // offset_separator with offsets 2, 2, 4: expected tokens have those lengths 68 | const std::string test_string = "12252001"; 69 | std::string answer[] = {"12","25","2001"}; 70 | typedef tokenizer Tok; 71 | boost::array offsets = {{2,2,4}}; 72 | offset_separator func(offsets.begin(),offsets.end()); 
73 | Tok t(test_string,func); 74 | BOOST_TEST(std::equal(t.begin(),t.end(),answer)); 75 | } 76 | 77 | // Use token_iterator_generator 78 | { 79 | const std::string test_string = "This,,is, a.test.."; 80 | std::string answer[] = {"This","is","a","test"}; 81 | typedef token_iterator_generator >::type Iter; 82 | Iter begin = make_token_iterator(test_string.begin(), 83 | test_string.end(),char_delimiters_separator()); 84 | Iter end; // default-constructed iterator used as the end sentinel 85 | BOOST_TEST(std::equal(begin,end,answer)); 86 | } 87 | 88 | { 89 | const std::string test_string = "Field 1,\"embedded,comma\",quote \\\", escape \\\\"; 90 | std::string answer[] = {"Field 1","embedded,comma","quote \""," escape \\"}; 91 | typedef token_iterator_generator >::type Iter; 92 | Iter begin = make_token_iterator(test_string.begin(), 93 | test_string.end(),escaped_list_separator()); 94 | Iter begin_c(begin); // copy of begin, walked by hand below to check at_end() 95 | Iter end; 96 | BOOST_TEST(std::equal(begin,end,answer)); 97 | 98 | while(begin_c != end) 99 | { 100 | BOOST_TEST(begin_c.at_end() == 0); // must not report end while it still differs from the end iterator 101 | ++begin_c; 102 | } 103 | BOOST_TEST(begin_c.at_end()); 104 | } 105 | 106 | { 107 | const std::string test_string = "12252001"; 108 | std::string answer[] = {"12","25","2001"}; 109 | typedef token_iterator_generator::type Iter; 110 | boost::array offsets = {{2,2,4}}; 111 | offset_separator func(offsets.begin(),offsets.end()); 112 | Iter begin = make_token_iterator(test_string.begin(), 113 | test_string.end(),func); 114 | Iter end= make_token_iterator(test_string.end(), // end iterator built explicitly from the empty range at test_string.end() 115 | test_string.end(),func); 116 | BOOST_TEST(std::equal(begin,end,answer)); 117 | } 118 | 119 | // Test copying 120 | { 121 | const std::string test_string = "abcdef"; 122 | token_iterator_generator::type beg, end, other; 123 | boost::array ar = {{1,2,3}}; 124 | offset_separator f(ar.begin(),ar.end()); 125 | beg = make_token_iterator(test_string.begin(),test_string.end(),f); 126 | 127 | ++beg; 128 | other = beg; // advancing the copy below must leave beg on its own token 129 | ++other; 130 | 131 | BOOST_TEST(*beg=="bc"); 132 | BOOST_TEST(*other=="def"); 133 | 134 | 
other = make_token_iterator(test_string.begin(), 135 | test_string.end(),f); 136 | 137 | BOOST_TEST(*other=="a"); 138 | } 139 | 140 | // Test non-default constructed char_delimiters_separator 141 | { // expected output keeps "," as tokens and keeps the spaces inside "are you" and " doing" 142 | const std::string test_string = "how,are you, doing"; 143 | std::string answer[] = {"how",",","are you",","," doing"}; 144 | tokenizer<> t(test_string,char_delimiters_separator(true,",","")); 145 | BOOST_TEST(std::equal(t.begin(),t.end(),answer)); 146 | } 147 | 148 | return boost::report_errors(); 149 | } 150 | -------------------------------------------------------------------------------- /test/simple_example_1.cpp: -------------------------------------------------------------------------------- 1 | // (c) Copyright John R. Bandela 2001. 2 | 3 | // Distributed under the Boost Software License, Version 1.0. (See 4 | // accompanying file LICENSE_1_0.txt or copy at 5 | // http://www.boost.org/LICENSE_1_0.txt) 6 | 7 | // See http://www.boost.org/libs/tokenizer for documentation 8 | 9 | 10 | // simple_example_1.cpp 11 | #include 12 | #include 13 | #include 14 | 15 | int main(){ // tokenizes s with the default tokenizer<> and prints each token on its own line 16 | using namespace std; 17 | using namespace boost; 18 | string s = "This is, a test"; 19 | tokenizer<> tok(s); 20 | for(tokenizer<>::iterator beg=tok.begin(); beg!=tok.end();++beg){ 21 | cout << *beg << "\n"; 22 | } 23 | return 0; 24 | } 25 | 26 | -------------------------------------------------------------------------------- /test/simple_example_2.cpp: -------------------------------------------------------------------------------- 1 | // (c) Copyright John R. Bandela 2001. 2 | 3 | // Distributed under the Boost Software License, Version 1.0. 
(See 4 | // accompanying file LICENSE_1_0.txt or copy at 5 | // http://www.boost.org/LICENSE_1_0.txt) 6 | 7 | // See http://www.boost.org/libs/tokenizer for documentation 8 | 9 | // simple_example_2.cpp 10 | #include 11 | #include 12 | #include 13 | 14 | int main(){ // tokenizes a comma-separated string containing a quoted field and prints each token on its own line 15 | using namespace std; 16 | using namespace boost; 17 | string s = "Field 1,\"putting quotes around fields, allows commas\",Field 3"; 18 | tokenizer > tok(s); 19 | for(tokenizer >::iterator beg=tok.begin(); beg!=tok.end();++beg){ 20 | cout << *beg << "\n"; 21 | } 22 | return 0; 23 | } 24 | 25 | -------------------------------------------------------------------------------- /test/simple_example_3.cpp: -------------------------------------------------------------------------------- 1 | // (c) Copyright John R. Bandela 2001. 2 | 3 | // Distributed under the Boost Software License, Version 1.0. (See 4 | // accompanying file LICENSE_1_0.txt or copy at 5 | // http://www.boost.org/LICENSE_1_0.txt) 6 | 7 | // See http://www.boost.org/libs/tokenizer for documentation 8 | 9 | // simple_example_3.cpp 10 | #include 11 | #include 12 | #include 13 | 14 | int main(){ // tokenizes s with an offset_separator built from offsets {2,2,4} and prints each token on its own line 15 | using namespace std; 16 | using namespace boost; 17 | string s = "12252001"; 18 | int offsets[] = {2,2,4}; 19 | offset_separator f(offsets, offsets+3); 20 | tokenizer tok(s,f); 21 | for(tokenizer::iterator beg=tok.begin(); beg!=tok.end();++beg){ 22 | cout << *beg << "\n"; 23 | } 24 | return 0; 25 | } 26 | -------------------------------------------------------------------------------- /test/simple_example_4.cpp: -------------------------------------------------------------------------------- 1 | // (c) Copyright John R. Bandela 2001. 2 | 3 | // Distributed under the Boost Software License, Version 1.0. 
(See 4 | // accompanying file LICENSE_1_0.txt or copy at 5 | // http://www.boost.org/LICENSE_1_0.txt) 6 | 7 | // See http://www.boost.org/libs/tokenizer for documentation 8 | 9 | // simple_example_4.cpp 10 | #include 11 | #include 12 | #include 13 | 14 | int main(){ // tokenizes s and prints each token on its own line 15 | using namespace std; 16 | using namespace boost; 17 | string s = "This is, a test"; 18 | tokenizer > tok(s); 19 | for(tokenizer >::iterator beg=tok.begin(); beg!=tok.end();++beg){ 20 | cout << *beg << "\n"; 21 | } 22 | return 0; 23 | } 24 | 25 | -------------------------------------------------------------------------------- /test/simple_example_5.cpp: -------------------------------------------------------------------------------- 1 | // (c) Copyright John R. Bandela 2001. 2 | 3 | // Distributed under the Boost Software License, Version 1.0. (See 4 | // accompanying file LICENSE_1_0.txt or copy at 5 | // http://www.boost.org/LICENSE_1_0.txt) 6 | 7 | // See http://www.boost.org/libs/tokenizer for documentation 8 | 9 | /// simple_example_5.cpp 10 | #include 11 | #include 12 | #include 13 | 14 | #ifdef BOOST_BORLANDC 15 | // compiler bug fix: 16 | template class boost::token_iterator_generator::type; 17 | #endif 18 | 19 | int main(){ // same tokenization as simple_example_3.cpp, but driven through token iterators from make_token_iterator instead of a tokenizer object 20 | using namespace std; 21 | using namespace boost; 22 | string s = "12252001"; 23 | int offsets[] = {2,2,4}; 24 | offset_separator f(offsets, offsets+3); 25 | typedef token_iterator_generator::type Iter; 26 | Iter beg = make_token_iterator(s.begin(),s.end(),f); 27 | Iter end = make_token_iterator(s.end(),s.end(),f); 28 | // The statement above could equally be written as the default-constructed iterator below 29 | // Iter end; 30 | for(;beg!=end;++beg){ 31 | cout << *beg << "\n"; 32 | } 33 | return 0; 34 | } 35 | --------------------------------------------------------------------------------