├── CMakeLists.txt ├── README.md ├── data ├── merges.txt └── vocab.json └── src ├── GPT2Tokenizer.hpp ├── config.hpp.in ├── ctre-unicode.hpp ├── cxxopts.hpp ├── main.cpp ├── simdjson.cpp └── simdjson.h /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.18) # the REQUIRED option of find_path()/find_library() used below needs CMake >= 3.18 2 | project(GPT2) 3 | 4 | set(CMAKE_CXX_STANDARD 17) 5 | 6 | find_path(ORT_INCLUDE_DIR 7 | NAMES "onnxruntime_cxx_api.h" 8 | HINTS ENV "HOME" 9 | PATH_SUFFIXES onnxruntime/include/onnxruntime/core/session 10 | REQUIRED) 11 | 12 | find_library(ORT_LIBRARY 13 | NAMES "libonnxruntime.so" 14 | HINTS ENV "HOME" 15 | PATH_SUFFIXES onnxruntime/build/Linux/Release onnxruntime/build/Linux/Debug 16 | REQUIRED) 17 | 18 | include(ExternalProject) 19 | 20 | ExternalProject_Add( 21 | onnx_model 22 | DOWNLOAD_DIR ${PROJECT_SOURCE_DIR}/data 23 | URL https://github.com/onnx/models/raw/master/text/machine_comprehension/gpt-2/model/gpt2-lm-head-10.onnx 24 | DOWNLOAD_NO_EXTRACT TRUE # the model is a plain .onnx file, not an archive: store it in DOWNLOAD_DIR as-is so that ONNX_FILE below exists 25 | CONFIGURE_COMMAND "" 26 | BUILD_COMMAND "" 27 | INSTALL_COMMAND "" 28 | ) 29 | 30 | set(VOCAB_FILE ${PROJECT_SOURCE_DIR}/data/vocab.json) 31 | set(MERGES_FILE ${PROJECT_SOURCE_DIR}/data/merges.txt) 32 | set(ONNX_FILE ${PROJECT_SOURCE_DIR}/data/gpt2-lm-head-10.onnx) 33 | 34 | configure_file( 35 | "${PROJECT_SOURCE_DIR}/src/config.hpp.in" 36 | "${PROJECT_BINARY_DIR}/src/config.hpp" 37 | ) 38 | 39 | add_executable(gpt2-generate ${PROJECT_SOURCE_DIR}/src/main.cpp 40 | ${PROJECT_SOURCE_DIR}/src/simdjson.cpp) 41 | target_include_directories(gpt2-generate PRIVATE 42 | ${ORT_INCLUDE_DIR} 43 | ${PROJECT_BINARY_DIR}/src) 44 | target_link_libraries(gpt2-generate PRIVATE ${ORT_LIBRARY}) 45 | 46 | 47 | 48 | # not that this would work on a windows machine.. 
49 | if (MSVC) 50 | target_compile_options(gpt2-generate PRIVATE /W4 /WX) 51 | else() 52 | target_compile_options(gpt2-generate PRIVATE -Wall -Wextra -Werror) 53 | endif() -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | GPT2 2 | ------ 3 | 4 | This is a simple repo that implements the GPT2 tokenizer and runs the GPT2 ONNX model provided [here](https://github.com/onnx/models/tree/master/text/machine_comprehension/gpt-2). 5 | 6 | ## Installation 7 | The cmake script looks for the [onnxruntime](https://github.com/microsoft/onnxruntime) header files and dynamic library inside an `onnxruntime` checkout located directly under the `$HOME` directory (headers in `onnxruntime/include/onnxruntime/core/session`, library in `onnxruntime/build/Linux/Release` or `onnxruntime/build/Linux/Debug`). After compiling, make sure that the dynamic library can be found at runtime, i.e. set `LD_LIBRARY_PATH` accordingly. 8 | 9 | The other three dependencies are already included in this project: 10 | * [simdjson](https://github.com/simdjson/simdjson) 11 | * [ctre](https://github.com/hanickadot/compile-time-regular-expressions) 12 | * [cxxopts](https://github.com/jarro2783/cxxopts/) 13 | 14 | Compiling the binary requires a C++17 compliant compiler. 15 | Additionally, the ONNX model is downloaded during the build process from the [ONNX Model Zoo](https://github.com/onnx/models) repo. The model is 634 MB, so the download may take a while :) 16 | 17 | The vocabulary and merges files are provided in this repository, but were originally obtained from the [transformers](https://github.com/huggingface/transformers) repo. 18 | 19 | ```bash 20 | mkdir build && cd build 21 | cmake .. 22 | make 23 | ``` 24 | 25 | ## Usage 26 | ```bash 27 | cd build 28 | ./gpt2-generate -t "I've got a q" -n 5 29 | Prediction: "I've got a qwerty knife!!" 
30 | ``` -------------------------------------------------------------------------------- /src/GPT2Tokenizer.hpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "ctre-unicode.hpp" 9 | #include "simdjson.h" 10 | 11 | // the code here is based on the transformers python package -> transformers.models.gpt2.GPT2Tokenizer 12 | // hash_combine: mixes the std::hash of v into the running hash value seed (used below to hash a pair of strings) 13 | template 14 | inline void hash_combine(std::size_t& seed, const T& v) 15 | { 16 | std::hash hasher; 17 | seed ^= hasher(v) + 0x9e3779b9 + (seed<<6) + (seed>>2); 18 | } 19 | // bytes_to_unicode: returns a copy of the table that maps each of the 256 byte values to one printable character, stored as a string literal 20 | std::unordered_map bytes_to_unicode() { 21 | // The code_map was copy pasted from the output of the Python bytes_to_unicode() 22 | // definitely not cross-platform (assumes the non-ASCII literals are compiled as UTF-8 - TODO confirm), but works on POSIX maybe 23 | static std::unordered_map code_map = {{33, "!"},{34, "\""},{35, "#"},{36, "$"},{37, "%"},{38, "&"},{39, "\'"},{40, "("},{41, ")"},{42, "*"},{43, "+"},{44, ","},{45, "-"},{46, "."},{47, "/"},{48, "0"},{49, "1"},{50, "2"},{51, "3"},{52, "4"},{53, "5"},{54, "6"},{55, "7"},{56, "8"},{57, "9"},{58, ":"},{59, ";"},{60, "<"},{61, "="},{62, ">"},{63, "?"},{64, "@"},{65, "A"},{66, "B"},{67, "C"},{68, "D"},{69, "E"},{70, "F"},{71, "G"},{72, "H"},{73, "I"},{74, "J"},{75, "K"},{76, "L"},{77, "M"},{78, "N"},{79, "O"},{80, "P"},{81, "Q"},{82, "R"},{83, "S"},{84, "T"},{85, "U"},{86, "V"},{87, "W"},{88, "X"},{89, "Y"},{90, "Z"},{91, "["},{92, "\\"},{93, "]"},{94, "^"},{95, "_"},{96, "`"},{97, "a"},{98, "b"},{99, "c"},{100, "d"},{101, "e"},{102, "f"},{103, "g"},{104, "h"},{105, "i"},{106, "j"},{107, "k"},{108, "l"},{109, "m"},{110, "n"},{111, "o"},{112, "p"},{113, "q"},{114, "r"},{115, "s"},{116, "t"},{117, "u"},{118, "v"},{119, "w"},{120, "x"},{121, "y"},{122, "z"},{123, "{"},{124, "|"},{125, "}"},{126, "~"},{161, "¡"},{162, "¢"},{163, "£"},{164, "¤"},{165, "¥"},{166, "¦"},{167, "§"},{168, "¨"},{169, "©"},{170, "ª"},{171, "«"},{172, "¬"},{174, 
"®"},{175, "¯"},{176, "°"},{177, "±"},{178, "²"},{179, "³"},{180, "´"},{181, "µ"},{182, "¶"},{183, "·"},{184, "¸"},{185, "¹"},{186, "º"},{187, "»"},{188, "¼"},{189, "½"},{190, "¾"},{191, "¿"},{192, "À"},{193, "Á"},{194, "Â"},{195, "Ã"},{196, "Ä"},{197, "Å"},{198, "Æ"},{199, "Ç"},{200, "È"},{201, "É"},{202, "Ê"},{203, "Ë"},{204, "Ì"},{205, "Í"},{206, "Î"},{207, "Ï"},{208, "Ð"},{209, "Ñ"},{210, "Ò"},{211, "Ó"},{212, "Ô"},{213, "Õ"},{214, "Ö"},{215, "×"},{216, "Ø"},{217, "Ù"},{218, "Ú"},{219, "Û"},{220, "Ü"},{221, "Ý"},{222, "Þ"},{223, "ß"},{224, "à"},{225, "á"},{226, "â"},{227, "ã"},{228, "ä"},{229, "å"},{230, "æ"},{231, "ç"},{232, "è"},{233, "é"},{234, "ê"},{235, "ë"},{236, "ì"},{237, "í"},{238, "î"},{239, "ï"},{240, "ð"},{241, "ñ"},{242, "ò"},{243, "ó"},{244, "ô"},{245, "õ"},{246, "ö"},{247, "÷"},{248, "ø"},{249, "ù"},{250, "ú"},{251, "û"},{252, "ü"},{253, "ý"},{254, "þ"},{255, "ÿ"},{0, "Ā"},{1, "ā"},{2, "Ă"},{3, "ă"},{4, "Ą"},{5, "ą"},{6, "Ć"},{7, "ć"},{8, "Ĉ"},{9, "ĉ"},{10, "Ċ"},{11, "ċ"},{12, "Č"},{13, "č"},{14, "Ď"},{15, "ď"},{16, "Đ"},{17, "đ"},{18, "Ē"},{19, "ē"},{20, "Ĕ"},{21, "ĕ"},{22, "Ė"},{23, "ė"},{24, "Ę"},{25, "ę"},{26, "Ě"},{27, "ě"},{28, "Ĝ"},{29, "ĝ"},{30, "Ğ"},{31, "ğ"},{32, "Ġ"},{127, "ġ"},{128, "Ģ"},{129, "ģ"},{130, "Ĥ"},{131, "ĥ"},{132, "Ħ"},{133, "ħ"},{134, "Ĩ"},{135, "ĩ"},{136, "Ī"},{137, "ī"},{138, "Ĭ"},{139, "ĭ"},{140, "Į"},{141, "į"},{142, "İ"},{143, "ı"},{144, "\xC4\xB2"},{145, "\xC4\xB3"} /* U+0132 and U+0133, spelled as UTF-8 byte escapes so that text normalisation cannot expand them into the two-letter strings "IJ"/"ij" */,{146, "Ĵ"},{147, "ĵ"},{148, "Ķ"},{149, "ķ"},{150, "ĸ"},{151, "Ĺ"},{152, "ĺ"},{153, "Ļ"},{154, "ļ"},{155, "Ľ"},{156, "ľ"},{157, "Ŀ"},{158, "ŀ"},{159, "Ł"},{160, "ł"},{173, "Ń"}}; 24 | return code_map; // returned by value: every call copies the whole table 25 | } 26 | 27 | std::unordered_map unicode_to_bytes() { 28 | static std::unordered_map code_map = {{"!", 33},{"\"", 34},{"#", 35},{"$", 36},{"%", 37},{"&", 38},{"\'", 39},{"(", 40},{")", 41},{"*", 42},{"+", 43},{",", 44},{"-", 45},{".", 46},{"/", 47},{"0", 48},{"1", 49},{"2", 50},{"3", 51},{"4", 52},{"5", 53},{"6", 54},{"7", 55},{"8", 56},{"9", 57},{":", 58},{";", 
59},{"<", 60},{"=", 61},{">", 62},{"?", 63},{"@", 64},{"A", 65},{"B", 66},{"C", 67},{"D", 68},{"E", 69},{"F", 70},{"G", 71},{"H", 72},{"I", 73},{"J", 74},{"K", 75},{"L", 76},{"M", 77},{"N", 78},{"O", 79},{"P", 80},{"Q", 81},{"R", 82},{"S", 83},{"T", 84},{"U", 85},{"V", 86},{"W", 87},{"X", 88},{"Y", 89},{"Z", 90},{"[", 91},{"\\", 92},{"]", 93},{"^", 94},{"_", 95},{"`", 96},{"a", 97},{"b", 98},{"c", 99},{"d", 100},{"e", 101},{"f", 102},{"g", 103},{"h", 104},{"i", 105},{"j", 106},{"k", 107},{"l", 108},{"m", 109},{"n", 110},{"o", 111},{"p", 112},{"q", 113},{"r", 114},{"s", 115},{"t", 116},{"u", 117},{"v", 118},{"w", 119},{"x", 120},{"y", 121},{"z", 122},{"{", 123},{"|", 124},{"}", 125},{"~", 126},{"¡", 161},{"¢", 162},{"£", 163},{"¤", 164},{"¥", 165},{"¦", 166},{"§", 167},{"¨", 168},{"©", 169},{"ª", 170},{"«", 171},{"¬", 172},{"®", 174},{"¯", 175},{"°", 176},{"±", 177},{"²", 178},{"³", 179},{"´", 180},{"µ", 181},{"¶", 182},{"·", 183},{"¸", 184},{"¹", 185},{"º", 186},{"»", 187},{"¼", 188},{"½", 189},{"¾", 190},{"¿", 191},{"À", 192},{"Á", 193},{"Â", 194},{"Ã", 195},{"Ä", 196},{"Å", 197},{"Æ", 198},{"Ç", 199},{"È", 200},{"É", 201},{"Ê", 202},{"Ë", 203},{"Ì", 204},{"Í", 205},{"Î", 206},{"Ï", 207},{"Ð", 208},{"Ñ", 209},{"Ò", 210},{"Ó", 211},{"Ô", 212},{"Õ", 213},{"Ö", 214},{"×", 215},{"Ø", 216},{"Ù", 217},{"Ú", 218},{"Û", 219},{"Ü", 220},{"Ý", 221},{"Þ", 222},{"ß", 223},{"à", 224},{"á", 225},{"â", 226},{"ã", 227},{"ä", 228},{"å", 229},{"æ", 230},{"ç", 231},{"è", 232},{"é", 233},{"ê", 234},{"ë", 235},{"ì", 236},{"í", 237},{"î", 238},{"ï", 239},{"ð", 240},{"ñ", 241},{"ò", 242},{"ó", 243},{"ô", 244},{"õ", 245},{"ö", 246},{"÷", 247},{"ø", 248},{"ù", 249},{"ú", 250},{"û", 251},{"ü", 252},{"ý", 253},{"þ", 254},{"ÿ", 255},{"Ā", 0},{"ā", 1},{"Ă", 2},{"ă", 3},{"Ą", 4},{"ą", 5},{"Ć", 6},{"ć", 7},{"Ĉ", 8},{"ĉ", 9},{"Ċ", 10},{"ċ", 11},{"Č", 12},{"č", 13},{"Ď", 14},{"ď", 15},{"Đ", 16},{"đ", 17},{"Ē", 18},{"ē", 19},{"Ĕ", 20},{"ĕ", 21},{"Ė", 22},{"ė", 23},{"Ę", 24},{"ę", 25},{"Ě", 
26},{"ě", 27},{"Ĝ", 28},{"ĝ", 29},{"Ğ", 30},{"ğ", 31},{"Ġ", 32},{"ġ", 127},{"Ģ", 128},{"ģ", 129},{"Ĥ", 130},{"ĥ", 131},{"Ħ", 132},{"ħ", 133},{"Ĩ", 134},{"ĩ", 135},{"Ī", 136},{"ī", 137},{"Ĭ", 138},{"ĭ", 139},{"Į", 140},{"į", 141},{"İ", 142},{"ı", 143},{"\xC4\xB2", 144},{"\xC4\xB3", 145} /* U+0132 and U+0133, spelled as UTF-8 byte escapes so that text normalisation cannot expand them into the two-letter strings "IJ"/"ij" */,{"Ĵ", 146},{"ĵ", 147},{"Ķ", 148},{"ķ", 149},{"ĸ", 150},{"Ĺ", 151},{"ĺ", 152},{"Ļ", 153},{"ļ", 154},{"Ľ", 155},{"ľ", 156},{"Ŀ", 157},{"ŀ", 158},{"Ł", 159},{"ł", 160},{"Ń", 173}}; 29 | return code_map; // returned by value: every call copies the whole table 30 | } 31 | // GPT2Tokenizer: byte-level BPE tokenizer built from a vocabulary file and a merges file via load() 32 | class GPT2Tokenizer { 33 | 34 | struct PairHash 35 | { 36 | std::size_t operator()(const std::pair& p) const noexcept 37 | { 38 | std::size_t seed = 0; 39 | hash_combine(seed, p.first); 40 | hash_combine(seed, p.second); 41 | return seed; 42 | } 43 | }; 44 | 45 | using BPE = std::pair; 46 | using BPERanks = std::unordered_map; 47 | using Encoder = std::unordered_map; 48 | using Decoder = std::unordered_map; 49 | 50 | // pre-tokenization regex: matches the contraction suffixes 's 't 're 've 'm 'll 'd, runs of letters, runs of numbers and runs of other non-space symbols (each optionally preceded by one space), and runs of whitespace 51 | static constexpr std::string_view pattern {"'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+"}; 52 | 53 | public: 54 | // load() returns std::nullopt when the merges file cannot be opened or the vocabulary is not a JSON object 55 | static std::optional load(std::string_view vocab_file, std::string_view merges_file); 56 | 57 | std::vector encode(const std::string&); 58 | std::string decode(const std::vector&); 59 | std::vector tokenize(const std::string&); 60 | 61 | size_t vocab_size() const noexcept { return m_encoder.size(); } 62 | 63 | protected: 64 | 65 | GPT2Tokenizer() = default; 66 | 67 | BPERanks m_bpe_ranks; 68 | Encoder m_encoder; 69 | Decoder m_decoder; 70 | std::unordered_map m_byte_encoder; 71 | std::unordered_map m_byte_decoder; 72 | 73 | private: 74 | std::vector bpe(const std::string& token); 75 | }; 76 | 77 | std::optional GPT2Tokenizer::load(std::string_view vocab_file, std::string_view merges_file) { 78 | 79 | // load merges file 80 | std::ifstream merges_file_stream; 81 | // assuming null-terminated string 82 | merges_file_stream.open(merges_file.data()); 83 | 84 | if 
(!merges_file_stream.good()) { 85 | return std::nullopt; 86 | } 87 | 88 | BPERanks bpe_ranks; 89 | 90 | std::string merges_version; 91 | std::getline(merges_file_stream, merges_version); 92 | 93 | for (struct{std::string line; size_t i{0};} it; std::getline(merges_file_stream, it.line); ++it.i) { 94 | const size_t split_point = it.line.find(' '); 95 | std::pair p{{it.line.begin(), it.line.begin()+split_point}, 96 | {it.line.begin() + split_point + 1, it.line.end()}}; 97 | bpe_ranks.emplace(std::move(p), it.i); 98 | } 99 | 100 | simdjson::dom::parser parser; 101 | simdjson::dom::object object; 102 | // assuming null-terminated string 103 | simdjson::dom::element doc = parser.load(vocab_file.data()); 104 | 105 | auto error = doc.get(object); 106 | if (error) { 107 | return std::nullopt; 108 | } 109 | 110 | Encoder encoder; 111 | Decoder decoder; 112 | 113 | for (const auto& [key, value] : object) { 114 | encoder.emplace(key, value); 115 | decoder.emplace(value, key); 116 | } 117 | 118 | auto result = GPT2Tokenizer(); 119 | result.m_bpe_ranks = std::move(bpe_ranks); 120 | result.m_encoder = std::move(encoder); 121 | result.m_decoder = std::move(decoder); 122 | result.m_byte_encoder = bytes_to_unicode(); 123 | result.m_byte_decoder = unicode_to_bytes(); 124 | 125 | return result; 126 | } 127 | 128 | 129 | std::vector GPT2Tokenizer::encode(const std::string& text) { 130 | std::vector tokens = tokenize(text); 131 | std::vector token_ids; 132 | token_ids.reserve(tokens.size()); 133 | std::transform(tokens.begin(), tokens.end(), std::back_inserter(token_ids), 134 | [this](const std::string& token){ 135 | return m_encoder[token]; 136 | }); 137 | return token_ids; 138 | } 139 | 140 | size_t codepoint_length(const char c) { 141 | if((c & 0xf8) == 0xf0) return 4; 142 | else if((c & 0xf0) == 0xe0) return 3; 143 | else if((c & 0xe0) == 0xc0) return 2; 144 | else return 1; 145 | } 146 | 147 | std::string GPT2Tokenizer::decode(const std::vector& token_ids) { 148 | std::string 
decoded_string; 149 | for (const auto& id: token_ids) { 150 | const std::string& decoded_token = m_decoder[id]; // bind to the map entry instead of copying it; m_decoder is not modified inside the inner loop 151 | for (size_t i = 0; i < decoded_token.size();) { 152 | int length = codepoint_length(decoded_token[i]); 153 | decoded_string += m_byte_decoder[decoded_token.substr(i, length)]; 154 | i+=length; 155 | } 156 | } 157 | return decoded_string; 158 | } 159 | 160 | // tokenize: for every regex match, maps each byte through m_byte_encoder, runs bpe() on the result and appends the pieces 161 | std::vector GPT2Tokenizer::tokenize(const std::string& text) { 162 | std::vector result; 163 | for (auto match: ctre::range(text)) { 164 | std::string token = match.to_string(); 165 | std::string byte_token; 166 | for (const auto& t: token) { 167 | byte_token += m_byte_encoder[t]; 168 | } 169 | std::vector bpe_result = bpe(byte_token); 170 | // no reserve() here: an exact-size reserve on every match defeats the vector's geometric growth, insert() grows it as needed 171 | result.insert(result.end(), bpe_result.begin(), bpe_result.end()); 172 | } 173 | 174 | return result; 175 | } 176 | 177 | // bpe: splits token into UTF-8 code points and repeatedly merges the adjacent pair with the lowest rank in m_bpe_ranks until no ranked pair is left 178 | std::vector GPT2Tokenizer::bpe(const std::string& token) { 179 | if (token.empty()) return {}; // nothing to split; also keeps token.size()-1 below from wrapping around 180 | std::vector ranks; 181 | std::vector word; 182 | ranks.reserve(token.size()-1); 183 | word.reserve(token.size()); 184 | 185 | // this essentially avoids having literal spaces ' ' in a string 186 | // at the same time we fetch the ranks of the bigrams 187 | { 188 | size_t i = 0; 189 | while (true) { 190 | int length = codepoint_length(token[i]); 191 | int next_length = codepoint_length(token[i+length]); 192 | ranks.push_back( 193 | m_bpe_ranks.find({token.substr(i,length), token.substr(i+length,next_length)}) 194 | ); 195 | word.push_back(token.substr(i,length)); 196 | i+=length; 197 | if (i >= token.size()) break; 198 | if (i+next_length >= token.size()) { 199 | word.emplace_back(token.substr(i,next_length)); 200 | break; 201 | } 202 | } 203 | } 204 | 205 | while (true) { 206 | const auto bigram = std::min_element(ranks.begin(), ranks.end(), 207 | [this](const auto& lhs, const auto& rhs) -> bool { // orders ranked pairs by rank and puts unranked pairs (end()) last 208 | if (lhs == m_bpe_ranks.end() && rhs == m_bpe_ranks.end()) { 209 | return false; 210 | } 211 | else if (lhs == 
m_bpe_ranks.end() || rhs == m_bpe_ranks.end()) { 212 | return (lhs != m_bpe_ranks.end()); 213 | } 214 | else { 215 | return lhs->second < rhs->second; 216 | } 217 | }); 218 | if (*bigram == m_bpe_ranks.end()) { 219 | // could not find any matches in ranks 220 | break; 221 | } 222 | const auto [first, second] = (*bigram)->first; 223 | std::vector new_word; 224 | 225 | size_t i = 0; 226 | while (i < word.size()) { 227 | const auto wordIterator = std::find(word.begin() + i, word.end(), first); 228 | if (wordIterator == word.end()) { 229 | std::copy(word.begin() + i, word.end(), std::back_inserter(new_word)); 230 | break; 231 | } 232 | 233 | std::copy(word.begin() + i, wordIterator, std::back_inserter(new_word)); 234 | i = std::distance(word.begin(), wordIterator); 235 | 236 | if (word[i] == first && i < word.size() -1 && word[i+1] == second) { 237 | new_word.push_back(first + second); 238 | i += 2; 239 | } else { 240 | new_word.push_back(word[i]); 241 | i += 1; 242 | } 243 | } 244 | word = std::move(new_word); 245 | if (word.size() == 1) break; 246 | else { 247 | for (size_t i = 0; i < word.size()-1; ++i) { 248 | ranks[i] = m_bpe_ranks.find({word[i], word[i+1]}); 249 | } 250 | ranks.resize(word.size()-1); 251 | } 252 | } 253 | 254 | return word; 255 | } 256 | -------------------------------------------------------------------------------- /src/config.hpp.in: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | static constexpr std::string_view vocab_file = "@VOCAB_FILE@"; 4 | static constexpr std::string_view merges_file = "@MERGES_FILE@"; 5 | static constexpr std::string_view model_file = "@ONNX_FILE@"; 6 | -------------------------------------------------------------------------------- /src/cxxopts.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Copyright (c) 2014, 2015, 2016, 2017 Jarryd Beck 4 | 5 | Permission is hereby granted, free of charge, to any person 
obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 
22 | 23 | */ 24 | 25 | #ifndef CXXOPTS_HPP_INCLUDED 26 | #define CXXOPTS_HPP_INCLUDED 27 | 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include 41 | #include 42 | #include 43 | 44 | #ifdef __cpp_lib_optional 45 | #include 46 | #define CXXOPTS_HAS_OPTIONAL 47 | #endif 48 | 49 | #if __cplusplus >= 201603L 50 | #define CXXOPTS_NODISCARD [[nodiscard]] 51 | #else 52 | #define CXXOPTS_NODISCARD 53 | #endif 54 | 55 | #ifndef CXXOPTS_VECTOR_DELIMITER 56 | #define CXXOPTS_VECTOR_DELIMITER ',' 57 | #endif 58 | 59 | #define CXXOPTS__VERSION_MAJOR 3 60 | #define CXXOPTS__VERSION_MINOR 0 61 | #define CXXOPTS__VERSION_PATCH 0 62 | 63 | namespace cxxopts 64 | { 65 | static constexpr struct { 66 | uint8_t major, minor, patch; 67 | } version = { 68 | CXXOPTS__VERSION_MAJOR, 69 | CXXOPTS__VERSION_MINOR, 70 | CXXOPTS__VERSION_PATCH 71 | }; 72 | } // namespace cxxopts 73 | 74 | //when we ask cxxopts to use Unicode, help strings are processed using ICU, 75 | //which results in the correct lengths being computed for strings when they 76 | //are formatted for the help output 77 | //it is necessary to make sure that can be found by the 78 | //compiler, and that icu-uc is linked in to the binary. 
79 | 80 | #ifdef CXXOPTS_USE_UNICODE 81 | #include 82 | 83 | namespace cxxopts 84 | { 85 | using String = icu::UnicodeString; 86 | 87 | inline 88 | String 89 | toLocalString(std::string s) 90 | { 91 | return icu::UnicodeString::fromUTF8(std::move(s)); 92 | } 93 | 94 | class UnicodeStringIterator : public 95 | std::iterator 96 | { 97 | public: 98 | 99 | UnicodeStringIterator(const icu::UnicodeString* string, int32_t pos) 100 | : s(string) 101 | , i(pos) 102 | { 103 | } 104 | 105 | value_type 106 | operator*() const 107 | { 108 | return s->char32At(i); 109 | } 110 | 111 | bool 112 | operator==(const UnicodeStringIterator& rhs) const 113 | { 114 | return s == rhs.s && i == rhs.i; 115 | } 116 | 117 | bool 118 | operator!=(const UnicodeStringIterator& rhs) const 119 | { 120 | return !(*this == rhs); 121 | } 122 | 123 | UnicodeStringIterator& 124 | operator++() 125 | { 126 | ++i; 127 | return *this; 128 | } 129 | 130 | UnicodeStringIterator 131 | operator+(int32_t v) 132 | { 133 | return UnicodeStringIterator(s, i + v); 134 | } 135 | 136 | private: 137 | const icu::UnicodeString* s; 138 | int32_t i; 139 | }; 140 | 141 | inline 142 | String& 143 | stringAppend(String&s, String a) 144 | { 145 | return s.append(std::move(a)); 146 | } 147 | 148 | inline 149 | String& 150 | stringAppend(String& s, size_t n, UChar32 c) 151 | { 152 | for (size_t i = 0; i != n; ++i) 153 | { 154 | s.append(c); 155 | } 156 | 157 | return s; 158 | } 159 | 160 | template 161 | String& 162 | stringAppend(String& s, Iterator begin, Iterator end) 163 | { 164 | while (begin != end) 165 | { 166 | s.append(*begin); 167 | ++begin; 168 | } 169 | 170 | return s; 171 | } 172 | 173 | inline 174 | size_t 175 | stringLength(const String& s) 176 | { 177 | return s.length(); 178 | } 179 | 180 | inline 181 | std::string 182 | toUTF8String(const String& s) 183 | { 184 | std::string result; 185 | s.toUTF8String(result); 186 | 187 | return result; 188 | } 189 | 190 | inline 191 | bool 192 | empty(const String& s) 193 
| { 194 | return s.isEmpty(); 195 | } 196 | } 197 | 198 | namespace std 199 | { 200 | inline 201 | cxxopts::UnicodeStringIterator 202 | begin(const icu::UnicodeString& s) 203 | { 204 | return cxxopts::UnicodeStringIterator(&s, 0); 205 | } 206 | 207 | inline 208 | cxxopts::UnicodeStringIterator 209 | end(const icu::UnicodeString& s) 210 | { 211 | return cxxopts::UnicodeStringIterator(&s, s.length()); 212 | } 213 | } 214 | 215 | //ifdef CXXOPTS_USE_UNICODE 216 | #else 217 | 218 | namespace cxxopts 219 | { 220 | using String = std::string; 221 | 222 | template 223 | T 224 | toLocalString(T&& t) 225 | { 226 | return std::forward(t); 227 | } 228 | 229 | inline 230 | size_t 231 | stringLength(const String& s) 232 | { 233 | return s.length(); 234 | } 235 | 236 | inline 237 | String& 238 | stringAppend(String&s, const String& a) 239 | { 240 | return s.append(a); 241 | } 242 | 243 | inline 244 | String& 245 | stringAppend(String& s, size_t n, char c) 246 | { 247 | return s.append(n, c); 248 | } 249 | 250 | template 251 | String& 252 | stringAppend(String& s, Iterator begin, Iterator end) 253 | { 254 | return s.append(begin, end); 255 | } 256 | 257 | template 258 | std::string 259 | toUTF8String(T&& t) 260 | { 261 | return std::forward(t); 262 | } 263 | 264 | inline 265 | bool 266 | empty(const std::string& s) 267 | { 268 | return s.empty(); 269 | } 270 | } // namespace cxxopts 271 | 272 | //ifdef CXXOPTS_USE_UNICODE 273 | #endif 274 | 275 | namespace cxxopts 276 | { 277 | namespace 278 | { 279 | #ifdef _WIN32 280 | const std::string LQUOTE("\'"); 281 | const std::string RQUOTE("\'"); 282 | #else 283 | const std::string LQUOTE("‘"); 284 | const std::string RQUOTE("’"); 285 | #endif 286 | } // namespace 287 | 288 | #if defined(__GNUC__) 289 | // GNU GCC with -Weffc++ will issue a warning regarding the upcoming class, we want to silence it: 290 | // warning: base class 'class std::enable_shared_from_this' has accessible non-virtual destructor 291 | #pragma GCC diagnostic 
ignored "-Wnon-virtual-dtor" 292 | #pragma GCC diagnostic push 293 | // This will be ignored under other compilers like LLVM clang. 294 | #endif 295 | class Value : public std::enable_shared_from_this 296 | { 297 | public: 298 | 299 | virtual ~Value() = default; 300 | 301 | virtual 302 | std::shared_ptr 303 | clone() const = 0; 304 | 305 | virtual void 306 | parse(const std::string& text) const = 0; 307 | 308 | virtual void 309 | parse() const = 0; 310 | 311 | virtual bool 312 | has_default() const = 0; 313 | 314 | virtual bool 315 | is_container() const = 0; 316 | 317 | virtual bool 318 | has_implicit() const = 0; 319 | 320 | virtual std::string 321 | get_default_value() const = 0; 322 | 323 | virtual std::string 324 | get_implicit_value() const = 0; 325 | 326 | virtual std::shared_ptr 327 | default_value(const std::string& value) = 0; 328 | 329 | virtual std::shared_ptr 330 | implicit_value(const std::string& value) = 0; 331 | 332 | virtual std::shared_ptr 333 | no_implicit_value() = 0; 334 | 335 | virtual bool 336 | is_boolean() const = 0; 337 | }; 338 | #if defined(__GNUC__) 339 | #pragma GCC diagnostic pop 340 | #endif 341 | class OptionException : public std::exception 342 | { 343 | public: 344 | explicit OptionException(std::string message) 345 | : m_message(std::move(message)) 346 | { 347 | } 348 | 349 | CXXOPTS_NODISCARD 350 | const char* 351 | what() const noexcept override 352 | { 353 | return m_message.c_str(); 354 | } 355 | 356 | private: 357 | std::string m_message; 358 | }; 359 | 360 | class OptionSpecException : public OptionException 361 | { 362 | public: 363 | 364 | explicit OptionSpecException(const std::string& message) 365 | : OptionException(message) 366 | { 367 | } 368 | }; 369 | 370 | class OptionParseException : public OptionException 371 | { 372 | public: 373 | explicit OptionParseException(const std::string& message) 374 | : OptionException(message) 375 | { 376 | } 377 | }; 378 | 379 | class option_exists_error : public 
OptionSpecException 380 | { 381 | public: 382 | explicit option_exists_error(const std::string& option) 383 | : OptionSpecException("Option " + LQUOTE + option + RQUOTE + " already exists") 384 | { 385 | } 386 | }; 387 | 388 | class invalid_option_format_error : public OptionSpecException 389 | { 390 | public: 391 | explicit invalid_option_format_error(const std::string& format) 392 | : OptionSpecException("Invalid option format " + LQUOTE + format + RQUOTE) 393 | { 394 | } 395 | }; 396 | 397 | class option_syntax_exception : public OptionParseException { 398 | public: 399 | explicit option_syntax_exception(const std::string& text) 400 | : OptionParseException("Argument " + LQUOTE + text + RQUOTE + 401 | " starts with a - but has incorrect syntax") 402 | { 403 | } 404 | }; 405 | 406 | class option_not_exists_exception : public OptionParseException 407 | { 408 | public: 409 | explicit option_not_exists_exception(const std::string& option) 410 | : OptionParseException("Option " + LQUOTE + option + RQUOTE + " does not exist") 411 | { 412 | } 413 | }; 414 | 415 | class missing_argument_exception : public OptionParseException 416 | { 417 | public: 418 | explicit missing_argument_exception(const std::string& option) 419 | : OptionParseException( 420 | "Option " + LQUOTE + option + RQUOTE + " is missing an argument" 421 | ) 422 | { 423 | } 424 | }; 425 | 426 | class option_requires_argument_exception : public OptionParseException 427 | { 428 | public: 429 | explicit option_requires_argument_exception(const std::string& option) 430 | : OptionParseException( 431 | "Option " + LQUOTE + option + RQUOTE + " requires an argument" 432 | ) 433 | { 434 | } 435 | }; 436 | 437 | class option_not_has_argument_exception : public OptionParseException 438 | { 439 | public: 440 | option_not_has_argument_exception 441 | ( 442 | const std::string& option, 443 | const std::string& arg 444 | ) 445 | : OptionParseException( 446 | "Option " + LQUOTE + option + RQUOTE + 447 | " does not take an 
argument, but argument " + 448 | LQUOTE + arg + RQUOTE + " given" 449 | ) 450 | { 451 | } 452 | }; 453 | 454 | class option_not_present_exception : public OptionParseException 455 | { 456 | public: 457 | explicit option_not_present_exception(const std::string& option) 458 | : OptionParseException("Option " + LQUOTE + option + RQUOTE + " not present") 459 | { 460 | } 461 | }; 462 | 463 | class option_has_no_value_exception : public OptionException 464 | { 465 | public: 466 | explicit option_has_no_value_exception(const std::string& option) 467 | : OptionException( // name the option when a name is available, otherwise use the generic message 468 | !option.empty() ? 469 | ("Option " + LQUOTE + option + RQUOTE + " has no value") : 470 | "Option has no value") 471 | { 472 | } 473 | }; 474 | 475 | class argument_incorrect_type : public OptionParseException 476 | { 477 | public: 478 | explicit argument_incorrect_type 479 | ( 480 | const std::string& arg 481 | ) 482 | : OptionParseException( 483 | "Argument " + LQUOTE + arg + RQUOTE + " failed to parse" 484 | ) 485 | { 486 | } 487 | }; 488 | 489 | class option_required_exception : public OptionParseException 490 | { 491 | public: 492 | explicit option_required_exception(const std::string& option) 493 | : OptionParseException( 494 | "Option " + LQUOTE + option + RQUOTE + " is required but not present" 495 | ) 496 | { 497 | } 498 | }; 499 | 500 | template 501 | void throw_or_mimic(const std::string& text) 502 | { 503 | static_assert(std::is_base_of::value, 504 | "throw_or_mimic only works on std::exception and " 505 | "deriving classes"); 506 | 507 | #ifndef CXXOPTS_NO_EXCEPTIONS 508 | // If CXXOPTS_NO_EXCEPTIONS is not defined, just throw 509 | throw T{text}; 510 | #else 511 | // Otherwise manually instantiate the exception, print what() to stderr, 512 | // and exit 513 | T exception{text}; 514 | std::cerr << exception.what() << std::endl; 515 | std::exit(EXIT_FAILURE); 516 | #endif 517 | } 518 | 519 | namespace values 520 | { 521 | namespace 522 | { 523 | std::basic_regex integer_pattern 524 | 
("(-)?(0x)?([0-9a-zA-Z]+)|((0x)?0)"); 525 | std::basic_regex truthy_pattern 526 | ("(t|T)(rue)?|1"); 527 | std::basic_regex falsy_pattern 528 | ("(f|F)(alse)?|0"); 529 | } // namespace 530 | 531 | namespace detail 532 | { 533 | template 534 | struct SignedCheck; 535 | 536 | template 537 | struct SignedCheck 538 | { 539 | template 540 | void 541 | operator()(bool negative, U u, const std::string& text) 542 | { 543 | if (negative) 544 | { 545 | if (u > static_cast((std::numeric_limits::min)())) 546 | { 547 | throw_or_mimic(text); 548 | } 549 | } 550 | else 551 | { 552 | if (u > static_cast((std::numeric_limits::max)())) 553 | { 554 | throw_or_mimic(text); 555 | } 556 | } 557 | } 558 | }; 559 | 560 | template 561 | struct SignedCheck 562 | { 563 | template 564 | void 565 | operator()(bool, U, const std::string&) const {} 566 | }; 567 | 568 | template 569 | void 570 | check_signed_range(bool negative, U value, const std::string& text) 571 | { 572 | SignedCheck::is_signed>()(negative, value, text); 573 | } 574 | } // namespace detail 575 | 576 | template 577 | void 578 | checked_negate(R& r, T&& t, const std::string&, std::true_type) 579 | { 580 | // if we got to here, then `t` is a positive number that fits into 581 | // `R`. So to avoid MSVC C4146, we first cast it to `R`. 582 | // See https://github.com/jarro2783/cxxopts/issues/62 for more details. 
583 | r = static_cast(-static_cast(t-1)-1); 584 | } 585 | 586 | template 587 | void 588 | checked_negate(R&, T&&, const std::string& text, std::false_type) 589 | { 590 | throw_or_mimic(text); 591 | } 592 | 593 | template 594 | void 595 | integer_parser(const std::string& text, T& value) 596 | { 597 | std::smatch match; 598 | std::regex_match(text, match, integer_pattern); 599 | 600 | if (match.length() == 0) 601 | { 602 | throw_or_mimic(text); 603 | } 604 | 605 | if (match.length(4) > 0) 606 | { 607 | value = 0; 608 | return; 609 | } 610 | 611 | using US = typename std::make_unsigned::type; 612 | 613 | constexpr bool is_signed = std::numeric_limits::is_signed; 614 | const bool negative = match.length(1) > 0; 615 | const uint8_t base = match.length(2) > 0 ? 16 : 10; 616 | 617 | auto value_match = match[3]; 618 | 619 | US result = 0; 620 | 621 | for (auto iter = value_match.first; iter != value_match.second; ++iter) 622 | { 623 | US digit = 0; 624 | 625 | if (*iter >= '0' && *iter <= '9') 626 | { 627 | digit = static_cast(*iter - '0'); 628 | } 629 | else if (base == 16 && *iter >= 'a' && *iter <= 'f') 630 | { 631 | digit = static_cast(*iter - 'a' + 10); 632 | } 633 | else if (base == 16 && *iter >= 'A' && *iter <= 'F') 634 | { 635 | digit = static_cast(*iter - 'A' + 10); 636 | } 637 | else 638 | { 639 | throw_or_mimic(text); 640 | } 641 | 642 | const US next = static_cast(result * base + digit); 643 | if (result > next) 644 | { 645 | throw_or_mimic(text); 646 | } 647 | 648 | result = next; 649 | } 650 | 651 | detail::check_signed_range(negative, result, text); 652 | 653 | if (negative) 654 | { 655 | checked_negate(value, result, text, std::integral_constant()); 656 | } 657 | else 658 | { 659 | value = static_cast(result); 660 | } 661 | } 662 | 663 | template 664 | void stringstream_parser(const std::string& text, T& value) 665 | { 666 | std::stringstream in(text); 667 | in >> value; 668 | if (!in) { 669 | throw_or_mimic(text); 670 | } 671 | } 672 | 673 | inline 674 | 
void 675 | parse_value(const std::string& text, uint8_t& value) 676 | { 677 | integer_parser(text, value); 678 | } 679 | 680 | inline 681 | void 682 | parse_value(const std::string& text, int8_t& value) 683 | { 684 | integer_parser(text, value); 685 | } 686 | 687 | inline 688 | void 689 | parse_value(const std::string& text, uint16_t& value) 690 | { 691 | integer_parser(text, value); 692 | } 693 | 694 | inline 695 | void 696 | parse_value(const std::string& text, int16_t& value) 697 | { 698 | integer_parser(text, value); 699 | } 700 | 701 | inline 702 | void 703 | parse_value(const std::string& text, uint32_t& value) 704 | { 705 | integer_parser(text, value); 706 | } 707 | 708 | inline 709 | void 710 | parse_value(const std::string& text, int32_t& value) 711 | { 712 | integer_parser(text, value); 713 | } 714 | 715 | inline 716 | void 717 | parse_value(const std::string& text, uint64_t& value) 718 | { 719 | integer_parser(text, value); 720 | } 721 | 722 | inline 723 | void 724 | parse_value(const std::string& text, int64_t& value) 725 | { 726 | integer_parser(text, value); 727 | } 728 | 729 | inline 730 | void 731 | parse_value(const std::string& text, bool& value) 732 | { 733 | std::smatch result; 734 | std::regex_match(text, result, truthy_pattern); 735 | 736 | if (!result.empty()) 737 | { 738 | value = true; 739 | return; 740 | } 741 | 742 | std::regex_match(text, result, falsy_pattern); 743 | if (!result.empty()) 744 | { 745 | value = false; 746 | return; 747 | } 748 | 749 | throw_or_mimic(text); 750 | } 751 | 752 | inline 753 | void 754 | parse_value(const std::string& text, std::string& value) 755 | { 756 | value = text; 757 | } 758 | 759 | // The fallback parser. It uses the stringstream parser to parse all types 760 | // that have not been overloaded explicitly. It has to be placed in the 761 | // source code before all other more specialized templates. 
762 | template 763 | void 764 | parse_value(const std::string& text, T& value) { 765 | stringstream_parser(text, value); 766 | } 767 | 768 | template 769 | void 770 | parse_value(const std::string& text, std::vector& value) 771 | { 772 | std::stringstream in(text); 773 | std::string token; 774 | while(!in.eof() && std::getline(in, token, CXXOPTS_VECTOR_DELIMITER)) { 775 | T v; 776 | parse_value(token, v); 777 | value.emplace_back(std::move(v)); 778 | } 779 | } 780 | 781 | #ifdef CXXOPTS_HAS_OPTIONAL 782 | template 783 | void 784 | parse_value(const std::string& text, std::optional& value) 785 | { 786 | T result; 787 | parse_value(text, result); 788 | value = std::move(result); 789 | } 790 | #endif 791 | 792 | inline 793 | void parse_value(const std::string& text, char& c) 794 | { 795 | if (text.length() != 1) 796 | { 797 | throw_or_mimic(text); 798 | } 799 | 800 | c = text[0]; 801 | } 802 | 803 | template 804 | struct type_is_container 805 | { 806 | static constexpr bool value = false; 807 | }; 808 | 809 | template 810 | struct type_is_container> 811 | { 812 | static constexpr bool value = true; 813 | }; 814 | 815 | template 816 | class abstract_value : public Value 817 | { 818 | using Self = abstract_value; 819 | 820 | public: 821 | abstract_value() 822 | : m_result(std::make_shared()) 823 | , m_store(m_result.get()) 824 | { 825 | } 826 | 827 | explicit abstract_value(T* t) 828 | : m_store(t) 829 | { 830 | } 831 | 832 | ~abstract_value() override = default; 833 | 834 | abstract_value& operator=(const abstract_value&) = default; 835 | 836 | abstract_value(const abstract_value& rhs) 837 | { 838 | if (rhs.m_result) 839 | { 840 | m_result = std::make_shared(); 841 | m_store = m_result.get(); 842 | } 843 | else 844 | { 845 | m_store = rhs.m_store; 846 | } 847 | 848 | m_default = rhs.m_default; 849 | m_implicit = rhs.m_implicit; 850 | m_default_value = rhs.m_default_value; 851 | m_implicit_value = rhs.m_implicit_value; 852 | } 853 | 854 | void 855 | parse(const 
std::string& text) const override 856 | { 857 | parse_value(text, *m_store); 858 | } 859 | 860 | bool 861 | is_container() const override 862 | { 863 | return type_is_container::value; 864 | } 865 | 866 | void 867 | parse() const override 868 | { 869 | parse_value(m_default_value, *m_store); 870 | } 871 | 872 | bool 873 | has_default() const override 874 | { 875 | return m_default; 876 | } 877 | 878 | bool 879 | has_implicit() const override 880 | { 881 | return m_implicit; 882 | } 883 | 884 | std::shared_ptr 885 | default_value(const std::string& value) override 886 | { 887 | m_default = true; 888 | m_default_value = value; 889 | return shared_from_this(); 890 | } 891 | 892 | std::shared_ptr 893 | implicit_value(const std::string& value) override 894 | { 895 | m_implicit = true; 896 | m_implicit_value = value; 897 | return shared_from_this(); 898 | } 899 | 900 | std::shared_ptr 901 | no_implicit_value() override 902 | { 903 | m_implicit = false; 904 | return shared_from_this(); 905 | } 906 | 907 | std::string 908 | get_default_value() const override 909 | { 910 | return m_default_value; 911 | } 912 | 913 | std::string 914 | get_implicit_value() const override 915 | { 916 | return m_implicit_value; 917 | } 918 | 919 | bool 920 | is_boolean() const override 921 | { 922 | return std::is_same::value; 923 | } 924 | 925 | const T& 926 | get() const 927 | { 928 | if (m_store == nullptr) 929 | { 930 | return *m_result; 931 | } 932 | return *m_store; 933 | } 934 | 935 | protected: 936 | std::shared_ptr m_result{}; 937 | T* m_store{}; 938 | 939 | bool m_default = false; 940 | bool m_implicit = false; 941 | 942 | std::string m_default_value{}; 943 | std::string m_implicit_value{}; 944 | }; 945 | 946 | template 947 | class standard_value : public abstract_value 948 | { 949 | public: 950 | using abstract_value::abstract_value; 951 | 952 | CXXOPTS_NODISCARD 953 | std::shared_ptr 954 | clone() const override 955 | { 956 | return std::make_shared>(*this); 957 | } 958 | }; 959 | 
960 | template <> 961 | class standard_value : public abstract_value 962 | { 963 | public: 964 | ~standard_value() override = default; 965 | 966 | standard_value() 967 | { 968 | set_default_and_implicit(); 969 | } 970 | 971 | explicit standard_value(bool* b) 972 | : abstract_value(b) 973 | { 974 | set_default_and_implicit(); 975 | } 976 | 977 | std::shared_ptr 978 | clone() const override 979 | { 980 | return std::make_shared>(*this); 981 | } 982 | 983 | private: 984 | 985 | void 986 | set_default_and_implicit() 987 | { 988 | m_default = true; 989 | m_default_value = "false"; 990 | m_implicit = true; 991 | m_implicit_value = "true"; 992 | } 993 | }; 994 | } // namespace values 995 | 996 | template 997 | std::shared_ptr 998 | value() 999 | { 1000 | return std::make_shared>(); 1001 | } 1002 | 1003 | template 1004 | std::shared_ptr 1005 | value(T& t) 1006 | { 1007 | return std::make_shared>(&t); 1008 | } 1009 | 1010 | class OptionAdder; 1011 | 1012 | class OptionDetails 1013 | { 1014 | public: 1015 | OptionDetails 1016 | ( 1017 | std::string short_, 1018 | std::string long_, 1019 | String desc, 1020 | std::shared_ptr val 1021 | ) 1022 | : m_short(std::move(short_)) 1023 | , m_long(std::move(long_)) 1024 | , m_desc(std::move(desc)) 1025 | , m_value(std::move(val)) 1026 | , m_count(0) 1027 | { 1028 | m_hash = std::hash{}(m_long + m_short); 1029 | } 1030 | 1031 | OptionDetails(const OptionDetails& rhs) 1032 | : m_desc(rhs.m_desc) 1033 | , m_value(rhs.m_value->clone()) 1034 | , m_count(rhs.m_count) 1035 | { 1036 | } 1037 | 1038 | OptionDetails(OptionDetails&& rhs) = default; 1039 | 1040 | CXXOPTS_NODISCARD 1041 | const String& 1042 | description() const 1043 | { 1044 | return m_desc; 1045 | } 1046 | 1047 | CXXOPTS_NODISCARD 1048 | const Value& 1049 | value() const { 1050 | return *m_value; 1051 | } 1052 | 1053 | CXXOPTS_NODISCARD 1054 | std::shared_ptr 1055 | make_storage() const 1056 | { 1057 | return m_value->clone(); 1058 | } 1059 | 1060 | CXXOPTS_NODISCARD 1061 | 
const std::string& 1062 | short_name() const 1063 | { 1064 | return m_short; 1065 | } 1066 | 1067 | CXXOPTS_NODISCARD 1068 | const std::string& 1069 | long_name() const 1070 | { 1071 | return m_long; 1072 | } 1073 | 1074 | size_t 1075 | hash() const 1076 | { 1077 | return m_hash; 1078 | } 1079 | 1080 | private: 1081 | std::string m_short{}; 1082 | std::string m_long{}; 1083 | String m_desc{}; 1084 | std::shared_ptr m_value{}; 1085 | int m_count; 1086 | 1087 | size_t m_hash{}; 1088 | }; 1089 | 1090 | struct HelpOptionDetails 1091 | { 1092 | std::string s; 1093 | std::string l; 1094 | String desc; 1095 | bool has_default; 1096 | std::string default_value; 1097 | bool has_implicit; 1098 | std::string implicit_value; 1099 | std::string arg_help; 1100 | bool is_container; 1101 | bool is_boolean; 1102 | }; 1103 | 1104 | struct HelpGroupDetails 1105 | { 1106 | std::string name{}; 1107 | std::string description{}; 1108 | std::vector options{}; 1109 | }; 1110 | 1111 | class OptionValue 1112 | { 1113 | public: 1114 | void 1115 | parse 1116 | ( 1117 | const std::shared_ptr& details, 1118 | const std::string& text 1119 | ) 1120 | { 1121 | ensure_value(details); 1122 | ++m_count; 1123 | m_value->parse(text); 1124 | m_long_name = &details->long_name(); 1125 | } 1126 | 1127 | void 1128 | parse_default(const std::shared_ptr& details) 1129 | { 1130 | ensure_value(details); 1131 | m_default = true; 1132 | m_long_name = &details->long_name(); 1133 | m_value->parse(); 1134 | } 1135 | 1136 | #if defined(__GNUC__) 1137 | #if __GNUC__ <= 10 && __GNUC_MINOR__ <= 1 1138 | #pragma GCC diagnostic push 1139 | #pragma GCC diagnostic ignored "-Werror=null-dereference" 1140 | #endif 1141 | #endif 1142 | 1143 | CXXOPTS_NODISCARD 1144 | size_t 1145 | count() const noexcept 1146 | { 1147 | return m_count; 1148 | } 1149 | 1150 | #if defined(__GNUC__) 1151 | #if __GNUC__ <= 10 && __GNUC_MINOR__ <= 1 1152 | #pragma GCC diagnostic pop 1153 | #endif 1154 | #endif 1155 | 1156 | // TODO: maybe default 
options should count towards the number of arguments 1157 | CXXOPTS_NODISCARD 1158 | bool 1159 | has_default() const noexcept 1160 | { 1161 | return m_default; 1162 | } 1163 | 1164 | template 1165 | const T& 1166 | as() const 1167 | { 1168 | if (m_value == nullptr) { 1169 | throw_or_mimic( 1170 | m_long_name == nullptr ? "" : *m_long_name); 1171 | } 1172 | 1173 | #ifdef CXXOPTS_NO_RTTI 1174 | return static_cast&>(*m_value).get(); 1175 | #else 1176 | return dynamic_cast&>(*m_value).get(); 1177 | #endif 1178 | } 1179 | 1180 | private: 1181 | void 1182 | ensure_value(const std::shared_ptr& details) 1183 | { 1184 | if (m_value == nullptr) 1185 | { 1186 | m_value = details->make_storage(); 1187 | } 1188 | } 1189 | 1190 | 1191 | const std::string* m_long_name = nullptr; 1192 | // Holding this pointer is safe, since OptionValue's only exist in key-value pairs, 1193 | // where the key has the string we point to. 1194 | std::shared_ptr m_value{}; 1195 | size_t m_count = 0; 1196 | bool m_default = false; 1197 | }; 1198 | 1199 | class KeyValue 1200 | { 1201 | public: 1202 | KeyValue(std::string key_, std::string value_) 1203 | : m_key(std::move(key_)) 1204 | , m_value(std::move(value_)) 1205 | { 1206 | } 1207 | 1208 | CXXOPTS_NODISCARD 1209 | const std::string& 1210 | key() const 1211 | { 1212 | return m_key; 1213 | } 1214 | 1215 | CXXOPTS_NODISCARD 1216 | const std::string& 1217 | value() const 1218 | { 1219 | return m_value; 1220 | } 1221 | 1222 | template 1223 | T 1224 | as() const 1225 | { 1226 | T result; 1227 | values::parse_value(m_value, result); 1228 | return result; 1229 | } 1230 | 1231 | private: 1232 | std::string m_key; 1233 | std::string m_value; 1234 | }; 1235 | 1236 | using ParsedHashMap = std::unordered_map; 1237 | using NameHashMap = std::unordered_map; 1238 | 1239 | class ParseResult 1240 | { 1241 | public: 1242 | 1243 | ParseResult() = default; 1244 | ParseResult(const ParseResult&) = default; 1245 | 1246 | ParseResult(NameHashMap&& keys, ParsedHashMap&& 
values, std::vector sequential, std::vector&& unmatched_args) 1247 | : m_keys(std::move(keys)) 1248 | , m_values(std::move(values)) 1249 | , m_sequential(std::move(sequential)) 1250 | , m_unmatched(std::move(unmatched_args)) 1251 | { 1252 | } 1253 | 1254 | ParseResult& operator=(ParseResult&&) = default; 1255 | ParseResult& operator=(const ParseResult&) = default; 1256 | 1257 | size_t 1258 | count(const std::string& o) const 1259 | { 1260 | auto iter = m_keys.find(o); 1261 | if (iter == m_keys.end()) 1262 | { 1263 | return 0; 1264 | } 1265 | 1266 | auto viter = m_values.find(iter->second); 1267 | 1268 | if (viter == m_values.end()) 1269 | { 1270 | return 0; 1271 | } 1272 | 1273 | return viter->second.count(); 1274 | } 1275 | 1276 | const OptionValue& 1277 | operator[](const std::string& option) const 1278 | { 1279 | auto iter = m_keys.find(option); 1280 | 1281 | if (iter == m_keys.end()) 1282 | { 1283 | throw_or_mimic(option); 1284 | } 1285 | 1286 | auto viter = m_values.find(iter->second); 1287 | 1288 | if (viter == m_values.end()) 1289 | { 1290 | throw_or_mimic(option); 1291 | } 1292 | 1293 | return viter->second; 1294 | } 1295 | 1296 | const std::vector& 1297 | arguments() const 1298 | { 1299 | return m_sequential; 1300 | } 1301 | 1302 | const std::vector& 1303 | unmatched() const 1304 | { 1305 | return m_unmatched; 1306 | } 1307 | 1308 | private: 1309 | NameHashMap m_keys{}; 1310 | ParsedHashMap m_values{}; 1311 | std::vector m_sequential{}; 1312 | std::vector m_unmatched{}; 1313 | }; 1314 | 1315 | struct Option 1316 | { 1317 | Option 1318 | ( 1319 | std::string opts, 1320 | std::string desc, 1321 | std::shared_ptr value = ::cxxopts::value(), 1322 | std::string arg_help = "" 1323 | ) 1324 | : opts_(std::move(opts)) 1325 | , desc_(std::move(desc)) 1326 | , value_(std::move(value)) 1327 | , arg_help_(std::move(arg_help)) 1328 | { 1329 | } 1330 | 1331 | std::string opts_; 1332 | std::string desc_; 1333 | std::shared_ptr value_; 1334 | std::string arg_help_; 1335 | 
}; 1336 | 1337 | using OptionMap = std::unordered_map>; 1338 | using PositionalList = std::vector; 1339 | using PositionalListIterator = PositionalList::const_iterator; 1340 | 1341 | class OptionParser 1342 | { 1343 | public: 1344 | OptionParser(const OptionMap& options, const PositionalList& positional, bool allow_unrecognised) 1345 | : m_options(options) 1346 | , m_positional(positional) 1347 | , m_allow_unrecognised(allow_unrecognised) 1348 | { 1349 | } 1350 | 1351 | ParseResult 1352 | parse(int argc, const char* const* argv); 1353 | 1354 | bool 1355 | consume_positional(const std::string& a, PositionalListIterator& next); 1356 | 1357 | void 1358 | checked_parse_arg 1359 | ( 1360 | int argc, 1361 | const char* const* argv, 1362 | int& current, 1363 | const std::shared_ptr& value, 1364 | const std::string& name 1365 | ); 1366 | 1367 | void 1368 | add_to_option(OptionMap::const_iterator iter, const std::string& option, const std::string& arg); 1369 | 1370 | void 1371 | parse_option 1372 | ( 1373 | const std::shared_ptr& value, 1374 | const std::string& name, 1375 | const std::string& arg = "" 1376 | ); 1377 | 1378 | void 1379 | parse_default(const std::shared_ptr& details); 1380 | 1381 | private: 1382 | 1383 | void finalise_aliases(); 1384 | 1385 | const OptionMap& m_options; 1386 | const PositionalList& m_positional; 1387 | 1388 | std::vector m_sequential{}; 1389 | bool m_allow_unrecognised; 1390 | 1391 | ParsedHashMap m_parsed{}; 1392 | NameHashMap m_keys{}; 1393 | }; 1394 | 1395 | class Options 1396 | { 1397 | public: 1398 | 1399 | explicit Options(std::string program, std::string help_string = "") 1400 | : m_program(std::move(program)) 1401 | , m_help_string(toLocalString(std::move(help_string))) 1402 | , m_custom_help("[OPTION...]") 1403 | , m_positional_help("positional parameters") 1404 | , m_show_positional(false) 1405 | , m_allow_unrecognised(false) 1406 | , m_options(std::make_shared()) 1407 | { 1408 | } 1409 | 1410 | Options& 1411 | 
positional_help(std::string help_text) 1412 | { 1413 | m_positional_help = std::move(help_text); 1414 | return *this; 1415 | } 1416 | 1417 | Options& 1418 | custom_help(std::string help_text) 1419 | { 1420 | m_custom_help = std::move(help_text); 1421 | return *this; 1422 | } 1423 | 1424 | Options& 1425 | show_positional_help() 1426 | { 1427 | m_show_positional = true; 1428 | return *this; 1429 | } 1430 | 1431 | Options& 1432 | allow_unrecognised_options() 1433 | { 1434 | m_allow_unrecognised = true; 1435 | return *this; 1436 | } 1437 | 1438 | ParseResult 1439 | parse(int argc, const char* const* argv); 1440 | 1441 | OptionAdder 1442 | add_options(std::string group = ""); 1443 | 1444 | void 1445 | add_options 1446 | ( 1447 | const std::string& group, 1448 | std::initializer_list