├── .clang-format ├── .gitignore ├── CMakeLists.txt ├── LICENSE ├── README.md ├── Url.cpp ├── Url.hpp ├── flags.cpp ├── flags.hpp ├── main.cpp ├── run.sh ├── testdata └── urls_small.txt ├── utils.cpp └── utils.hpp /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | Language: Cpp 3 | # BasedOnStyle: Microsoft 4 | AccessModifierOffset: -2 5 | AlignAfterOpenBracket: Align 6 | AlignConsecutiveMacros: false 7 | AlignConsecutiveAssignments: false 8 | AlignConsecutiveDeclarations: false 9 | AlignEscapedNewlines: Right 10 | AlignOperands: true 11 | AlignTrailingComments: true 12 | AllowAllArgumentsOnNextLine: true 13 | AllowAllConstructorInitializersOnNextLine: true 14 | AllowAllParametersOfDeclarationOnNextLine: true 15 | AllowShortBlocksOnASingleLine: Never 16 | AllowShortCaseLabelsOnASingleLine: false 17 | AllowShortFunctionsOnASingleLine: None 18 | AllowShortLambdasOnASingleLine: All 19 | AllowShortIfStatementsOnASingleLine: Never 20 | AllowShortLoopsOnASingleLine: false 21 | AlwaysBreakAfterDefinitionReturnType: None 22 | AlwaysBreakAfterReturnType: None 23 | AlwaysBreakBeforeMultilineStrings: false 24 | AlwaysBreakTemplateDeclarations: MultiLine 25 | BinPackArguments: true 26 | BinPackParameters: true 27 | BraceWrapping: 28 | AfterCaseLabel: false 29 | AfterClass: true 30 | AfterControlStatement: true 31 | AfterEnum: true 32 | AfterFunction: true 33 | AfterNamespace: true 34 | AfterObjCDeclaration: true 35 | AfterStruct: true 36 | AfterUnion: false 37 | AfterExternBlock: true 38 | BeforeCatch: true 39 | BeforeElse: true 40 | IndentBraces: false 41 | SplitEmptyFunction: true 42 | SplitEmptyRecord: true 43 | SplitEmptyNamespace: true 44 | BreakBeforeBinaryOperators: None 45 | BreakBeforeBraces: Custom 46 | BreakBeforeInheritanceComma: false 47 | BreakInheritanceList: BeforeColon 48 | BreakBeforeTernaryOperators: true 49 | BreakConstructorInitializersBeforeComma: false 50 | BreakConstructorInitializers: 
BeforeColon 51 | BreakAfterJavaFieldAnnotations: false 52 | BreakStringLiterals: true 53 | ColumnLimit: 120 54 | CommentPragmas: '^ IWYU pragma:' 55 | CompactNamespaces: false 56 | ConstructorInitializerAllOnOneLineOrOnePerLine: false 57 | ConstructorInitializerIndentWidth: 4 58 | ContinuationIndentWidth: 4 59 | Cpp11BracedListStyle: true 60 | DeriveLineEnding: true 61 | DerivePointerAlignment: false 62 | DisableFormat: false 63 | ExperimentalAutoDetectBinPacking: false 64 | FixNamespaceComments: true 65 | ForEachMacros: 66 | - foreach 67 | - Q_FOREACH 68 | - BOOST_FOREACH 69 | IncludeBlocks: Preserve 70 | IncludeCategories: 71 | - Regex: '^"(llvm|llvm-c|clang|clang-c)/' 72 | Priority: 2 73 | SortPriority: 0 74 | - Regex: '^(<|"(gtest|gmock|isl|json)/)' 75 | Priority: 3 76 | SortPriority: 0 77 | - Regex: '.*' 78 | Priority: 1 79 | SortPriority: 0 80 | IncludeIsMainRegex: '(Test)?$' 81 | IncludeIsMainSourceRegex: '' 82 | IndentCaseLabels: false 83 | IndentGotoLabels: true 84 | IndentPPDirectives: None 85 | IndentWidth: 4 86 | IndentWrappedFunctionNames: false 87 | JavaScriptQuotes: Leave 88 | JavaScriptWrapImports: true 89 | KeepEmptyLinesAtTheStartOfBlocks: true 90 | MacroBlockBegin: '' 91 | MacroBlockEnd: '' 92 | MaxEmptyLinesToKeep: 1 93 | NamespaceIndentation: None 94 | ObjCBinPackProtocolList: Auto 95 | ObjCBlockIndentWidth: 2 96 | ObjCSpaceAfterProperty: false 97 | ObjCSpaceBeforeProtocolList: true 98 | PenaltyBreakAssignment: 2 99 | PenaltyBreakBeforeFirstCallParameter: 19 100 | PenaltyBreakComment: 300 101 | PenaltyBreakFirstLessLess: 120 102 | PenaltyBreakString: 1000 103 | PenaltyBreakTemplateDeclaration: 10 104 | PenaltyExcessCharacter: 1000000 105 | PenaltyReturnTypeOnItsOwnLine: 1000 106 | PointerAlignment: Right 107 | ReflowComments: true 108 | SortIncludes: true 109 | SortUsingDeclarations: true 110 | SpaceAfterCStyleCast: false 111 | SpaceAfterLogicalNot: false 112 | SpaceAfterTemplateKeyword: true 113 | SpaceBeforeAssignmentOperators: true 114 | 
SpaceBeforeCpp11BracedList: true 115 | SpaceBeforeCtorInitializerColon: true 116 | SpaceBeforeInheritanceColon: true 117 | SpaceBeforeParens: ControlStatements 118 | SpaceBeforeRangeBasedForLoopColon: false 119 | SpaceInEmptyBlock: false 120 | SpaceInEmptyParentheses: false 121 | SpacesBeforeTrailingComments: 1 122 | SpacesInAngles: false 123 | SpacesInConditionalStatement: false 124 | SpacesInContainerLiterals: true 125 | SpacesInCStyleCastParentheses: false 126 | SpacesInParentheses: false 127 | SpacesInSquareBrackets: false 128 | SpaceBeforeSquareBrackets: false 129 | Standard: Latest 130 | StatementMacros: 131 | - Q_UNUSED 132 | - QT_REQUIRE_VERSION 133 | TabWidth: 4 134 | UseCRLF: false 135 | UseTab: Never 136 | ... 137 | 138 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | cmake-build-debug 2 | .DS_Store 3 | .idea 4 | 5 | CMakeCache.txt 6 | urldedupe 7 | a.out -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.16) 2 | project(urldedupe) 3 | 4 | set(CMAKE_CXX_STANDARD 17) 5 | 6 | add_executable(urldedupe main.cpp utils.cpp flags.cpp flags.hpp utils.hpp Url.cpp Url.hpp) 7 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Ameen Maali 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to 
whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # urldedupe 2 | 3 | urldedupe is a tool to quickly pass in a list of URLs, and get back a list of deduplicated (unique) 4 | URL and query string combinations. This is useful to ensure you don't have a URL list with hundreds of duplicated parameters 5 | with differing qs values. For an example run, take the following URL list passed in: 6 | 7 | ``` 8 | https://google.com 9 | https://google.com/home?qs=value 10 | https://google.com/home?qs=secondValue 11 | https://google.com/home?qs=newValue&secondQs=anotherValue 12 | https://google.com/home?qs=asd&secondQs=das 13 | ``` 14 | 15 | Passing through `urldedupe` will only maintain the non-duplicate URL & query string (ignoring values) combinations: 16 | 17 | ``` 18 | $ cat urls.txt | urldedupe 19 | https://google.com 20 | https://google.com/home?qs=value 21 | https://google.com/home?qs=newValue&secondQs=anotherValue 22 | ``` 23 | 24 | It's also possible to deduplicate similar URLs. 
This is done with `-s|--similar` flag, to deduplicate endpoints such as API endpoints with different IDs, or assets: 25 | 26 | ``` 27 | $ cat urls.txt 28 | https://site.com/api/users/123 29 | https://site.com/api/users/222 30 | https://site.com/api/users/412/profile 31 | https://site.com/users/photos/photo.jpg 32 | https://site.com/users/photos/myPhoto.jpg 33 | https://site.com/users/photos/photo.png 34 | ``` 35 | 36 | Becomes: 37 | 38 | ``` 39 | $ cat urls.txt | urldedupe -s 40 | https://site.com/api/users/123 41 | https://site.com/api/users/412/profile 42 | https://site.com/users/photos/photo.jpg 43 | ``` 44 | 45 | Why C++? Because it's super fast?!?! No not really, I'm working on my C++ skills and mostly just wanted to create a real-world C++ project as opposed to educational related work. 46 | 47 | ## Installation 48 | Use the binary already compiled within the repository...Or better yet to not run a random binary from myself who can be very shady, compile from source: 49 | 50 | You'll need `cmake` installed and C++ 17 or higher. 51 | 52 | Clone the repository & navigate to it: 53 | ``` 54 | git clone https://github.com/ameenmaali/urldedupe.git 55 | cd urldedupe 56 | ``` 57 | 58 | In the `urldedupe` directory 59 | ``` 60 | cmake CMakeLists.txt 61 | ``` 62 | 63 | If you don't have `cmake` installed, do that. On Mac OS X it is: 64 | ``` 65 | brew install cmake 66 | ``` 67 | 68 | Run make: 69 | ``` 70 | make 71 | ``` 72 | 73 | The `urldedupe` binary should now be created in the same directory. For easy use, you can move it to your `bin` directory. 
74 | 75 | ## Usage 76 | `urldedupe` takes URLs from stdin, or from a file passed with the `-u` flag. You will most likely have them in a file such as: 77 | ``` 78 | $ cat urls.txt 79 | https://google.com/home/?q=2&d=asd 80 | https://my.site/profile?param1=1&param2=2 81 | https://my.site/profile?param3=3 82 | ``` 83 | 84 | ## Help 85 | ``` 86 | $ ./urldedupe -h 87 | (-h|--help) - Usage/help info for urldedupe 88 | (-u|--urls) - Filename containing urls (use this if you don't pipe urls via stdin) 89 | (-V|--version) - Get current version for urldedupe 90 | (-r|--regex-parse) - This is significantly slower than normal parsing, but may be more thorough or accurate 91 | (-s|--similar) - Remove similar URLs (based on integers and image/font files) - i.e. /api/user/1 & /api/user/2 deduplicated 92 | (-qs|--query-strings-only) - Only include URLs if they have query strings 93 | (-ne|--no-extensions) - Do not include URLs if they have an extension (i.e. .png, .jpg, .woff, .js, .html) 94 | (-m|--mode) - The mode/filters to be enabled (can be 1 or more, comma separated). 
Default is none, available options are the other flags (--mode "r,s,qs,ne") 95 | ``` 96 | 97 | ## Examples 98 | 99 | Very simple, simply pass URLs from stdin or with the `-u` flag: 100 | 101 | `./urldedupe -u urls.txt` 102 | 103 | After moving the `urldedupe` binary to your `bin` dir, pass in a list from stdin and save to a file: 104 | 105 | `cat urls.txt | urldedupe > deduped_urls.txt` 106 | 107 | Deduplicate similar URLs with `-s|--similar` flag, such as API endpoints with different IDs, or assets: 108 | 109 | `cat urls.txt | urldedupe -s` 110 | 111 | ``` 112 | https://site.com/api/users/123 113 | https://site.com/api/users/222 114 | https://site.com/api/users/412/profile 115 | https://site.com/users/photos/photo.jpg 116 | https://site.com/users/photos/myPhoto.jpg 117 | https://site.com/users/photos/photo.png 118 | ``` 119 | 120 | Becomes: 121 | 122 | ``` 123 | https://site.com/api/users/123 124 | https://site.com/api/users/412/profile 125 | https://site.com/users/photos/photo.jpg 126 | ``` 127 | 128 | For all the bug bounty hunters, I recommend chaining with tools such as `waybackurls` or `gau` to get back only unique URLs as those sources are prone to have many similar/duplicated URLs: 129 | 130 | `cat waybackurls | urldedupe > deduped_urls.txt` 131 | 132 | For max thoroughness (usually not necessary), you can use an RFC-compliant regex for URL parsing, but it is significantly slower for large data sets: 133 | 134 | `cat urls.txt | urldedupe -r > deduped_urls_regex.txt` 135 | 136 | Alternatively, use `-m|--mode` with the flag values you'd like to run with. For example, if you want 137 | to get URLs deduped based on similarity, include only URLs that have query strings, and do not have extensions... 
138 | 139 | Instead of: 140 | 141 | `urldedupe -u urls.txt -s -qs -ne` 142 | 143 | You can also do: 144 | 145 | `urldedupe -u urls.txt -m "s,qs,ne"` 146 | -------------------------------------------------------------------------------- /Url.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Ameen Maali on 6/1/20. 3 | // 4 | 5 | #include 6 | #include 7 | 8 | #include "Url.hpp" 9 | #include "utils.hpp" 10 | 11 | Url::Url(const std::string &url, bool regex_mode) : url_string(url) 12 | { 13 | if (regex_mode) 14 | { 15 | this->regex_parse(); 16 | } 17 | else 18 | { 19 | this->parse(); 20 | } 21 | } 22 | 23 | Url::Url(const Url &other) : url_string(other.url_string) 24 | { 25 | scheme = clone_string_view(other.url_string, other.scheme, url_string); 26 | hostname = clone_string_view(other.url_string, other.hostname, url_string); 27 | path = clone_string_view(other.url_string, other.path, url_string); 28 | query_strings = clone_string_view(other.url_string, other.query_strings, url_string); 29 | fragment = clone_string_view(other.url_string, other.fragment, url_string); 30 | } 31 | 32 | std::string_view Url::get_scheme() const 33 | { 34 | return scheme; 35 | } 36 | 37 | std::string_view Url::get_hostname() const 38 | { 39 | return hostname; 40 | } 41 | 42 | std::string_view Url::get_path() const 43 | { 44 | return path; 45 | } 46 | 47 | std::string_view Url::get_query_strings() const 48 | { 49 | return query_strings; 50 | } 51 | 52 | std::string_view Url::get_fragment() const 53 | { 54 | return fragment; 55 | } 56 | 57 | const std::string &Url::get_url_string() const 58 | { 59 | return url_string; 60 | } 61 | 62 | bool Url::is_encoded(const std::string &u) 63 | { 64 | return u.find('%') != std::string::npos; 65 | } 66 | 67 | std::string Url::decode(const std::string &str) 68 | { 69 | std::string ret; 70 | auto len = str.length(); 71 | 72 | for (std::size_t i = 0; i < len; i++) 73 | { 74 | if (str[i] != '%') 75 | { 
76 | if (str[i] == '+') 77 | { 78 | ret += ' '; 79 | } 80 | else 81 | { 82 | ret += str[i]; 83 | } 84 | } 85 | else 86 | { 87 | // If str[i+2] does not exist, this will crash. 88 | // This also means the URL is invalid, so... 89 | ret += hex_digit(str[i + 1]) * 16 + hex_digit(str[i + 2]); 90 | i = i + 2; 91 | } 92 | } 93 | return ret; 94 | } 95 | 96 | std::string Url::encode(const std::string &str) 97 | { 98 | std::string encoded_str {}; 99 | char bufHex[10]; 100 | const int len = str.length(); 101 | 102 | for (int i = 0; i < len; i++) 103 | { 104 | auto c = str[i]; 105 | 106 | if (c == ' ') 107 | { 108 | encoded_str += '+'; 109 | } 110 | else 111 | { 112 | if (isalnum(c) || c == '-' || c == '_' || c == '.' || c == '~') 113 | { 114 | encoded_str += c; 115 | } 116 | else 117 | { 118 | sprintf(bufHex, "%X", c); 119 | 120 | if (static_cast(c) < 16) 121 | { 122 | encoded_str += "%0"; 123 | } 124 | else 125 | { 126 | encoded_str += "%"; 127 | } 128 | encoded_str += bufHex; 129 | } 130 | } 131 | } 132 | return encoded_str; 133 | } 134 | 135 | void Url::regex_parse() 136 | { 137 | auto make_sv = [&](const std::ssub_match &m) -> std::string_view { 138 | if (not m.matched) 139 | return {}; 140 | auto start = m.first - std::begin(url_string); 141 | return std::string_view(url_string.data() + start, m.length()); 142 | }; 143 | std::smatch match; 144 | if (std::regex_match(url_string, match, URL_REGEX)) 145 | { 146 | scheme = make_sv(match[2]); 147 | hostname = make_sv(match[4]); 148 | path = make_sv(match[5]); 149 | query_strings = make_sv(match[7]); 150 | fragment = make_sv(match[9]); 151 | } 152 | } 153 | 154 | bool Url::parse() 155 | { 156 | std::string_view url_view {url_string}; 157 | 158 | auto current {url_view.find("://")}; 159 | if (current != std::string::npos) 160 | { 161 | scheme = url_view.substr(0, current + 3); 162 | url_view = url_view.substr(current + 3); 163 | } 164 | 165 | current = url_view.find('#'); 166 | if (current != std::string::npos) 167 | { 168 | 
fragment = url_view.substr(current + 1); 169 | url_view = url_view.substr(0, current); 170 | } 171 | 172 | current = url_view.find('?'); 173 | if (current != std::string::npos) 174 | { 175 | query_strings = url_view.substr(current + 1); 176 | url_view = url_view.substr(0, current); 177 | } 178 | 179 | current = url_view.find('/'); 180 | if (current != std::string::npos) 181 | { 182 | path = url_view.substr(current); 183 | } 184 | hostname = url_view.substr(0, current); 185 | 186 | return true; 187 | } 188 | 189 | std::string Url::get_url_key(bool similar_mode) 190 | { 191 | std::string url_key {}; 192 | url_key += this->hostname; 193 | if (similar_mode) 194 | { 195 | url_key += this->get_path_components(); 196 | } 197 | else 198 | { 199 | url_key += this->path; 200 | } 201 | 202 | std::string qs {get_query_strings()}; 203 | if (qs.empty()) 204 | return url_key; 205 | 206 | std::string token {}; 207 | size_t current; 208 | std::vector qs_vals {}; 209 | qs += "&"; 210 | while ((current = qs.find('&')) != std::string::npos) 211 | { 212 | token = qs.substr(0, current); 213 | qs.erase(0, current + 1); 214 | 215 | std::string qs_key {token.substr(0, token.find('='))}; 216 | qs_vals.push_back(qs_key); 217 | } 218 | 219 | url_key += "?"; 220 | for (const auto &x: qs_vals) 221 | url_key += x + "&"; 222 | 223 | return url_key; 224 | } 225 | 226 | std::string Url::get_path_components() const 227 | { 228 | std::string path_components {}; 229 | if (this->path.empty()) 230 | return path_components; 231 | 232 | std::string url_path {this->path}; 233 | std::string token {}; 234 | size_t current; 235 | 236 | // Add trailing slash to get all path components (including last) 237 | url_path += "/"; 238 | while ((current = url_path.find('/')) != std::string::npos) 239 | { 240 | token = url_path.substr(0, current); 241 | url_path.erase(0, current + 1); 242 | 243 | // Append to path_components depending on what time of component is found 244 | // Also, add back trailing slash to separate 
components 245 | if (is_number(token)) 246 | path_components += "dedupeInt/"; 247 | else if (is_asset(token)) 248 | path_components += "dedupeAsset/"; 249 | else 250 | path_components += token + "/"; 251 | } 252 | 253 | return path_components; 254 | } 255 | 256 | bool Url::is_asset(const std::string &str) 257 | { 258 | size_t current; 259 | current = str.find('.'); 260 | if (current == std::string::npos) 261 | return false; 262 | 263 | std::string extension = str.substr(current, std::string::npos); 264 | return find(ASSET_EXTENSIONS.begin(), ASSET_EXTENSIONS.end(), extension) != ASSET_EXTENSIONS.end(); 265 | } 266 | 267 | bool Url::has_extension() 268 | { 269 | std::filesystem::path fpath {this->path}; 270 | return fpath.has_extension(); 271 | } 272 | -------------------------------------------------------------------------------- /Url.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Ameen Maali on 6/1/20. 3 | // 4 | 5 | #ifndef URLDEDUPE_URL_HPP 6 | #define URLDEDUPE_URL_HPP 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | // RFC 3986 Recommendation for URL Regex: https://tools.ietf.org/html/rfc3986#page-51 13 | const std::regex URL_REGEX(R"(^(([^:\/?#]+):)?(//([^\/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?)", std::regex::extended); 14 | const std::array ASSET_EXTENSIONS {".jpg", ".jpeg", ".png", ".gif", ".tiff", ".webm", 15 | ".svg", ".eot", ".ttf", ".woff", ".ico", ".woff2"}; 16 | 17 | class Url 18 | { 19 | private: 20 | const std::string url_string; 21 | std::string_view scheme; 22 | std::string_view hostname; 23 | std::string_view path; 24 | std::string_view query_strings; 25 | std::string_view fragment; 26 | 27 | public: 28 | Url(const std::string &url, bool regex_mode = false); 29 | Url(const Url &); 30 | Url(Url &&) = default; 31 | ~Url() = default; 32 | 33 | std::string_view get_scheme() const; 34 | std::string_view get_hostname() const; 35 | std::string_view get_path() const; 36 | 
std::string_view get_query_strings() const; 37 | std::string_view get_fragment() const; 38 | 39 | static bool is_encoded(const std::string &); 40 | static std::string decode(const std::string &); 41 | static std::string encode(const std::string &); 42 | 43 | static bool is_asset(const std::string &str); 44 | 45 | const std::string &get_url_string() const; 46 | 47 | void regex_parse(); 48 | bool parse(); 49 | 50 | std::string get_url_key(bool similar_mode); 51 | 52 | std::string get_path_components() const; 53 | 54 | bool has_extension(); 55 | }; 56 | 57 | #endif // URLDEDUPE_URL_HPP 58 | -------------------------------------------------------------------------------- /flags.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Ameen Maali on 6/1/20. 3 | // 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | #include "flags.hpp" 10 | 11 | // clang-format off 12 | const auto flags = std::array { 13 | Flag{ 14 | .short_name="-h", 15 | .long_name="--help", 16 | .usage="Usage/help info for urldedupe", 17 | .required=false, 18 | .is_switch=true }, 19 | Flag{ 20 | .short_name="-u", 21 | .long_name="--urls", 22 | .usage="Filename containing urls (use this if you don't pipe urls via stdin)", 23 | .required=false, 24 | .is_switch=false }, 25 | Flag{ 26 | .short_name="-V", 27 | .long_name="--version", 28 | .usage="Get current version for urldedupe", 29 | .required=false, 30 | .is_switch=true }, 31 | Flag{ 32 | .short_name="-r", 33 | .long_name="--regex-parse", 34 | .usage="This is significantly slower than normal parsing, but may be more thorough or accurate", 35 | .required=false, 36 | .is_switch=true }, 37 | Flag{ 38 | .short_name="-s", 39 | .long_name="--similar", 40 | .usage="Remove similar URLs (based on integers and image/font files) - i.e. 
/api/user/1 & /api/user/2 deduplicated", 41 | .required=false, 42 | .is_switch=true }, 43 | Flag{ 44 | .short_name="-qs", 45 | .long_name="--query-strings-only", 46 | .usage="Only include URLs if they have query strings", 47 | .required=false, 48 | .is_switch=true }, 49 | Flag{ 50 | .short_name="-ne", 51 | .long_name="--no-extensions", 52 | .usage="Do not include URLs if they have an extension (i.e. .png, .jpg, .woff, .js, .html)", 53 | .required=false, 54 | .is_switch=true }, 55 | Flag{ 56 | .short_name="-m", 57 | .long_name="--mode", 58 | .usage="The mode/filters to be enabled (can be 1 or more, comma separated). Default is none, available options are the other flags (--mode \"r,s,qs,ne\")", 59 | .required=false, 60 | .is_switch=false }, 61 | }; 62 | // clang-format on 63 | 64 | std::vector